forked from recloudstream/cloudstream
		
	Added subtitle encoding fix
This commit is contained in:
		
							parent
							
								
									fbf8a758a3
								
							
						
					
					
						commit
						be4ee26b83
					
				
					 2 changed files with 114 additions and 97 deletions
				
			
		|  | @ -17,6 +17,7 @@ import com.lagradost.cloudstream3.R | ||||||
| import com.lagradost.cloudstream3.mvvm.logError | import com.lagradost.cloudstream3.mvvm.logError | ||||||
| import org.mozilla.universalchardet.UniversalDetector | import org.mozilla.universalchardet.UniversalDetector | ||||||
| import java.nio.ByteBuffer | import java.nio.ByteBuffer | ||||||
|  | import java.nio.charset.Charset | ||||||
| 
 | 
 | ||||||
| class CustomDecoder : SubtitleDecoder { | class CustomDecoder : SubtitleDecoder { | ||||||
|     companion object { |     companion object { | ||||||
|  | @ -58,6 +59,8 @@ class CustomDecoder : SubtitleDecoder { | ||||||
|             ) |             ) | ||||||
|         val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*""")) |         val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*""")) | ||||||
| 
 | 
 | ||||||
|  |         //https://emptycharacter.com/ | ||||||
|  |         //https://www.fileformat.info/info/unicode/char/200b/index.htm | ||||||
|         fun trimStr(string: String): String { |         fun trimStr(string: String): String { | ||||||
|             return string.trimStart().trim('\uFEFF', '\u200B').replace( |             return string.trimStart().trim('\uFEFF', '\u200B').replace( | ||||||
|                 Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"), |                 Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"), | ||||||
|  | @ -77,110 +80,114 @@ class CustomDecoder : SubtitleDecoder { | ||||||
|         return realDecoder?.dequeueInputBuffer() ?: SubtitleInputBuffer() |         return realDecoder?.dequeueInputBuffer() ?: SubtitleInputBuffer() | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     private fun getStr(byteArray: ByteArray): Pair<String, Charset> { | ||||||
|  |         val encoding = try { | ||||||
|  |             val encoding = overrideEncoding ?: run { | ||||||
|  |                 val detector = UniversalDetector() | ||||||
|  | 
 | ||||||
|  |                 detector.handleData(byteArray, 0, byteArray.size) | ||||||
|  |                 detector.dataEnd() | ||||||
|  | 
 | ||||||
|  |                 detector.detectedCharset // "windows-1256" | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             Log.i( | ||||||
|  |                 TAG, | ||||||
|  |                 "Detected encoding with charset $encoding and override = $overrideEncoding" | ||||||
|  |             ) | ||||||
|  |             encoding ?: UTF_8 | ||||||
|  |         } catch (e: Exception) { | ||||||
|  |             Log.e(TAG, "Failed to detect encoding throwing error") | ||||||
|  |             logError(e) | ||||||
|  |             UTF_8 | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         return try { | ||||||
|  |             val set = charset(encoding) | ||||||
|  |             Pair(String(byteArray, set), set) | ||||||
|  |         } catch (e: Exception) { | ||||||
|  |             Log.e(TAG, "Failed to parse using encoding $encoding") | ||||||
|  |             logError(e) | ||||||
|  |             Pair(byteArray.decodeToString(), charset(UTF_8)) | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     private fun getStr(input: SubtitleInputBuffer): String? { | ||||||
|  |         try { | ||||||
|  |             val data = input.data ?: return null | ||||||
|  |             data.position(0) | ||||||
|  |             val fullDataArr = ByteArray(data.remaining()) | ||||||
|  |             data.get(fullDataArr) | ||||||
|  |             return trimStr(getStr(fullDataArr).first) | ||||||
|  |         } catch (e: Exception) { | ||||||
|  |             Log.e(TAG, "Failed to parse text returning plain data") | ||||||
|  |             logError(e) | ||||||
|  |             return null | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) { |     override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) { | ||||||
|         Log.i(TAG, "queueInputBuffer") |         Log.i(TAG, "queueInputBuffer") | ||||||
|         try { |         try { | ||||||
|             if (realDecoder == null) { |             val inputString = getStr(inputBuffer) | ||||||
|                 inputBuffer.data?.let { data -> |             if (realDecoder == null && !inputString.isNullOrBlank()) { | ||||||
|                     // this way we read the subtitle file and decide what decoder to use instead of relying on mimetype |                 var str: String = inputString | ||||||
|                     Log.i(TAG, "Got data from queueInputBuffer") |                 // this way we read the subtitle file and decide what decoder to use instead of relying on mimetype | ||||||
| 
 |                 Log.i(TAG, "Got data from queueInputBuffer") | ||||||
|                     var (str, charset) = try { |                 //https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388 | ||||||
|                         data.position(0) |                 realDecoder = when { | ||||||
|                         val fullDataArr = ByteArray(data.remaining()) |                     str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder() | ||||||
|                         data.get(fullDataArr) |                     str.startsWith("<?xml version=\"", ignoreCase = true) -> TtmlDecoder() | ||||||
|                         val encoding = try { |                     (str.startsWith( | ||||||
|                             val encoding = overrideEncoding ?: run { |                         "[Script Info]", | ||||||
|                                 val detector = UniversalDetector() |                         ignoreCase = true | ||||||
| 
 |                     ) || str.startsWith("Title:", ignoreCase = true)) -> SsaDecoder() | ||||||
|                                 detector.handleData(fullDataArr, 0, fullDataArr.size) |                     str.startsWith("1", ignoreCase = true) -> SubripDecoder() | ||||||
|                                 detector.dataEnd() |                     else -> null | ||||||
| 
 |                 } | ||||||
|                                 detector.detectedCharset // "windows-1256" |                 Log.i( | ||||||
|  |                     TAG, | ||||||
|  |                     "Decoder selected: $realDecoder" | ||||||
|  |                 ) | ||||||
|  |                 realDecoder?.let { decoder -> | ||||||
|  |                     decoder.dequeueInputBuffer()?.let { buff -> | ||||||
|  |                         if (regexSubtitlesToRemoveCaptions && decoder::class.java != SsaDecoder::class.java) { | ||||||
|  |                             captionRegex.forEach { rgx -> | ||||||
|  |                                 str = str.replace(rgx, "\n") | ||||||
|  |                             } | ||||||
|  |                             bloatRegex.forEach { rgx -> | ||||||
|  |                                 str = str.replace(rgx, "\n") | ||||||
|                             } |                             } | ||||||
| 
 |  | ||||||
|                             Log.i( |  | ||||||
|                                 TAG, |  | ||||||
|                                 "Detected encoding with charset $encoding and override = $overrideEncoding" |  | ||||||
|                             ) |  | ||||||
|                             encoding ?: UTF_8 |  | ||||||
|                         } catch (e: Exception) { |  | ||||||
|                             Log.e(TAG, "Failed to detect encoding throwing error") |  | ||||||
|                             logError(e) |  | ||||||
|                             UTF_8 |  | ||||||
|                         } |                         } | ||||||
|  |                         buff.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8))) | ||||||
| 
 | 
 | ||||||
|                         var (fullStr, charset) = try { |                         decoder.queueInputBuffer(buff) | ||||||
|                             val set = charset(encoding) |  | ||||||
|                             Pair(String(fullDataArr, set), set) |  | ||||||
|                         } catch (e: Exception) { |  | ||||||
|                             Log.e(TAG, "Failed to parse using encoding $encoding") |  | ||||||
|                             logError(e) |  | ||||||
|                             Pair(fullDataArr.decodeToString(), charset(UTF_8)) |  | ||||||
|                         } |  | ||||||
| 
 |  | ||||||
|                         bloatRegex.forEach { rgx -> |  | ||||||
|                             fullStr = fullStr.replace(rgx, "\n") |  | ||||||
|                         } |  | ||||||
| 
 |  | ||||||
|                         fullStr.replace(Regex("(\r\n|\r|\n){2,}"), "\n") |  | ||||||
|                         // fullStr = "1\n00:00:01,616 --> 00:00:40,200\n" + |  | ||||||
|                         //         "تــــرجــمة" |  | ||||||
| 
 |  | ||||||
|                         Log.i( |                         Log.i( | ||||||
|                             TAG, |                             TAG, | ||||||
|                             "Encoded Text start: " + fullStr.substring( |                             "Decoder queueInputBuffer successfully" | ||||||
|                                 0, |  | ||||||
|                                 minOf(fullStr.length, 300) |  | ||||||
|                             ) |  | ||||||
|                         ) |                         ) | ||||||
|                         Pair(fullStr, charset) |  | ||||||
|                     } catch (e: Exception) { |  | ||||||
|                         Log.e(TAG, "Failed to parse text returning plain data") |  | ||||||
|                         logError(e) |  | ||||||
|                         return |  | ||||||
|                     } |  | ||||||
|                     //https://emptycharacter.com/ |  | ||||||
|                     //https://www.fileformat.info/info/unicode/char/200b/index.htm |  | ||||||
|                     //val str = trimStr(arr.decodeToString()) |  | ||||||
|                     //Log.i(TAG, "first string is >>>$str<<<") |  | ||||||
|                     if (str.isNotEmpty()) { |  | ||||||
|                         //https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388 |  | ||||||
|                         realDecoder = when { |  | ||||||
|                             str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder() |  | ||||||
|                             str.startsWith("<?xml version=\"", ignoreCase = true) -> TtmlDecoder() |  | ||||||
|                             (str.startsWith( |  | ||||||
|                                 "[Script Info]", |  | ||||||
|                                 ignoreCase = true |  | ||||||
|                             ) || str.startsWith("Title:", ignoreCase = true)) -> SsaDecoder() |  | ||||||
|                             str.startsWith("1", ignoreCase = true) -> SubripDecoder() |  | ||||||
|                             else -> null |  | ||||||
|                         } |  | ||||||
|                         Log.i( |  | ||||||
|                             TAG, |  | ||||||
|                             "Decoder selected: $realDecoder" |  | ||||||
|                         ) |  | ||||||
|                         realDecoder?.let { decoder -> |  | ||||||
|                             decoder.dequeueInputBuffer()?.let { buff -> |  | ||||||
|                                 if (regexSubtitlesToRemoveCaptions && decoder::class.java != SsaDecoder::class.java) { |  | ||||||
|                                     captionRegex.forEach { rgx -> |  | ||||||
|                                         str = str.replace(rgx, "\n") |  | ||||||
|                                     } |  | ||||||
|                                 } |  | ||||||
| 
 |  | ||||||
|                                 buff.data = ByteBuffer.wrap(str.toByteArray(charset = charset)) |  | ||||||
| 
 |  | ||||||
|                                 decoder.queueInputBuffer(buff) |  | ||||||
|                                 Log.i( |  | ||||||
|                                     TAG, |  | ||||||
|                                     "Decoder queueInputBuffer successfully" |  | ||||||
|                                 ) |  | ||||||
|                             } |  | ||||||
|                             CS3IPlayer.requestSubtitleUpdate?.invoke() |  | ||||||
|                         } |  | ||||||
|                     } |                     } | ||||||
|  |                     CS3IPlayer.requestSubtitleUpdate?.invoke() | ||||||
|                 } |                 } | ||||||
|             } else { |             } else { | ||||||
|  |                 Log.i( | ||||||
|  |                     TAG, | ||||||
|  |                     "Decoder else queueInputBuffer successfully" | ||||||
|  |                 ) | ||||||
|  | 
 | ||||||
|  |                 if (!inputString.isNullOrBlank()) { | ||||||
|  |                     var str: String = inputString | ||||||
|  |                     if (regexSubtitlesToRemoveCaptions && realDecoder!!::class.java != SsaDecoder::class.java) { | ||||||
|  |                         captionRegex.forEach { rgx -> | ||||||
|  |                             str = str.replace(rgx, "\n") | ||||||
|  |                         } | ||||||
|  |                         bloatRegex.forEach { rgx -> | ||||||
|  |                             str = str.replace(rgx, "\n") | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                     inputBuffer.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8))) | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|                 realDecoder?.queueInputBuffer(inputBuffer) |                 realDecoder?.queueInputBuffer(inputBuffer) | ||||||
|             } |             } | ||||||
|         } catch (e: Exception) { |         } catch (e: Exception) { | ||||||
|  |  | ||||||
|  | @ -283,11 +283,17 @@ class GeneratorPlayer : FullScreenPlayer() { | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 sourceDialog.setOnDismissListener { |                 var shouldDismiss = true | ||||||
|  | 
 | ||||||
|  |                 fun dismiss() { | ||||||
|                     if (isPlaying) { |                     if (isPlaying) { | ||||||
|                         player.handleEvent(CSPlayerEvent.Play) |                         player.handleEvent(CSPlayerEvent.Play) | ||||||
|                     } |                     } | ||||||
|                     activity?.hideSystemUI() |                     activity?.hideSystemUI() | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 sourceDialog.setOnDismissListener { | ||||||
|  |                     if (shouldDismiss) dismiss() | ||||||
|                     selectSourceDialog = null |                     selectSourceDialog = null | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|  | @ -325,7 +331,7 @@ class GeneratorPlayer : FullScreenPlayer() { | ||||||
|                         null |                         null | ||||||
|                     ) |                     ) | ||||||
|                     val index = prefValues.indexOf(value) |                     val index = prefValues.indexOf(value) | ||||||
|                     text = prefNames[if(index == -1) 0 else index] |                     text = prefNames[if (index == -1) 0 else index] | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 sourceDialog.subtitles_click_settings?.setOnClickListener { |                 sourceDialog.subtitles_click_settings?.setOnClickListener { | ||||||
|  | @ -340,11 +346,13 @@ class GeneratorPlayer : FullScreenPlayer() { | ||||||
|                             null |                             null | ||||||
|                         ) |                         ) | ||||||
| 
 | 
 | ||||||
|                     val index = prefValues.indexOf(currentPrefMedia) |                     shouldDismiss = false | ||||||
|                     sourceDialog.dismissSafe(activity) |                     sourceDialog.dismissSafe(activity) | ||||||
|  | 
 | ||||||
|  |                     val index = prefValues.indexOf(currentPrefMedia) | ||||||
|                     activity?.showDialog( |                     activity?.showDialog( | ||||||
|                         prefNames.toList(), |                         prefNames.toList(), | ||||||
|                         if(index == -1) 0 else index, |                         if (index == -1) 0 else index, | ||||||
|                         ctx.getString(R.string.subtitles_encoding), |                         ctx.getString(R.string.subtitles_encoding), | ||||||
|                         true, |                         true, | ||||||
|                         {}) { |                         {}) { | ||||||
|  | @ -354,8 +362,10 @@ class GeneratorPlayer : FullScreenPlayer() { | ||||||
|                                 prefValues[it] |                                 prefValues[it] | ||||||
|                             ) |                             ) | ||||||
|                             .apply() |                             .apply() | ||||||
|                         println("FORCED ENCODING: ${prefValues[it]}") | 
 | ||||||
|                         updateForcedEncoding(ctx) |                         updateForcedEncoding(ctx) | ||||||
|  |                         dismiss() | ||||||
|  |                         player.seekTime(-1) // to update subtitles, a dirty trick | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue