From be4ee26b83d7bdd5537ef867bda9778fd20bdd68 Mon Sep 17 00:00:00 2001 From: LagradOst <11805592+LagradOst@users.noreply.github.com> Date: Thu, 2 Jun 2022 21:28:09 +0200 Subject: [PATCH] added subtitle encoding fixed --- .../ui/player/CustomSubtitleDecoderFactory.kt | 191 +++++++++--------- .../cloudstream3/ui/player/GeneratorPlayer.kt | 20 +- 2 files changed, 114 insertions(+), 97 deletions(-) diff --git a/app/src/main/java/com/lagradost/cloudstream3/ui/player/CustomSubtitleDecoderFactory.kt b/app/src/main/java/com/lagradost/cloudstream3/ui/player/CustomSubtitleDecoderFactory.kt index f727e8c0..29fe6e72 100644 --- a/app/src/main/java/com/lagradost/cloudstream3/ui/player/CustomSubtitleDecoderFactory.kt +++ b/app/src/main/java/com/lagradost/cloudstream3/ui/player/CustomSubtitleDecoderFactory.kt @@ -17,6 +17,7 @@ import com.lagradost.cloudstream3.R import com.lagradost.cloudstream3.mvvm.logError import org.mozilla.universalchardet.UniversalDetector import java.nio.ByteBuffer +import java.nio.charset.Charset class CustomDecoder : SubtitleDecoder { companion object { @@ -58,6 +59,8 @@ class CustomDecoder : SubtitleDecoder { ) val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*""")) + //https://emptycharacter.com/ + //https://www.fileformat.info/info/unicode/char/200b/index.htm fun trimStr(string: String): String { return string.trimStart().trim('\uFEFF', '\u200B').replace( Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"), @@ -77,110 +80,114 @@ class CustomDecoder : SubtitleDecoder { return realDecoder?.dequeueInputBuffer() ?: SubtitleInputBuffer() } + private fun getStr(byteArray: ByteArray): Pair { + val encoding = try { + val encoding = overrideEncoding ?: run { + val detector = UniversalDetector() + + detector.handleData(byteArray, 0, byteArray.size) + detector.dataEnd() + + detector.detectedCharset // "windows-1256" + } + + Log.i( + TAG, + "Detected encoding with charset $encoding and override = $overrideEncoding" + ) + encoding ?: UTF_8 + } catch (e: Exception) { + Log.e(TAG, "Failed to detect encoding throwing error") + logError(e) + UTF_8 + } + + return try { + val set = charset(encoding) + Pair(String(byteArray, set), set) + } catch (e: Exception) { + Log.e(TAG, "Failed to parse using encoding $encoding") + logError(e) + Pair(byteArray.decodeToString(), charset(UTF_8)) + } + } + + private fun getStr(input: SubtitleInputBuffer): String? { + try { + val data = input.data ?: return null + data.position(0) + val fullDataArr = ByteArray(data.remaining()) + data.get(fullDataArr) + return trimStr(getStr(fullDataArr).first) + } catch (e: Exception) { + Log.e(TAG, "Failed to parse text returning plain data") + logError(e) + return null + } + } + override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) { Log.i(TAG, "queueInputBuffer") try { - if (realDecoder == null) { - inputBuffer.data?.let { data -> - // this way we read the subtitle file and decide what decoder to use instead of relying on mimetype - Log.i(TAG, "Got data from queueInputBuffer") - - var (str, charset) = try { - data.position(0) - val fullDataArr = ByteArray(data.remaining()) - data.get(fullDataArr) - val encoding = try { - val encoding = overrideEncoding ?: run { - val detector = UniversalDetector() - - detector.handleData(fullDataArr, 0, fullDataArr.size) - detector.dataEnd() - - detector.detectedCharset // "windows-1256" + val inputString = getStr(inputBuffer) + if (realDecoder == null && !inputString.isNullOrBlank()) { + var str: String = inputString + // this way we read the subtitle file and decide what decoder to use instead of relying on mimetype + Log.i(TAG, "Got data from queueInputBuffer") + //https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388 + realDecoder = when { + str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder() + str.startsWith(" TtmlDecoder() + (str.startsWith( + "[Script Info]", + ignoreCase = true + ) || str.startsWith("Title:", ignoreCase = true)) -> SsaDecoder() + str.startsWith("1", ignoreCase = true) -> SubripDecoder() + else -> null + } + Log.i( + TAG, + "Decoder selected: $realDecoder" + ) + realDecoder?.let { decoder -> + decoder.dequeueInputBuffer()?.let { buff -> + if (regexSubtitlesToRemoveCaptions && decoder::class.java != SsaDecoder::class.java) { + captionRegex.forEach { rgx -> + str = str.replace(rgx, "\n") + } + bloatRegex.forEach { rgx -> + str = str.replace(rgx, "\n") } - - Log.i( - TAG, - "Detected encoding with charset $encoding and override = $overrideEncoding" - ) - encoding ?: UTF_8 - } catch (e: Exception) { - Log.e(TAG, "Failed to detect encoding throwing error") - logError(e) - UTF_8 } + buff.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8))) - var (fullStr, charset) = try { - val set = charset(encoding) - Pair(String(fullDataArr, set), set) - } catch (e: Exception) { - Log.e(TAG, "Failed to parse using encoding $encoding") - logError(e) - Pair(fullDataArr.decodeToString(), charset(UTF_8)) - } - - bloatRegex.forEach { rgx -> - fullStr = fullStr.replace(rgx, "\n") - } - - fullStr.replace(Regex("(\r\n|\r|\n){2,}"), "\n") - // fullStr = "1\n00:00:01,616 --> 00:00:40,200\n" + - // "تــــرجــمة" - + decoder.queueInputBuffer(buff) Log.i( TAG, - "Encoded Text start: " + fullStr.substring( - 0, - minOf(fullStr.length, 300) - ) + "Decoder queueInputBuffer successfully" ) - Pair(fullStr, charset) - } catch (e: Exception) { - Log.e(TAG, "Failed to parse text returning plain data") - logError(e) - return - } - //https://emptycharacter.com/ - //https://www.fileformat.info/info/unicode/char/200b/index.htm - //val str = trimStr(arr.decodeToString()) - //Log.i(TAG, "first string is >>>$str<<<") - if (str.isNotEmpty()) { - //https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388 - realDecoder = when { - str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder() - str.startsWith(" TtmlDecoder() - (str.startsWith( - "[Script Info]", - ignoreCase = true - ) || str.startsWith("Title:", ignoreCase = true)) -> SsaDecoder() - str.startsWith("1", ignoreCase = true) -> SubripDecoder() - else -> null - } - Log.i( - TAG, - "Decoder selected: $realDecoder" - ) - realDecoder?.let { decoder -> - decoder.dequeueInputBuffer()?.let { buff -> - if (regexSubtitlesToRemoveCaptions && decoder::class.java != SsaDecoder::class.java) { - captionRegex.forEach { rgx -> - str = str.replace(rgx, "\n") - } - } - - buff.data = ByteBuffer.wrap(str.toByteArray(charset = charset)) - - decoder.queueInputBuffer(buff) - Log.i( - TAG, - "Decoder queueInputBuffer successfully" - ) - } - CS3IPlayer.requestSubtitleUpdate?.invoke() - } } + CS3IPlayer.requestSubtitleUpdate?.invoke() } } else { + Log.i( + TAG, + "Decoder else queueInputBuffer successfully" + ) + + if (!inputString.isNullOrBlank()) { + var str: String = inputString + if (regexSubtitlesToRemoveCaptions && realDecoder!!::class.java != SsaDecoder::class.java) { + captionRegex.forEach { rgx -> + str = str.replace(rgx, "\n") + } + bloatRegex.forEach { rgx -> + str = str.replace(rgx, "\n") + } + } + inputBuffer.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8))) + } + realDecoder?.queueInputBuffer(inputBuffer) } } catch (e: Exception) { diff --git a/app/src/main/java/com/lagradost/cloudstream3/ui/player/GeneratorPlayer.kt b/app/src/main/java/com/lagradost/cloudstream3/ui/player/GeneratorPlayer.kt index 68053cb6..5c3e7fdb 100644 --- a/app/src/main/java/com/lagradost/cloudstream3/ui/player/GeneratorPlayer.kt +++ b/app/src/main/java/com/lagradost/cloudstream3/ui/player/GeneratorPlayer.kt @@ -283,11 +283,17 @@ class GeneratorPlayer : FullScreenPlayer() { } } - sourceDialog.setOnDismissListener { + var shouldDismiss = true + + fun dismiss() { if (isPlaying) { player.handleEvent(CSPlayerEvent.Play) } activity?.hideSystemUI() + } + + sourceDialog.setOnDismissListener { + if (shouldDismiss) dismiss() selectSourceDialog = null } @@ -325,7 +331,7 @@ class GeneratorPlayer : FullScreenPlayer() { null ) val index = prefValues.indexOf(value) - text = prefNames[if(index == -1) 0 else index] + text = prefNames[if (index == -1) 0 else index] } sourceDialog.subtitles_click_settings?.setOnClickListener { @@ -340,11 +346,13 @@ class GeneratorPlayer : FullScreenPlayer() { null ) - val index = prefValues.indexOf(currentPrefMedia) + shouldDismiss = false sourceDialog.dismissSafe(activity) + + val index = prefValues.indexOf(currentPrefMedia) activity?.showDialog( prefNames.toList(), - if(index == -1) 0 else index, + if (index == -1) 0 else index, ctx.getString(R.string.subtitles_encoding), true, {}) { @@ -354,8 +362,10 @@ class GeneratorPlayer : FullScreenPlayer() { prefValues[it] ) .apply() - println("FORCED ENCODING: ${prefValues[it]}") + updateForcedEncoding(ctx) + dismiss() + player.seekTime(-1) // to update subtitles, a dirty trick } }