added subtitle encoding fixed

This commit is contained in:
LagradOst 2022-06-02 21:28:09 +02:00
parent fbf8a758a3
commit be4ee26b83
2 changed files with 114 additions and 97 deletions

View file

@ -17,6 +17,7 @@ import com.lagradost.cloudstream3.R
import com.lagradost.cloudstream3.mvvm.logError import com.lagradost.cloudstream3.mvvm.logError
import org.mozilla.universalchardet.UniversalDetector import org.mozilla.universalchardet.UniversalDetector
import java.nio.ByteBuffer import java.nio.ByteBuffer
import java.nio.charset.Charset
class CustomDecoder : SubtitleDecoder { class CustomDecoder : SubtitleDecoder {
companion object { companion object {
@ -58,6 +59,8 @@ class CustomDecoder : SubtitleDecoder {
) )
val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*""")) val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*"""))
//https://emptycharacter.com/
//https://www.fileformat.info/info/unicode/char/200b/index.htm
fun trimStr(string: String): String { fun trimStr(string: String): String {
return string.trimStart().trim('\uFEFF', '\u200B').replace( return string.trimStart().trim('\uFEFF', '\u200B').replace(
Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"), Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"),
@ -77,23 +80,12 @@ class CustomDecoder : SubtitleDecoder {
return realDecoder?.dequeueInputBuffer() ?: SubtitleInputBuffer() return realDecoder?.dequeueInputBuffer() ?: SubtitleInputBuffer()
} }
override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) { private fun getStr(byteArray: ByteArray): Pair<String, Charset> {
Log.i(TAG, "queueInputBuffer")
try {
if (realDecoder == null) {
inputBuffer.data?.let { data ->
// this way we read the subtitle file and decide what decoder to use instead of relying on mimetype
Log.i(TAG, "Got data from queueInputBuffer")
var (str, charset) = try {
data.position(0)
val fullDataArr = ByteArray(data.remaining())
data.get(fullDataArr)
val encoding = try { val encoding = try {
val encoding = overrideEncoding ?: run { val encoding = overrideEncoding ?: run {
val detector = UniversalDetector() val detector = UniversalDetector()
detector.handleData(fullDataArr, 0, fullDataArr.size) detector.handleData(byteArray, 0, byteArray.size)
detector.dataEnd() detector.dataEnd()
detector.detectedCharset // "windows-1256" detector.detectedCharset // "windows-1256"
@ -110,41 +102,38 @@ class CustomDecoder : SubtitleDecoder {
UTF_8 UTF_8
} }
var (fullStr, charset) = try { return try {
val set = charset(encoding) val set = charset(encoding)
Pair(String(fullDataArr, set), set) Pair(String(byteArray, set), set)
} catch (e: Exception) { } catch (e: Exception) {
Log.e(TAG, "Failed to parse using encoding $encoding") Log.e(TAG, "Failed to parse using encoding $encoding")
logError(e) logError(e)
Pair(fullDataArr.decodeToString(), charset(UTF_8)) Pair(byteArray.decodeToString(), charset(UTF_8))
}
} }
bloatRegex.forEach { rgx -> private fun getStr(input: SubtitleInputBuffer): String? {
fullStr = fullStr.replace(rgx, "\n") try {
} val data = input.data ?: return null
data.position(0)
fullStr.replace(Regex("(\r\n|\r|\n){2,}"), "\n") val fullDataArr = ByteArray(data.remaining())
// fullStr = "1\n00:00:01,616 --> 00:00:40,200\n" + data.get(fullDataArr)
// "تــــرجــمة" return trimStr(getStr(fullDataArr).first)
Log.i(
TAG,
"Encoded Text start: " + fullStr.substring(
0,
minOf(fullStr.length, 300)
)
)
Pair(fullStr, charset)
} catch (e: Exception) { } catch (e: Exception) {
Log.e(TAG, "Failed to parse text returning plain data") Log.e(TAG, "Failed to parse text returning plain data")
logError(e) logError(e)
return return null
} }
//https://emptycharacter.com/ }
//https://www.fileformat.info/info/unicode/char/200b/index.htm
//val str = trimStr(arr.decodeToString()) override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) {
//Log.i(TAG, "first string is >>>$str<<<") Log.i(TAG, "queueInputBuffer")
if (str.isNotEmpty()) { try {
val inputString = getStr(inputBuffer)
if (realDecoder == null && !inputString.isNullOrBlank()) {
var str: String = inputString
// this way we read the subtitle file and decide what decoder to use instead of relying on mimetype
Log.i(TAG, "Got data from queueInputBuffer")
//https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388 //https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388
realDecoder = when { realDecoder = when {
str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder() str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder()
@ -166,9 +155,11 @@ class CustomDecoder : SubtitleDecoder {
captionRegex.forEach { rgx -> captionRegex.forEach { rgx ->
str = str.replace(rgx, "\n") str = str.replace(rgx, "\n")
} }
bloatRegex.forEach { rgx ->
str = str.replace(rgx, "\n")
} }
}
buff.data = ByteBuffer.wrap(str.toByteArray(charset = charset)) buff.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8)))
decoder.queueInputBuffer(buff) decoder.queueInputBuffer(buff)
Log.i( Log.i(
@ -178,9 +169,25 @@ class CustomDecoder : SubtitleDecoder {
} }
CS3IPlayer.requestSubtitleUpdate?.invoke() CS3IPlayer.requestSubtitleUpdate?.invoke()
} }
}
}
} else { } else {
Log.i(
TAG,
"Decoder else queueInputBuffer successfully"
)
if (!inputString.isNullOrBlank()) {
var str: String = inputString
if (regexSubtitlesToRemoveCaptions && realDecoder!!::class.java != SsaDecoder::class.java) {
captionRegex.forEach { rgx ->
str = str.replace(rgx, "\n")
}
bloatRegex.forEach { rgx ->
str = str.replace(rgx, "\n")
}
}
inputBuffer.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8)))
}
realDecoder?.queueInputBuffer(inputBuffer) realDecoder?.queueInputBuffer(inputBuffer)
} }
} catch (e: Exception) { } catch (e: Exception) {

View file

@ -283,11 +283,17 @@ class GeneratorPlayer : FullScreenPlayer() {
} }
} }
sourceDialog.setOnDismissListener { var shouldDismiss = true
fun dismiss() {
if (isPlaying) { if (isPlaying) {
player.handleEvent(CSPlayerEvent.Play) player.handleEvent(CSPlayerEvent.Play)
} }
activity?.hideSystemUI() activity?.hideSystemUI()
}
sourceDialog.setOnDismissListener {
if (shouldDismiss) dismiss()
selectSourceDialog = null selectSourceDialog = null
} }
@ -340,8 +346,10 @@ class GeneratorPlayer : FullScreenPlayer() {
null null
) )
val index = prefValues.indexOf(currentPrefMedia) shouldDismiss = false
sourceDialog.dismissSafe(activity) sourceDialog.dismissSafe(activity)
val index = prefValues.indexOf(currentPrefMedia)
activity?.showDialog( activity?.showDialog(
prefNames.toList(), prefNames.toList(),
if (index == -1) 0 else index, if (index == -1) 0 else index,
@ -354,8 +362,10 @@ class GeneratorPlayer : FullScreenPlayer() {
prefValues[it] prefValues[it]
) )
.apply() .apply()
println("FORCED ENCODING: ${prefValues[it]}")
updateForcedEncoding(ctx) updateForcedEncoding(ctx)
dismiss()
player.seekTime(-1) // to update subtitles, a dirty trick
} }
} }