added subtitle encoding fixed

This commit is contained in:
LagradOst 2022-06-02 21:28:09 +02:00
parent fbf8a758a3
commit be4ee26b83
2 changed files with 114 additions and 97 deletions

View file

@ -17,6 +17,7 @@ import com.lagradost.cloudstream3.R
import com.lagradost.cloudstream3.mvvm.logError
import org.mozilla.universalchardet.UniversalDetector
import java.nio.ByteBuffer
import java.nio.charset.Charset
class CustomDecoder : SubtitleDecoder {
companion object {
@ -58,6 +59,8 @@ class CustomDecoder : SubtitleDecoder {
)
val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*"""))
//https://emptycharacter.com/
//https://www.fileformat.info/info/unicode/char/200b/index.htm
fun trimStr(string: String): String {
return string.trimStart().trim('\uFEFF', '\u200B').replace(
Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"),
@ -77,23 +80,12 @@ class CustomDecoder : SubtitleDecoder {
return realDecoder?.dequeueInputBuffer() ?: SubtitleInputBuffer()
}
override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) {
Log.i(TAG, "queueInputBuffer")
try {
if (realDecoder == null) {
inputBuffer.data?.let { data ->
// this way we read the subtitle file and decide what decoder to use instead of relying on mimetype
Log.i(TAG, "Got data from queueInputBuffer")
var (str, charset) = try {
data.position(0)
val fullDataArr = ByteArray(data.remaining())
data.get(fullDataArr)
private fun getStr(byteArray: ByteArray): Pair<String, Charset> {
val encoding = try {
val encoding = overrideEncoding ?: run {
val detector = UniversalDetector()
detector.handleData(fullDataArr, 0, fullDataArr.size)
detector.handleData(byteArray, 0, byteArray.size)
detector.dataEnd()
detector.detectedCharset // "windows-1256"
@ -110,41 +102,38 @@ class CustomDecoder : SubtitleDecoder {
UTF_8
}
var (fullStr, charset) = try {
return try {
val set = charset(encoding)
Pair(String(fullDataArr, set), set)
Pair(String(byteArray, set), set)
} catch (e: Exception) {
Log.e(TAG, "Failed to parse using encoding $encoding")
logError(e)
Pair(fullDataArr.decodeToString(), charset(UTF_8))
Pair(byteArray.decodeToString(), charset(UTF_8))
}
}
bloatRegex.forEach { rgx ->
fullStr = fullStr.replace(rgx, "\n")
}
fullStr.replace(Regex("(\r\n|\r|\n){2,}"), "\n")
// fullStr = "1\n00:00:01,616 --> 00:00:40,200\n" +
// "تــــرجــمة"
Log.i(
TAG,
"Encoded Text start: " + fullStr.substring(
0,
minOf(fullStr.length, 300)
)
)
Pair(fullStr, charset)
private fun getStr(input: SubtitleInputBuffer): String? {
try {
val data = input.data ?: return null
data.position(0)
val fullDataArr = ByteArray(data.remaining())
data.get(fullDataArr)
return trimStr(getStr(fullDataArr).first)
} catch (e: Exception) {
Log.e(TAG, "Failed to parse text returning plain data")
logError(e)
return
return null
}
//https://emptycharacter.com/
//https://www.fileformat.info/info/unicode/char/200b/index.htm
//val str = trimStr(arr.decodeToString())
//Log.i(TAG, "first string is >>>$str<<<")
if (str.isNotEmpty()) {
}
override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) {
Log.i(TAG, "queueInputBuffer")
try {
val inputString = getStr(inputBuffer)
if (realDecoder == null && !inputString.isNullOrBlank()) {
var str: String = inputString
// this way we read the subtitle file and decide what decoder to use instead of relying on mimetype
Log.i(TAG, "Got data from queueInputBuffer")
//https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388
realDecoder = when {
str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder()
@ -166,9 +155,11 @@ class CustomDecoder : SubtitleDecoder {
captionRegex.forEach { rgx ->
str = str.replace(rgx, "\n")
}
bloatRegex.forEach { rgx ->
str = str.replace(rgx, "\n")
}
buff.data = ByteBuffer.wrap(str.toByteArray(charset = charset))
}
buff.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8)))
decoder.queueInputBuffer(buff)
Log.i(
@ -178,9 +169,25 @@ class CustomDecoder : SubtitleDecoder {
}
CS3IPlayer.requestSubtitleUpdate?.invoke()
}
}
}
} else {
Log.i(
TAG,
"Decoder else queueInputBuffer successfully"
)
if (!inputString.isNullOrBlank()) {
var str: String = inputString
if (regexSubtitlesToRemoveCaptions && realDecoder!!::class.java != SsaDecoder::class.java) {
captionRegex.forEach { rgx ->
str = str.replace(rgx, "\n")
}
bloatRegex.forEach { rgx ->
str = str.replace(rgx, "\n")
}
}
inputBuffer.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8)))
}
realDecoder?.queueInputBuffer(inputBuffer)
}
} catch (e: Exception) {

View file

@ -283,11 +283,17 @@ class GeneratorPlayer : FullScreenPlayer() {
}
}
sourceDialog.setOnDismissListener {
var shouldDismiss = true
// Shared clean-up for closing the source dialog: re-issues Play to the player
// when `isPlaying` is set, then hides the system UI again.
// NOTE(review): `isPlaying` is declared outside this hunk — presumably the
// playback state captured before the dialog was opened; confirm.
fun dismiss() {
if (isPlaying) {
player.handleEvent(CSPlayerEvent.Play)
}
// Safe call: nothing happens when `activity` is null.
activity?.hideSystemUI()
}
sourceDialog.setOnDismissListener {
if (shouldDismiss) dismiss()
selectSourceDialog = null
}
@ -340,8 +346,10 @@ class GeneratorPlayer : FullScreenPlayer() {
null
)
val index = prefValues.indexOf(currentPrefMedia)
shouldDismiss = false
sourceDialog.dismissSafe(activity)
val index = prefValues.indexOf(currentPrefMedia)
activity?.showDialog(
prefNames.toList(),
if (index == -1) 0 else index,
@ -354,8 +362,10 @@ class GeneratorPlayer : FullScreenPlayer() {
prefValues[it]
)
.apply()
println("FORCED ENCODING: ${prefValues[it]}")
updateForcedEncoding(ctx)
dismiss()
player.seekTime(-1) // to update subtitles, a dirty trick
}
}