mirror of
https://github.com/recloudstream/cloudstream.git
synced 2024-08-15 01:53:11 +00:00
added subtitle encoding fixed
This commit is contained in:
parent
fbf8a758a3
commit
be4ee26b83
2 changed files with 114 additions and 97 deletions
|
@ -17,6 +17,7 @@ import com.lagradost.cloudstream3.R
|
|||
import com.lagradost.cloudstream3.mvvm.logError
|
||||
import org.mozilla.universalchardet.UniversalDetector
|
||||
import java.nio.ByteBuffer
|
||||
import java.nio.charset.Charset
|
||||
|
||||
class CustomDecoder : SubtitleDecoder {
|
||||
companion object {
|
||||
|
@ -58,6 +59,8 @@ class CustomDecoder : SubtitleDecoder {
|
|||
)
|
||||
val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*"""))
|
||||
|
||||
//https://emptycharacter.com/
|
||||
//https://www.fileformat.info/info/unicode/char/200b/index.htm
|
||||
fun trimStr(string: String): String {
|
||||
return string.trimStart().trim('\uFEFF', '\u200B').replace(
|
||||
Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"),
|
||||
|
@ -77,110 +80,114 @@ class CustomDecoder : SubtitleDecoder {
|
|||
return realDecoder?.dequeueInputBuffer() ?: SubtitleInputBuffer()
|
||||
}
|
||||
|
||||
private fun getStr(byteArray: ByteArray): Pair<String, Charset> {
|
||||
val encoding = try {
|
||||
val encoding = overrideEncoding ?: run {
|
||||
val detector = UniversalDetector()
|
||||
|
||||
detector.handleData(byteArray, 0, byteArray.size)
|
||||
detector.dataEnd()
|
||||
|
||||
detector.detectedCharset // "windows-1256"
|
||||
}
|
||||
|
||||
Log.i(
|
||||
TAG,
|
||||
"Detected encoding with charset $encoding and override = $overrideEncoding"
|
||||
)
|
||||
encoding ?: UTF_8
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "Failed to detect encoding throwing error")
|
||||
logError(e)
|
||||
UTF_8
|
||||
}
|
||||
|
||||
return try {
|
||||
val set = charset(encoding)
|
||||
Pair(String(byteArray, set), set)
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "Failed to parse using encoding $encoding")
|
||||
logError(e)
|
||||
Pair(byteArray.decodeToString(), charset(UTF_8))
|
||||
}
|
||||
}
|
||||
|
||||
private fun getStr(input: SubtitleInputBuffer): String? {
|
||||
try {
|
||||
val data = input.data ?: return null
|
||||
data.position(0)
|
||||
val fullDataArr = ByteArray(data.remaining())
|
||||
data.get(fullDataArr)
|
||||
return trimStr(getStr(fullDataArr).first)
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "Failed to parse text returning plain data")
|
||||
logError(e)
|
||||
return null
|
||||
}
|
||||
}
|
||||
|
||||
override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) {
|
||||
Log.i(TAG, "queueInputBuffer")
|
||||
try {
|
||||
if (realDecoder == null) {
|
||||
inputBuffer.data?.let { data ->
|
||||
// this way we read the subtitle file and decide what decoder to use instead of relying on mimetype
|
||||
Log.i(TAG, "Got data from queueInputBuffer")
|
||||
|
||||
var (str, charset) = try {
|
||||
data.position(0)
|
||||
val fullDataArr = ByteArray(data.remaining())
|
||||
data.get(fullDataArr)
|
||||
val encoding = try {
|
||||
val encoding = overrideEncoding ?: run {
|
||||
val detector = UniversalDetector()
|
||||
|
||||
detector.handleData(fullDataArr, 0, fullDataArr.size)
|
||||
detector.dataEnd()
|
||||
|
||||
detector.detectedCharset // "windows-1256"
|
||||
val inputString = getStr(inputBuffer)
|
||||
if (realDecoder == null && !inputString.isNullOrBlank()) {
|
||||
var str: String = inputString
|
||||
// this way we read the subtitle file and decide what decoder to use instead of relying on mimetype
|
||||
Log.i(TAG, "Got data from queueInputBuffer")
|
||||
//https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388
|
||||
realDecoder = when {
|
||||
str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder()
|
||||
str.startsWith("<?xml version=\"", ignoreCase = true) -> TtmlDecoder()
|
||||
(str.startsWith(
|
||||
"[Script Info]",
|
||||
ignoreCase = true
|
||||
) || str.startsWith("Title:", ignoreCase = true)) -> SsaDecoder()
|
||||
str.startsWith("1", ignoreCase = true) -> SubripDecoder()
|
||||
else -> null
|
||||
}
|
||||
Log.i(
|
||||
TAG,
|
||||
"Decoder selected: $realDecoder"
|
||||
)
|
||||
realDecoder?.let { decoder ->
|
||||
decoder.dequeueInputBuffer()?.let { buff ->
|
||||
if (regexSubtitlesToRemoveCaptions && decoder::class.java != SsaDecoder::class.java) {
|
||||
captionRegex.forEach { rgx ->
|
||||
str = str.replace(rgx, "\n")
|
||||
}
|
||||
bloatRegex.forEach { rgx ->
|
||||
str = str.replace(rgx, "\n")
|
||||
}
|
||||
|
||||
Log.i(
|
||||
TAG,
|
||||
"Detected encoding with charset $encoding and override = $overrideEncoding"
|
||||
)
|
||||
encoding ?: UTF_8
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "Failed to detect encoding throwing error")
|
||||
logError(e)
|
||||
UTF_8
|
||||
}
|
||||
buff.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8)))
|
||||
|
||||
var (fullStr, charset) = try {
|
||||
val set = charset(encoding)
|
||||
Pair(String(fullDataArr, set), set)
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "Failed to parse using encoding $encoding")
|
||||
logError(e)
|
||||
Pair(fullDataArr.decodeToString(), charset(UTF_8))
|
||||
}
|
||||
|
||||
bloatRegex.forEach { rgx ->
|
||||
fullStr = fullStr.replace(rgx, "\n")
|
||||
}
|
||||
|
||||
fullStr.replace(Regex("(\r\n|\r|\n){2,}"), "\n")
|
||||
// fullStr = "1\n00:00:01,616 --> 00:00:40,200\n" +
|
||||
// "تــــرجــمة"
|
||||
|
||||
decoder.queueInputBuffer(buff)
|
||||
Log.i(
|
||||
TAG,
|
||||
"Encoded Text start: " + fullStr.substring(
|
||||
0,
|
||||
minOf(fullStr.length, 300)
|
||||
)
|
||||
"Decoder queueInputBuffer successfully"
|
||||
)
|
||||
Pair(fullStr, charset)
|
||||
} catch (e: Exception) {
|
||||
Log.e(TAG, "Failed to parse text returning plain data")
|
||||
logError(e)
|
||||
return
|
||||
}
|
||||
//https://emptycharacter.com/
|
||||
//https://www.fileformat.info/info/unicode/char/200b/index.htm
|
||||
//val str = trimStr(arr.decodeToString())
|
||||
//Log.i(TAG, "first string is >>>$str<<<")
|
||||
if (str.isNotEmpty()) {
|
||||
//https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388
|
||||
realDecoder = when {
|
||||
str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder()
|
||||
str.startsWith("<?xml version=\"", ignoreCase = true) -> TtmlDecoder()
|
||||
(str.startsWith(
|
||||
"[Script Info]",
|
||||
ignoreCase = true
|
||||
) || str.startsWith("Title:", ignoreCase = true)) -> SsaDecoder()
|
||||
str.startsWith("1", ignoreCase = true) -> SubripDecoder()
|
||||
else -> null
|
||||
}
|
||||
Log.i(
|
||||
TAG,
|
||||
"Decoder selected: $realDecoder"
|
||||
)
|
||||
realDecoder?.let { decoder ->
|
||||
decoder.dequeueInputBuffer()?.let { buff ->
|
||||
if (regexSubtitlesToRemoveCaptions && decoder::class.java != SsaDecoder::class.java) {
|
||||
captionRegex.forEach { rgx ->
|
||||
str = str.replace(rgx, "\n")
|
||||
}
|
||||
}
|
||||
|
||||
buff.data = ByteBuffer.wrap(str.toByteArray(charset = charset))
|
||||
|
||||
decoder.queueInputBuffer(buff)
|
||||
Log.i(
|
||||
TAG,
|
||||
"Decoder queueInputBuffer successfully"
|
||||
)
|
||||
}
|
||||
CS3IPlayer.requestSubtitleUpdate?.invoke()
|
||||
}
|
||||
}
|
||||
CS3IPlayer.requestSubtitleUpdate?.invoke()
|
||||
}
|
||||
} else {
|
||||
Log.i(
|
||||
TAG,
|
||||
"Decoder else queueInputBuffer successfully"
|
||||
)
|
||||
|
||||
if (!inputString.isNullOrBlank()) {
|
||||
var str: String = inputString
|
||||
if (regexSubtitlesToRemoveCaptions && realDecoder!!::class.java != SsaDecoder::class.java) {
|
||||
captionRegex.forEach { rgx ->
|
||||
str = str.replace(rgx, "\n")
|
||||
}
|
||||
bloatRegex.forEach { rgx ->
|
||||
str = str.replace(rgx, "\n")
|
||||
}
|
||||
}
|
||||
inputBuffer.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8)))
|
||||
}
|
||||
|
||||
realDecoder?.queueInputBuffer(inputBuffer)
|
||||
}
|
||||
} catch (e: Exception) {
|
||||
|
|
|
@ -283,11 +283,17 @@ class GeneratorPlayer : FullScreenPlayer() {
|
|||
}
|
||||
}
|
||||
|
||||
sourceDialog.setOnDismissListener {
|
||||
var shouldDismiss = true
|
||||
|
||||
fun dismiss() {
|
||||
if (isPlaying) {
|
||||
player.handleEvent(CSPlayerEvent.Play)
|
||||
}
|
||||
activity?.hideSystemUI()
|
||||
}
|
||||
|
||||
sourceDialog.setOnDismissListener {
|
||||
if (shouldDismiss) dismiss()
|
||||
selectSourceDialog = null
|
||||
}
|
||||
|
||||
|
@ -325,7 +331,7 @@ class GeneratorPlayer : FullScreenPlayer() {
|
|||
null
|
||||
)
|
||||
val index = prefValues.indexOf(value)
|
||||
text = prefNames[if(index == -1) 0 else index]
|
||||
text = prefNames[if (index == -1) 0 else index]
|
||||
}
|
||||
|
||||
sourceDialog.subtitles_click_settings?.setOnClickListener {
|
||||
|
@ -340,11 +346,13 @@ class GeneratorPlayer : FullScreenPlayer() {
|
|||
null
|
||||
)
|
||||
|
||||
val index = prefValues.indexOf(currentPrefMedia)
|
||||
shouldDismiss = false
|
||||
sourceDialog.dismissSafe(activity)
|
||||
|
||||
val index = prefValues.indexOf(currentPrefMedia)
|
||||
activity?.showDialog(
|
||||
prefNames.toList(),
|
||||
if(index == -1) 0 else index,
|
||||
if (index == -1) 0 else index,
|
||||
ctx.getString(R.string.subtitles_encoding),
|
||||
true,
|
||||
{}) {
|
||||
|
@ -354,8 +362,10 @@ class GeneratorPlayer : FullScreenPlayer() {
|
|||
prefValues[it]
|
||||
)
|
||||
.apply()
|
||||
println("FORCED ENCODING: ${prefValues[it]}")
|
||||
|
||||
updateForcedEncoding(ctx)
|
||||
dismiss()
|
||||
player.seekTime(-1) // to update subtitles, a dirty trick
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue