forked from recloudstream/cloudstream
added subtitle encoding fixed
This commit is contained in:
parent
fbf8a758a3
commit
be4ee26b83
2 changed files with 114 additions and 97 deletions
|
@ -17,6 +17,7 @@ import com.lagradost.cloudstream3.R
|
||||||
import com.lagradost.cloudstream3.mvvm.logError
|
import com.lagradost.cloudstream3.mvvm.logError
|
||||||
import org.mozilla.universalchardet.UniversalDetector
|
import org.mozilla.universalchardet.UniversalDetector
|
||||||
import java.nio.ByteBuffer
|
import java.nio.ByteBuffer
|
||||||
|
import java.nio.charset.Charset
|
||||||
|
|
||||||
class CustomDecoder : SubtitleDecoder {
|
class CustomDecoder : SubtitleDecoder {
|
||||||
companion object {
|
companion object {
|
||||||
|
@ -58,6 +59,8 @@ class CustomDecoder : SubtitleDecoder {
|
||||||
)
|
)
|
||||||
val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*"""))
|
val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*"""))
|
||||||
|
|
||||||
|
//https://emptycharacter.com/
|
||||||
|
//https://www.fileformat.info/info/unicode/char/200b/index.htm
|
||||||
fun trimStr(string: String): String {
|
fun trimStr(string: String): String {
|
||||||
return string.trimStart().trim('\uFEFF', '\u200B').replace(
|
return string.trimStart().trim('\uFEFF', '\u200B').replace(
|
||||||
Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"),
|
Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"),
|
||||||
|
@ -77,23 +80,12 @@ class CustomDecoder : SubtitleDecoder {
|
||||||
return realDecoder?.dequeueInputBuffer() ?: SubtitleInputBuffer()
|
return realDecoder?.dequeueInputBuffer() ?: SubtitleInputBuffer()
|
||||||
}
|
}
|
||||||
|
|
||||||
override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) {
|
private fun getStr(byteArray: ByteArray): Pair<String, Charset> {
|
||||||
Log.i(TAG, "queueInputBuffer")
|
|
||||||
try {
|
|
||||||
if (realDecoder == null) {
|
|
||||||
inputBuffer.data?.let { data ->
|
|
||||||
// this way we read the subtitle file and decide what decoder to use instead of relying on mimetype
|
|
||||||
Log.i(TAG, "Got data from queueInputBuffer")
|
|
||||||
|
|
||||||
var (str, charset) = try {
|
|
||||||
data.position(0)
|
|
||||||
val fullDataArr = ByteArray(data.remaining())
|
|
||||||
data.get(fullDataArr)
|
|
||||||
val encoding = try {
|
val encoding = try {
|
||||||
val encoding = overrideEncoding ?: run {
|
val encoding = overrideEncoding ?: run {
|
||||||
val detector = UniversalDetector()
|
val detector = UniversalDetector()
|
||||||
|
|
||||||
detector.handleData(fullDataArr, 0, fullDataArr.size)
|
detector.handleData(byteArray, 0, byteArray.size)
|
||||||
detector.dataEnd()
|
detector.dataEnd()
|
||||||
|
|
||||||
detector.detectedCharset // "windows-1256"
|
detector.detectedCharset // "windows-1256"
|
||||||
|
@ -110,41 +102,38 @@ class CustomDecoder : SubtitleDecoder {
|
||||||
UTF_8
|
UTF_8
|
||||||
}
|
}
|
||||||
|
|
||||||
var (fullStr, charset) = try {
|
return try {
|
||||||
val set = charset(encoding)
|
val set = charset(encoding)
|
||||||
Pair(String(fullDataArr, set), set)
|
Pair(String(byteArray, set), set)
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
Log.e(TAG, "Failed to parse using encoding $encoding")
|
Log.e(TAG, "Failed to parse using encoding $encoding")
|
||||||
logError(e)
|
logError(e)
|
||||||
Pair(fullDataArr.decodeToString(), charset(UTF_8))
|
Pair(byteArray.decodeToString(), charset(UTF_8))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bloatRegex.forEach { rgx ->
|
private fun getStr(input: SubtitleInputBuffer): String? {
|
||||||
fullStr = fullStr.replace(rgx, "\n")
|
try {
|
||||||
}
|
val data = input.data ?: return null
|
||||||
|
data.position(0)
|
||||||
fullStr.replace(Regex("(\r\n|\r|\n){2,}"), "\n")
|
val fullDataArr = ByteArray(data.remaining())
|
||||||
// fullStr = "1\n00:00:01,616 --> 00:00:40,200\n" +
|
data.get(fullDataArr)
|
||||||
// "تــــرجــمة"
|
return trimStr(getStr(fullDataArr).first)
|
||||||
|
|
||||||
Log.i(
|
|
||||||
TAG,
|
|
||||||
"Encoded Text start: " + fullStr.substring(
|
|
||||||
0,
|
|
||||||
minOf(fullStr.length, 300)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
Pair(fullStr, charset)
|
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
Log.e(TAG, "Failed to parse text returning plain data")
|
Log.e(TAG, "Failed to parse text returning plain data")
|
||||||
logError(e)
|
logError(e)
|
||||||
return
|
return null
|
||||||
}
|
}
|
||||||
//https://emptycharacter.com/
|
}
|
||||||
//https://www.fileformat.info/info/unicode/char/200b/index.htm
|
|
||||||
//val str = trimStr(arr.decodeToString())
|
override fun queueInputBuffer(inputBuffer: SubtitleInputBuffer) {
|
||||||
//Log.i(TAG, "first string is >>>$str<<<")
|
Log.i(TAG, "queueInputBuffer")
|
||||||
if (str.isNotEmpty()) {
|
try {
|
||||||
|
val inputString = getStr(inputBuffer)
|
||||||
|
if (realDecoder == null && !inputString.isNullOrBlank()) {
|
||||||
|
var str: String = inputString
|
||||||
|
// this way we read the subtitle file and decide what decoder to use instead of relying on mimetype
|
||||||
|
Log.i(TAG, "Got data from queueInputBuffer")
|
||||||
//https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388
|
//https://github.com/LagradOst/CloudStream-2/blob/ddd774ee66810137ff7bd65dae70bcf3ba2d2489/CloudStreamForms/CloudStreamForms/Script/MainChrome.cs#L388
|
||||||
realDecoder = when {
|
realDecoder = when {
|
||||||
str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder()
|
str.startsWith("WEBVTT", ignoreCase = true) -> WebvttDecoder()
|
||||||
|
@ -166,9 +155,11 @@ class CustomDecoder : SubtitleDecoder {
|
||||||
captionRegex.forEach { rgx ->
|
captionRegex.forEach { rgx ->
|
||||||
str = str.replace(rgx, "\n")
|
str = str.replace(rgx, "\n")
|
||||||
}
|
}
|
||||||
|
bloatRegex.forEach { rgx ->
|
||||||
|
str = str.replace(rgx, "\n")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
buff.data = ByteBuffer.wrap(str.toByteArray(charset = charset))
|
buff.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8)))
|
||||||
|
|
||||||
decoder.queueInputBuffer(buff)
|
decoder.queueInputBuffer(buff)
|
||||||
Log.i(
|
Log.i(
|
||||||
|
@ -178,9 +169,25 @@ class CustomDecoder : SubtitleDecoder {
|
||||||
}
|
}
|
||||||
CS3IPlayer.requestSubtitleUpdate?.invoke()
|
CS3IPlayer.requestSubtitleUpdate?.invoke()
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
|
Log.i(
|
||||||
|
TAG,
|
||||||
|
"Decoder else queueInputBuffer successfully"
|
||||||
|
)
|
||||||
|
|
||||||
|
if (!inputString.isNullOrBlank()) {
|
||||||
|
var str: String = inputString
|
||||||
|
if (regexSubtitlesToRemoveCaptions && realDecoder!!::class.java != SsaDecoder::class.java) {
|
||||||
|
captionRegex.forEach { rgx ->
|
||||||
|
str = str.replace(rgx, "\n")
|
||||||
|
}
|
||||||
|
bloatRegex.forEach { rgx ->
|
||||||
|
str = str.replace(rgx, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inputBuffer.data = ByteBuffer.wrap(str.toByteArray(charset(UTF_8)))
|
||||||
|
}
|
||||||
|
|
||||||
realDecoder?.queueInputBuffer(inputBuffer)
|
realDecoder?.queueInputBuffer(inputBuffer)
|
||||||
}
|
}
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
|
|
|
@ -283,11 +283,17 @@ class GeneratorPlayer : FullScreenPlayer() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sourceDialog.setOnDismissListener {
|
var shouldDismiss = true
|
||||||
|
|
||||||
|
fun dismiss() {
|
||||||
if (isPlaying) {
|
if (isPlaying) {
|
||||||
player.handleEvent(CSPlayerEvent.Play)
|
player.handleEvent(CSPlayerEvent.Play)
|
||||||
}
|
}
|
||||||
activity?.hideSystemUI()
|
activity?.hideSystemUI()
|
||||||
|
}
|
||||||
|
|
||||||
|
sourceDialog.setOnDismissListener {
|
||||||
|
if (shouldDismiss) dismiss()
|
||||||
selectSourceDialog = null
|
selectSourceDialog = null
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -325,7 +331,7 @@ class GeneratorPlayer : FullScreenPlayer() {
|
||||||
null
|
null
|
||||||
)
|
)
|
||||||
val index = prefValues.indexOf(value)
|
val index = prefValues.indexOf(value)
|
||||||
text = prefNames[if(index == -1) 0 else index]
|
text = prefNames[if (index == -1) 0 else index]
|
||||||
}
|
}
|
||||||
|
|
||||||
sourceDialog.subtitles_click_settings?.setOnClickListener {
|
sourceDialog.subtitles_click_settings?.setOnClickListener {
|
||||||
|
@ -340,11 +346,13 @@ class GeneratorPlayer : FullScreenPlayer() {
|
||||||
null
|
null
|
||||||
)
|
)
|
||||||
|
|
||||||
val index = prefValues.indexOf(currentPrefMedia)
|
shouldDismiss = false
|
||||||
sourceDialog.dismissSafe(activity)
|
sourceDialog.dismissSafe(activity)
|
||||||
|
|
||||||
|
val index = prefValues.indexOf(currentPrefMedia)
|
||||||
activity?.showDialog(
|
activity?.showDialog(
|
||||||
prefNames.toList(),
|
prefNames.toList(),
|
||||||
if(index == -1) 0 else index,
|
if (index == -1) 0 else index,
|
||||||
ctx.getString(R.string.subtitles_encoding),
|
ctx.getString(R.string.subtitles_encoding),
|
||||||
true,
|
true,
|
||||||
{}) {
|
{}) {
|
||||||
|
@ -354,8 +362,10 @@ class GeneratorPlayer : FullScreenPlayer() {
|
||||||
prefValues[it]
|
prefValues[it]
|
||||||
)
|
)
|
||||||
.apply()
|
.apply()
|
||||||
println("FORCED ENCODING: ${prefValues[it]}")
|
|
||||||
updateForcedEncoding(ctx)
|
updateForcedEncoding(ctx)
|
||||||
|
dismiss()
|
||||||
|
player.seekTime(-1) // to update subtitles, a dirty trick
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue