forked from recloudstream/cloudstream
regex for captions
This commit is contained in:
parent
038800d93a
commit
d4637a3a26
1 changed files with 58 additions and 7 deletions
|
@ -12,11 +12,40 @@ import com.google.android.exoplayer2.text.ttml.TtmlDecoder
|
||||||
import com.google.android.exoplayer2.text.webvtt.WebvttDecoder
|
import com.google.android.exoplayer2.text.webvtt.WebvttDecoder
|
||||||
import com.google.android.exoplayer2.util.MimeTypes
|
import com.google.android.exoplayer2.util.MimeTypes
|
||||||
import com.lagradost.cloudstream3.mvvm.logError
|
import com.lagradost.cloudstream3.mvvm.logError
|
||||||
|
import java.nio.ByteBuffer
|
||||||
|
|
||||||
|
|
||||||
class CustomDecoder : SubtitleDecoder {
|
class CustomDecoder : SubtitleDecoder {
|
||||||
companion object {
|
companion object {
|
||||||
private const val TAG = "CustomDecoder"
|
private const val TAG = "CustomDecoder"
|
||||||
|
/**
 * Global switch for subtitle clean-up.
 *
 * When `true`, and the selected decoder is not an `SsaDecoder`, the queued subtitle
 * data is re-encoded with every [bloatRegex] and [captionRegex] match replaced by a
 * newline before it is handed to the real decoder. Off by default.
 */
var regexSubtitlesToRemoveCaptions: Boolean = false
|
||||||
|
/**
 * Case-insensitive patterns for advertising / boilerplate text found in downloaded
 * subtitles (OpenSubtitles promotion lines and a generic "advertise here" line).
 * Every match is replaced with a newline when caption stripping is enabled.
 */
val bloatRegex: List<Regex> = listOf(
    """Support\s+us\s+and\s+become\s+VIP\s+member\s+to\s+remove\s+all\s+ads\s+from\s+(www\.|)OpenSubtitles(\.org|)""",
    """Please\s+rate\s+this\s+subtitle\s+at\s+.*\s+Help\s+other\s+users\s+to\s+choose\s+the\s+best\s+subtitles""",
    """Contact\s(www\.|)OpenSubtitles(\.org|)\s+today""",
    """Advertise\s+your\s+product\s+or\s+brand\s+here""",
).map { pattern -> Regex(pattern, RegexOption.IGNORE_CASE) }
|
||||||
|
/**
 * Patterns for bracketed caption cues such as `[MUSIC]`, `(laughs)` or `- {sighs}`.
 *
 * The single pattern matches: an optional leading `-` (with at most one following
 * whitespace character), an opening `[`, `(` or `{`, the shortest run of
 * word/digit/whitespace characters, a closing `]`, `)` or `}`, and any trailing
 * whitespace. The opening and closing bracket kinds are not required to agree.
 */
val captionRegex: List<Regex> = listOf("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*""".toRegex())
|
||||||
|
|
||||||
|
/**
 * Space-like code points that [trimStr] rewrites to a plain ASCII space:
 * U+00A0 (no-break space), U+2000..U+200A and U+205F.
 *
 * Compiled once here instead of on every [trimStr] call.
 */
private val unicodeSpaceRegex =
    Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]")

/**
 * Normalises decoded subtitle text.
 *
 * Steps, in order:
 * 1. strip leading whitespace;
 * 2. strip U+FEFF (byte-order mark) and U+200B (zero-width space) from both ends;
 * 3. replace every character matched by [unicodeSpaceRegex] with a regular space.
 *
 * Step 1 runs before step 2, so whitespace that directly follows a leading BOM is
 * left in place.
 *
 * @param string raw decoded text
 * @return the normalised text
 */
fun trimStr(string: String): String {
    return string
        .trimStart()
        .trim('\uFEFF', '\u200B')
        .replace(unicodeSpaceRegex, " ")
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private var realDecoder: SubtitleDecoder? = null
|
private var realDecoder: SubtitleDecoder? = null
|
||||||
|
@ -45,10 +74,7 @@ class CustomDecoder : SubtitleDecoder {
|
||||||
|
|
||||||
//https://emptycharacter.com/
|
//https://emptycharacter.com/
|
||||||
//https://www.fileformat.info/info/unicode/char/200b/index.htm
|
//https://www.fileformat.info/info/unicode/char/200b/index.htm
|
||||||
val str = arr.decodeToString().trimStart().trim('\uFEFF', '\u200B').replace(
|
val str = trimStr(arr.decodeToString())
|
||||||
Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"),
|
|
||||||
" "
|
|
||||||
)
|
|
||||||
Log.i(TAG, "Got data from queueInputBuffer")
|
Log.i(TAG, "Got data from queueInputBuffer")
|
||||||
Log.i(TAG, "first string is >>>$str<<<")
|
Log.i(TAG, "first string is >>>$str<<<")
|
||||||
if (str.isNotEmpty()) {
|
if (str.isNotEmpty()) {
|
||||||
|
@ -67,9 +93,34 @@ class CustomDecoder : SubtitleDecoder {
|
||||||
TAG,
|
TAG,
|
||||||
"Decoder selected: $realDecoder"
|
"Decoder selected: $realDecoder"
|
||||||
)
|
)
|
||||||
realDecoder?.dequeueInputBuffer()?.let { buff ->
|
val decoder = realDecoder
|
||||||
buff.data = data
|
if (decoder != null) {
|
||||||
realDecoder?.queueInputBuffer(buff)
|
decoder.dequeueInputBuffer()?.let { buff ->
|
||||||
|
if (regexSubtitlesToRemoveCaptions && decoder::class.java != SsaDecoder::class.java) {
|
||||||
|
try {
|
||||||
|
data.position(0)
|
||||||
|
val fullDataArr = ByteArray(data.remaining())
|
||||||
|
data.get(fullDataArr)
|
||||||
|
var fullStr = trimStr(fullDataArr.decodeToString())
|
||||||
|
|
||||||
|
bloatRegex.forEach { rgx ->
|
||||||
|
fullStr = fullStr.replace(rgx, "\n")
|
||||||
|
}
|
||||||
|
captionRegex.forEach { rgx ->
|
||||||
|
fullStr = fullStr.replace(rgx, "\n")
|
||||||
|
}
|
||||||
|
fullStr.replace(Regex("(\r\n|\r|\n){2,}"),"\n")
|
||||||
|
|
||||||
|
buff.data = ByteBuffer.wrap(fullStr.toByteArray())
|
||||||
|
} catch (e : Exception) {
|
||||||
|
data.position(pos)
|
||||||
|
buff.data = data
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
buff.data = data
|
||||||
|
}
|
||||||
|
decoder.queueInputBuffer(buff)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue