forked from recloudstream/cloudstream
		
	regex for captions
This commit is contained in:
		
							parent
							
								
									038800d93a
								
							
						
					
					
						commit
						d4637a3a26
					
				
					 1 changed files with 58 additions and 7 deletions
				
			
		|  | @ -12,11 +12,40 @@ import com.google.android.exoplayer2.text.ttml.TtmlDecoder | ||||||
| import com.google.android.exoplayer2.text.webvtt.WebvttDecoder | import com.google.android.exoplayer2.text.webvtt.WebvttDecoder | ||||||
| import com.google.android.exoplayer2.util.MimeTypes | import com.google.android.exoplayer2.util.MimeTypes | ||||||
| import com.lagradost.cloudstream3.mvvm.logError | import com.lagradost.cloudstream3.mvvm.logError | ||||||
|  | import java.nio.ByteBuffer | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class CustomDecoder : SubtitleDecoder { | class CustomDecoder : SubtitleDecoder { | ||||||
|     companion object { |     companion object { | ||||||
|         private const val TAG = "CustomDecoder" |         private const val TAG = "CustomDecoder" | ||||||
|  |         var regexSubtitlesToRemoveCaptions = false | ||||||
|  |         val bloatRegex = | ||||||
|  |             listOf( | ||||||
|  |                 Regex( | ||||||
|  |                     """Support\s+us\s+and\s+become\s+VIP\s+member\s+to\s+remove\s+all\s+ads\s+from\s+(www\.|)OpenSubtitles(\.org|)""", | ||||||
|  |                     RegexOption.IGNORE_CASE | ||||||
|  |                 ), | ||||||
|  |                 Regex( | ||||||
|  |                     """Please\s+rate\s+this\s+subtitle\s+at\s+.*\s+Help\s+other\s+users\s+to\s+choose\s+the\s+best\s+subtitles""", | ||||||
|  |                     RegexOption.IGNORE_CASE | ||||||
|  |                 ), | ||||||
|  |                 Regex( | ||||||
|  |                     """Contact\s(www\.|)OpenSubtitles(\.org|)\s+today""", | ||||||
|  |                     RegexOption.IGNORE_CASE | ||||||
|  |                 ), | ||||||
|  |                 Regex( | ||||||
|  |                     """Advertise\s+your\s+product\s+or\s+brand\s+here""", | ||||||
|  |                     RegexOption.IGNORE_CASE | ||||||
|  |                 ), | ||||||
|  |             ) | ||||||
|  |         val captionRegex = listOf(Regex("""(-\s?|)[\[({][\w\d\s]*?[])}]\s*""")) | ||||||
|  | 
 | ||||||
|  |         fun trimStr(string: String) : String { | ||||||
|  |             return string.trimStart().trim('\uFEFF', '\u200B').replace( | ||||||
|  |                 Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"), | ||||||
|  |                 " " | ||||||
|  |             ) | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     private var realDecoder: SubtitleDecoder? = null |     private var realDecoder: SubtitleDecoder? = null | ||||||
|  | @ -45,10 +74,7 @@ class CustomDecoder : SubtitleDecoder { | ||||||
| 
 | 
 | ||||||
|                     //https://emptycharacter.com/ |                     //https://emptycharacter.com/ | ||||||
|                     //https://www.fileformat.info/info/unicode/char/200b/index.htm |                     //https://www.fileformat.info/info/unicode/char/200b/index.htm | ||||||
|                     val str = arr.decodeToString().trimStart().trim('\uFEFF', '\u200B').replace( |                     val str = trimStr(arr.decodeToString()) | ||||||
|                         Regex("[\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u205F]"), |  | ||||||
|                         " " |  | ||||||
|                     ) |  | ||||||
|                     Log.i(TAG, "Got data from queueInputBuffer") |                     Log.i(TAG, "Got data from queueInputBuffer") | ||||||
|                     Log.i(TAG, "first string is >>>$str<<<") |                     Log.i(TAG, "first string is >>>$str<<<") | ||||||
|                     if (str.isNotEmpty()) { |                     if (str.isNotEmpty()) { | ||||||
|  | @ -67,9 +93,34 @@ class CustomDecoder : SubtitleDecoder { | ||||||
|                             TAG, |                             TAG, | ||||||
|                             "Decoder selected: $realDecoder" |                             "Decoder selected: $realDecoder" | ||||||
|                         ) |                         ) | ||||||
|                         realDecoder?.dequeueInputBuffer()?.let { buff -> |                         val decoder = realDecoder | ||||||
|  |                         if (decoder != null) { | ||||||
|  |                             decoder.dequeueInputBuffer()?.let { buff -> | ||||||
|  |                                 if (regexSubtitlesToRemoveCaptions && decoder::class.java != SsaDecoder::class.java) { | ||||||
|  |                                     try { | ||||||
|  |                                         data.position(0) | ||||||
|  |                                         val fullDataArr = ByteArray(data.remaining()) | ||||||
|  |                                         data.get(fullDataArr) | ||||||
|  |                                         var fullStr = trimStr(fullDataArr.decodeToString()) | ||||||
|  | 
 | ||||||
|  |                                         bloatRegex.forEach { rgx -> | ||||||
|  |                                             fullStr = fullStr.replace(rgx, "\n") | ||||||
|  |                                         } | ||||||
|  |                                         captionRegex.forEach { rgx -> | ||||||
|  |                                             fullStr = fullStr.replace(rgx, "\n") | ||||||
|  |                                         } | ||||||
|  |                                         fullStr.replace(Regex("(\r\n|\r|\n){2,}"),"\n") | ||||||
|  | 
 | ||||||
|  |                                         buff.data = ByteBuffer.wrap(fullStr.toByteArray()) | ||||||
|  |                                     } catch (e : Exception) { | ||||||
|  |                                         data.position(pos) | ||||||
|                                         buff.data = data |                                         buff.data = data | ||||||
|                             realDecoder?.queueInputBuffer(buff) |                                     } | ||||||
|  |                                 } else { | ||||||
|  |                                     buff.data = data | ||||||
|  |                                 } | ||||||
|  |                                 decoder.queueInputBuffer(buff) | ||||||
|  |                             } | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue