[YouTube] Add JavaScript lexer to parse completely throttling decryption function (#905)
This commit is contained in:
parent
a99af9bb6e
commit
7244be7627
9 changed files with 1889 additions and 173 deletions
|
@ -3,7 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube;
|
|||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.utils.JavaScript;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
import org.schabi.newpipe.extractor.utils.StringUtils;
|
||||
import org.schabi.newpipe.extractor.utils.jsextractor.JavaScriptExtractor;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -119,21 +119,12 @@ public final class YoutubeThrottlingDecrypter {
|
|||
private static String parseDecodeFunction(final String playerJsCode, final String functionName)
|
||||
throws Parser.RegexException {
|
||||
try {
|
||||
return parseWithParenthesisMatching(playerJsCode, functionName);
|
||||
return parseWithLexer(playerJsCode, functionName);
|
||||
} catch (final Exception e) {
|
||||
return parseWithRegex(playerJsCode, functionName);
|
||||
}
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private static String parseWithParenthesisMatching(final String playerJsCode,
|
||||
final String functionName) {
|
||||
final String functionBase = functionName + "=function";
|
||||
return validateFunction(functionBase
|
||||
+ StringUtils.matchToClosingParenthesis(playerJsCode, functionBase)
|
||||
+ ";");
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private static String parseWithRegex(final String playerJsCode, final String functionName)
|
||||
throws Parser.RegexException {
|
||||
|
@ -153,6 +144,14 @@ public final class YoutubeThrottlingDecrypter {
|
|||
return function;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private static String parseWithLexer(final String playerJsCode, final String functionName)
|
||||
throws ParsingException {
|
||||
final String functionBase = functionName + "=function";
|
||||
return functionBase + JavaScriptExtractor.matchToClosingBrace(playerJsCode, functionBase)
|
||||
+ ";";
|
||||
}
|
||||
|
||||
private static boolean containsNParam(final String url) {
|
||||
return Parser.isMatch(N_PARAM_PATTERN, url);
|
||||
}
|
||||
|
|
|
@ -1,91 +0,0 @@
|
|||
package org.schabi.newpipe.extractor.utils;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
public final class StringUtils {
|
||||
|
||||
private StringUtils() {
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string The string to search in.
|
||||
* @param start A string from which to start searching.
|
||||
* @return A substring where each '{' matches a '}'.
|
||||
* @throws IndexOutOfBoundsException If {@code string} does not contain {@code start}
|
||||
* or parenthesis could not be matched .
|
||||
*/
|
||||
@Nonnull
|
||||
public static String matchToClosingParenthesis(@Nonnull final String string,
|
||||
@Nonnull final String start) {
|
||||
int startIndex = string.indexOf(start);
|
||||
if (startIndex < 0) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
}
|
||||
|
||||
startIndex += start.length();
|
||||
int endIndex = findNextParenthesis(string, startIndex, true);
|
||||
++endIndex;
|
||||
|
||||
int openParenthesis = 1;
|
||||
while (openParenthesis > 0) {
|
||||
endIndex = findNextParenthesis(string, endIndex, false);
|
||||
|
||||
switch (string.charAt(endIndex)) {
|
||||
case '{':
|
||||
++openParenthesis;
|
||||
break;
|
||||
case '}':
|
||||
--openParenthesis;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
++endIndex;
|
||||
}
|
||||
|
||||
return string.substring(startIndex, endIndex);
|
||||
}
|
||||
|
||||
private static int findNextParenthesis(@Nonnull final String string,
|
||||
final int offset,
|
||||
final boolean onlyOpen) {
|
||||
boolean lastEscaped = false;
|
||||
char quote = ' ';
|
||||
|
||||
for (int i = offset; i < string.length(); i++) {
|
||||
boolean thisEscaped = false;
|
||||
final char c = string.charAt(i);
|
||||
|
||||
switch (c) {
|
||||
case '{':
|
||||
if (quote == ' ') {
|
||||
return i;
|
||||
}
|
||||
break;
|
||||
case '}':
|
||||
if (!onlyOpen && quote == ' ') {
|
||||
return i;
|
||||
}
|
||||
break;
|
||||
case '\\':
|
||||
if (!lastEscaped) {
|
||||
thisEscaped = true;
|
||||
}
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
if (!lastEscaped) {
|
||||
if (quote == ' ') {
|
||||
quote = c;
|
||||
} else if (quote == c) {
|
||||
quote = ' ';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lastEscaped = thisEscaped;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
package org.schabi.newpipe.extractor.utils.jsextractor;
|
||||
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
/**
|
||||
* Utility class for extracting functions from JavaScript code.
|
||||
*/
|
||||
public final class JavaScriptExtractor {
|
||||
private JavaScriptExtractor() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches the given JavaScript code for the identifier of a function
|
||||
* and returns its body.
|
||||
*
|
||||
* @param jsCode JavaScript code
|
||||
* @param start start of the function (without the opening brace)
|
||||
* @return extracted code (opening brace + function + closing brace)
|
||||
* @throws ParsingException
|
||||
*/
|
||||
@Nonnull
|
||||
public static String matchToClosingBrace(final String jsCode, final String start)
|
||||
throws ParsingException {
|
||||
int startIndex = jsCode.indexOf(start);
|
||||
if (startIndex < 0) {
|
||||
throw new ParsingException("Start not found");
|
||||
}
|
||||
startIndex += start.length();
|
||||
final String js = jsCode.substring(startIndex);
|
||||
|
||||
final Lexer lexer = new Lexer(js);
|
||||
boolean visitedOpenBrace = false;
|
||||
|
||||
while (true) {
|
||||
final Lexer.ParsedToken parsedToken = lexer.getNextToken();
|
||||
final Token t = parsedToken.token;
|
||||
|
||||
if (t == Token.LC) {
|
||||
visitedOpenBrace = true;
|
||||
} else if (visitedOpenBrace && lexer.isBalanced()) {
|
||||
return js.substring(0, parsedToken.end);
|
||||
} else if (t == Token.EOF) {
|
||||
throw new ParsingException("Could not find matching braces");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,311 @@
|
|||
package org.schabi.newpipe.extractor.utils.jsextractor;
|
||||
|
||||
import org.mozilla.javascript.Context;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
|
||||
import java.util.Stack;
|
||||
|
||||
/**
|
||||
* JavaScript lexer that is able to parse JavaScript code and return its
|
||||
* tokens.
|
||||
*
|
||||
* <p>
|
||||
* The algorithm for distinguishing between division operators and regex literals
|
||||
* was taken from the <a href="https://github.com/rusty-ecma/RESS/">RESS lexer</a>.
|
||||
* </p>
|
||||
*/
|
||||
public class Lexer {
|
||||
private static class Paren {
|
||||
public final boolean funcExpr;
|
||||
public final boolean conditional;
|
||||
|
||||
Paren(final boolean funcExpr, final boolean conditional) {
|
||||
this.funcExpr = funcExpr;
|
||||
this.conditional = conditional;
|
||||
}
|
||||
}
|
||||
|
||||
private static class Brace {
|
||||
public final boolean isBlock;
|
||||
public final Paren paren;
|
||||
|
||||
Brace(final boolean isBlock, final Paren paren) {
|
||||
this.isBlock = isBlock;
|
||||
this.paren = paren;
|
||||
}
|
||||
}
|
||||
|
||||
private static class MetaToken {
|
||||
public final Token token;
|
||||
public final int lineno;
|
||||
|
||||
MetaToken(final Token token, final int lineno) {
|
||||
this.token = token;
|
||||
this.lineno = lineno;
|
||||
}
|
||||
}
|
||||
|
||||
private static class BraceMetaToken extends MetaToken {
|
||||
public final Brace brace;
|
||||
|
||||
BraceMetaToken(final Token token, final int lineno, final Brace brace) {
|
||||
super(token, lineno);
|
||||
this.brace = brace;
|
||||
}
|
||||
}
|
||||
|
||||
private static class ParenMetaToken extends MetaToken {
|
||||
public final Paren paren;
|
||||
|
||||
ParenMetaToken(final Token token, final int lineno, final Paren paren) {
|
||||
super(token, lineno);
|
||||
this.paren = paren;
|
||||
}
|
||||
}
|
||||
|
||||
private static class LookBehind {
|
||||
private final MetaToken[] list;
|
||||
|
||||
LookBehind() {
|
||||
list = new MetaToken[3];
|
||||
}
|
||||
|
||||
void push(final MetaToken t) {
|
||||
MetaToken toShift = t;
|
||||
for (int i = 0; i < 3; i++) {
|
||||
final MetaToken tmp = list[i];
|
||||
list[i] = toShift;
|
||||
toShift = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
MetaToken one() {
|
||||
return list[0];
|
||||
}
|
||||
|
||||
MetaToken two() {
|
||||
return list[1];
|
||||
}
|
||||
|
||||
MetaToken three() {
|
||||
return list[2];
|
||||
}
|
||||
|
||||
boolean oneIs(final Token token) {
|
||||
return list[0] != null && list[0].token == token;
|
||||
}
|
||||
|
||||
boolean twoIs(final Token token) {
|
||||
return list[1] != null && list[1].token == token;
|
||||
}
|
||||
|
||||
boolean threeIs(final Token token) {
|
||||
return list[2] != null && list[2].token == token;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parsed token, containing the token and its position in the input string
|
||||
*/
|
||||
public static class ParsedToken {
|
||||
public final Token token;
|
||||
public final int start;
|
||||
public final int end;
|
||||
|
||||
ParsedToken(final Token token, final int start, final int end) {
|
||||
this.token = token;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
}
|
||||
}
|
||||
|
||||
private final TokenStream stream;
|
||||
private final LookBehind lastThree;
|
||||
private final Stack<Brace> braceStack;
|
||||
private final Stack<Paren> parenStack;
|
||||
|
||||
/**
|
||||
* Create a new JavaScript lexer with the given source code
|
||||
*
|
||||
* @param js JavaScript code
|
||||
* @param languageVersion JavaScript version (from Rhino)
|
||||
*/
|
||||
public Lexer(final String js, final int languageVersion) {
|
||||
stream = new TokenStream(js, 0, languageVersion);
|
||||
lastThree = new LookBehind();
|
||||
braceStack = new Stack<>();
|
||||
parenStack = new Stack<>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new JavaScript lexer with the given source code
|
||||
*
|
||||
* @param js JavaScript code
|
||||
*/
|
||||
public Lexer(final String js) {
|
||||
this(js, Context.VERSION_DEFAULT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Continue parsing and return the next token
|
||||
* @return next token
|
||||
* @throws ParsingException
|
||||
*/
|
||||
public ParsedToken getNextToken() throws ParsingException {
|
||||
Token token = stream.nextToken();
|
||||
|
||||
if ((token == Token.DIV || token == Token.ASSIGN_DIV) && isRegexStart()) {
|
||||
stream.readRegExp(token);
|
||||
token = Token.REGEXP;
|
||||
}
|
||||
|
||||
final ParsedToken parsedToken = new ParsedToken(token, stream.tokenBeg, stream.tokenEnd);
|
||||
keepBooks(parsedToken);
|
||||
return parsedToken;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the parser is balanced (equal amount of open and closed parentheses and braces)
|
||||
* @return true if balanced
|
||||
*/
|
||||
public boolean isBalanced() {
|
||||
return braceStack.isEmpty() && parenStack.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluate the token for possible regex start and handle updating the
|
||||
* `self.last_three`, `self.paren_stack` and `self.brace_stack`
|
||||
*/
|
||||
void keepBooks(final ParsedToken parsedToken) throws ParsingException {
|
||||
if (parsedToken.token.isPunct) {
|
||||
switch (parsedToken.token) {
|
||||
case LP:
|
||||
handleOpenParenBooks();
|
||||
return;
|
||||
case LC:
|
||||
handleOpenBraceBooks();
|
||||
return;
|
||||
case RP:
|
||||
handleCloseParenBooks(parsedToken.start);
|
||||
return;
|
||||
case RC:
|
||||
handleCloseBraceBooks(parsedToken.start);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (parsedToken.token != Token.COMMENT) {
|
||||
lastThree.push(new MetaToken(parsedToken.token, stream.lineno));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle the book keeping when we find an `(`
|
||||
*/
|
||||
void handleOpenParenBooks() {
|
||||
boolean funcExpr = false;
|
||||
if (lastThree.oneIs(Token.FUNCTION)) {
|
||||
funcExpr = lastThree.two() != null && checkForExpression(lastThree.two().token);
|
||||
} else if (lastThree.twoIs(Token.FUNCTION)) {
|
||||
funcExpr = lastThree.three() != null && checkForExpression(lastThree.three().token);
|
||||
}
|
||||
|
||||
final boolean conditional = lastThree.one() != null
|
||||
&& lastThree.one().token.isConditional();
|
||||
|
||||
final Paren paren = new Paren(funcExpr, conditional);
|
||||
parenStack.push(paren);
|
||||
lastThree.push(new ParenMetaToken(Token.LP, stream.lineno, paren));
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle the book keeping when we find an `{`
|
||||
*/
|
||||
void handleOpenBraceBooks() {
|
||||
boolean isBlock = true;
|
||||
if (lastThree.one() != null) {
|
||||
switch (lastThree.one().token) {
|
||||
case LP:
|
||||
case LC:
|
||||
case CASE:
|
||||
isBlock = false;
|
||||
break;
|
||||
case COLON:
|
||||
isBlock = !braceStack.isEmpty() && braceStack.lastElement().isBlock;
|
||||
break;
|
||||
case RETURN:
|
||||
case YIELD:
|
||||
case YIELD_STAR:
|
||||
isBlock = lastThree.two() != null && lastThree.two().lineno != stream.lineno;
|
||||
break;
|
||||
default:
|
||||
isBlock = !lastThree.one().token.isOp;
|
||||
}
|
||||
}
|
||||
|
||||
Paren paren = null;
|
||||
if (lastThree.one() instanceof ParenMetaToken && lastThree.one().token == Token.RP) {
|
||||
paren = ((ParenMetaToken) lastThree.one()).paren;
|
||||
}
|
||||
final Brace brace = new Brace(isBlock, paren);
|
||||
braceStack.push(brace);
|
||||
lastThree.push(new BraceMetaToken(Token.LC, stream.lineno, brace));
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle the book keeping when we find an `)`
|
||||
*/
|
||||
void handleCloseParenBooks(final int start) throws ParsingException {
|
||||
if (parenStack.isEmpty()) {
|
||||
throw new ParsingException("unmached closing paren at " + start);
|
||||
}
|
||||
lastThree.push(new ParenMetaToken(Token.RP, stream.lineno, parenStack.pop()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle the book keeping when we find an `}`
|
||||
*/
|
||||
void handleCloseBraceBooks(final int start) throws ParsingException {
|
||||
if (braceStack.isEmpty()) {
|
||||
throw new ParsingException("unmatched closing brace at " + start);
|
||||
}
|
||||
lastThree.push(new BraceMetaToken(Token.RC, stream.lineno, braceStack.pop()));
|
||||
}
|
||||
|
||||
boolean checkForExpression(final Token token) {
|
||||
return token.isOp || token == Token.RETURN || token == Token.CASE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect if the `/` is the beginning of a regex or is division
|
||||
* <a href="https://github.com/sweet-js/sweet-core/wiki/design">see this for more details</a>
|
||||
*
|
||||
* @return isRegexStart
|
||||
*/
|
||||
boolean isRegexStart() {
|
||||
if (lastThree.one() != null) {
|
||||
final Token t = lastThree.one().token;
|
||||
if (t.isKeyw) {
|
||||
return t != Token.THIS;
|
||||
} else if (t == Token.RP && lastThree.one() instanceof ParenMetaToken) {
|
||||
return ((ParenMetaToken) lastThree.one()).paren.conditional;
|
||||
} else if (t == Token.RC && lastThree.one() instanceof BraceMetaToken) {
|
||||
final BraceMetaToken mt = (BraceMetaToken) lastThree.one();
|
||||
if (mt.brace.isBlock) {
|
||||
if (mt.brace.paren != null) {
|
||||
return !mt.brace.paren.funcExpr;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else if (t.isPunct) {
|
||||
return t != Token.RB;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,121 @@
|
|||
package org.schabi.newpipe.extractor.utils.jsextractor;
|
||||
|
||||
/**
 * Token types produced by the JavaScript lexer. Each constant carries three flags telling
 * whether it counts as an operator, as punctuation and as a keyword; constants declared
 * without arguments have all three flags unset.
 */
public enum Token {
    ERROR,
    EOF,
    EOL,
    RETURN(false, false, true),
    BITOR(true, true, false),
    BITXOR(true, true, false),
    BITAND(true, true, false),
    EQ(true, true, false),
    NE(true, true, false),
    LT(true, true, false),
    LE(true, true, false),
    GT(true, true, false),
    GE(true, true, false),
    LSH(true, true, false),
    RSH(true, true, false),
    URSH(true, true, false),
    ADD(true, true, false),
    SUB(true, true, false),
    MUL(true, true, false),
    DIV(true, true, false),
    MOD(true, true, false),
    NOT(true, true, false),
    BITNOT(true, true, false),
    NEW(true, false, true),
    DELPROP(true, false, true),
    TYPEOF(true, false, true),
    NAME,
    NUMBER,
    STRING,
    NULL(false, false, true),
    THIS(false, false, true),
    FALSE(false, false, true),
    TRUE(false, false, true),
    SHEQ(true, true, false), // strict equality (===)
    SHNE(true, true, false), // strict inequality (!==)
    REGEXP,
    THROW(true, false, true),
    IN(true, false, true),
    INSTANCEOF(true, false, true),
    YIELD(false, false, true), // JS 1.7 yield pseudo keyword
    EXP(true, true, false), // exponentiation operator (**)
    BIGINT, // ES2020 BigInt
    TRY(false, false, true),
    SEMI(false, true, false), // semicolon
    LB(false, true, false), // left and right brackets
    RB(false, true, false),
    LC(false, true, false), // left and right curlies (braces)
    RC(false, true, false),
    LP(false, true, false), // left and right parentheses
    RP(false, true, false),
    COMMA(false, true, false), // comma operator
    ASSIGN(true, true, false), // simple assignment (=)
    ASSIGN_BITOR(true, true, false), // |=
    ASSIGN_BITXOR(true, true, false), // ^=
    ASSIGN_BITAND(true, true, false), // &=
    ASSIGN_LSH(true, true, false), // <<=
    ASSIGN_RSH(true, true, false), // >>=
    ASSIGN_URSH(true, true, false), // >>>=
    ASSIGN_ADD(true, true, false), // +=
    ASSIGN_SUB(true, true, false), // -=
    ASSIGN_MUL(true, true, false), // *=
    ASSIGN_DIV(true, true, false), // /=
    ASSIGN_MOD(true, true, false), // %=
    ASSIGN_EXP(true, true, false), // **=
    HOOK(true, true, false), // conditional (?:)
    COLON(true, true, false),
    OR(true, true, false), // logical or (||)
    AND(true, true, false), // logical and (&&)
    INC(true, true, false), // increment/decrement (++ --)
    DEC(true, true, false),
    DOT(false, true, false), // member operator (.)
    FUNCTION(false, false, true), // function keyword
    EXPORT(false, false, true), // export keyword
    IMPORT(false, false, true), // import keyword
    IF(false, false, true), // if keyword
    ELSE(false, false, true), // else keyword
    SWITCH(false, false, true), // switch keyword
    CASE(false, false, true), // case keyword
    DEFAULT(false, false, true), // default keyword
    WHILE(false, false, true), // while keyword
    DO(false, false, true), // do keyword
    FOR(false, false, true), // for keyword
    BREAK(false, false, true), // break keyword
    CONTINUE(false, false, true), // continue keyword
    VAR(false, false, true), // var keyword
    WITH(false, false, true), // with keyword
    CATCH(false, false, true), // catch keyword
    FINALLY(false, false, true), // finally keyword
    VOID(true, false, true), // void keyword
    RESERVED(false, false, true), // reserved keywords
    LET(false, false, true), // JS 1.7 let pseudo keyword
    CONST(false, false, true),
    DEBUGGER(false, false, true),
    COMMENT,
    ARROW(false, true, false), // ES6 ArrowFunction
    YIELD_STAR(false, false, true), // ES6 "yield *", a specialization of yield
    TEMPLATE_LITERAL; // template literal

    /** Whether the token is treated as an operator. */
    public final boolean isOp;
    /** Whether the token is treated as punctuation. */
    public final boolean isPunct;
    /** Whether the token is treated as a keyword. */
    public final boolean isKeyw;

    Token(final boolean isOp, final boolean isPunct, final boolean isKeyw) {
        this.isOp = isOp;
        this.isPunct = isPunct;
        this.isKeyw = isKeyw;
    }

    /** Tokens without explicit flags are neither operator, punctuation nor keyword. */
    Token() {
        this(false, false, false);
    }

    /**
     * Tells whether this token is one of {@code if}, {@code for}, {@code while}
     * or {@code with}.
     *
     * @return true for {@link #IF}, {@link #FOR}, {@link #WHILE} and {@link #WITH}
     */
    public boolean isConditional() {
        switch (this) {
            case IF:
            case FOR:
            case WHILE:
            case WITH:
                return true;
            default:
                return false;
        }
    }
}
|
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,54 @@
|
|||
package org.schabi.newpipe.extractor.utils;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.utils.jsextractor.JavaScriptExtractor;
|
||||
import org.schabi.newpipe.extractor.utils.jsextractor.Lexer;
|
||||
import org.schabi.newpipe.extractor.utils.jsextractor.Token;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
import static org.schabi.newpipe.FileUtils.resolveTestResource;
|
||||
|
||||
public class JavaScriptExtractorTest
|
||||
{
|
||||
@Test
|
||||
void testJsExtractor() throws ParsingException {
|
||||
final String src = "Wka=function(d){var x = [/,,/,913,/(,)}/g,\"abcdef}\\\"\",];var y = 10/2/1;return x[1][y];}//some={}random-padding+;";
|
||||
final String result = JavaScriptExtractor.matchToClosingBrace(src, "Wka=function");
|
||||
assertEquals("(d){var x = [/,,/,913,/(,)}/g,\"abcdef}\\\"\",];var y = 10/2/1;return x[1][y];}", result);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testEverythingJs() throws ParsingException, IOException {
|
||||
final File jsFile = resolveTestResource("es5.js");
|
||||
final StringBuilder contentBuilder = new StringBuilder();
|
||||
Files.lines(jsFile.toPath()).forEach(line -> contentBuilder.append(line).append("\n"));
|
||||
|
||||
final String js = contentBuilder.toString();
|
||||
|
||||
final Lexer lexer = new Lexer(js);
|
||||
Lexer.ParsedToken parsedToken = null;
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
parsedToken = lexer.getNextToken();
|
||||
if (parsedToken.token == Token.EOF) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (final Exception e){
|
||||
if (parsedToken != null) {
|
||||
throw new ParsingException("Issue occured at pos " + parsedToken.end + ", after\n" +
|
||||
js.substring(Math.max(0, parsedToken.start - 50), parsedToken.end), e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
|
||||
assertTrue(lexer.isBalanced());
|
||||
}
|
||||
}
|
|
@ -1,71 +0,0 @@
|
|||
package org.schabi.newpipe.extractor.utils;
|
||||
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.schabi.newpipe.extractor.utils.StringUtils.matchToClosingParenthesis;
|
||||
|
||||
public class StringUtilsTest {
|
||||
|
||||
@Test
|
||||
public void actualDecodeFunction__success() {
|
||||
String preNoise = "if(\"function\"===typeof b&&\"function\"===typeof c||\"function\"===typeof c&&\"function\"===typeof d)throw Error(\"It looks like you are passing several store enhancers to createStore(). This is not supported. Instead, compose them together to a single function.\");\"function\"===typeof b&&\"undefined\"===typeof c&&(c=b,b=void 0);if(\"undefined\"!==typeof c){if(\"function\"!==typeof c)throw Error(\"Expected the enhancer to be a function.\");return c(Dr)(a,b)}if(\"function\"!==typeof a)throw Error(\"Expected the reducer to be a function.\");\n" +
|
||||
"var l=a,m=b,n=[],p=n,q=!1;h({type:Cr});a={};var t=(a.dispatch=h,a.subscribe=f,a.getState=e,a.replaceReducer=function(u){if(\"function\"!==typeof u)throw Error(\"Expected the nextReducer to be a function.\");l=u;h({type:hha});return t},a[Er]=function(){var u={};\n" +
|
||||
"return u.subscribe=function(x){function y(){x.next&&x.next(e())}\n" +
|
||||
"if(\"object\"!==typeof x||null===x)throw new TypeError(\"Expected the observer to be an object.\");y();return{unsubscribe:f(y)}},u[Er]=function(){return this},u},a);\n" +
|
||||
"return t};\n" +
|
||||
"Fr=function(a){De.call(this,a,-1,iha)};\n" +
|
||||
"Gr=function(a){De.call(this,a)};\n" +
|
||||
"jha=function(a,b){for(;Jd(b);)switch(b.C){case 10:var c=Od(b);Ge(a,1,c);break;case 18:c=Od(b);Ge(a,2,c);break;case 26:c=Od(b);Ge(a,3,c);break;case 34:c=Od(b);Ge(a,4,c);break;case 40:c=Hd(b.i);Ge(a,5,c);break;default:if(!we(b))return a}return a};";
|
||||
String signature = "kha=function(a)";
|
||||
String body = "{var b=a.split(\"\"),c=[-1186681497,-1653318181,372630254,function(d,e){for(var f=64,h=[];++f-h.length-32;){switch(f){case 58:f-=14;case 91:case 92:case 93:continue;case 123:f=47;case 94:case 95:case 96:continue;case 46:f=95}h.push(String.fromCharCode(f))}d.forEach(function(l,m,n){this.push(n[m]=h[(h.indexOf(l)-h.indexOf(this[m])+m-32+f--)%h.length])},e.split(\"\"))},\n" +
|
||||
"-467738125,1158037010,function(d,e){e=(e%d.length+d.length)%d.length;var f=d[0];d[0]=d[e];d[e]=f},\n" +
|
||||
"\"continue\",158531598,-172776392,function(d,e){e=(e%d.length+d.length)%d.length;d.splice(-e).reverse().forEach(function(f){d.unshift(f)})},\n" +
|
||||
"-1753359936,function(d){for(var e=d.length;e;)d.push(d.splice(--e,1)[0])},\n" +
|
||||
"1533713399,-1736576025,-1274201783,function(d){d.reverse()},\n" +
|
||||
"169126570,1077517431,function(d,e){d.push(e)},\n" +
|
||||
"-1807932259,-150219E3,480561184,-3495188,-1856307605,1416497372,b,-1034568435,-501230371,1979778585,null,b,-1049521459,function(d,e){e=(e%d.length+d.length)%d.length;d.splice(0,1,d.splice(e,1,d[0])[0])},\n" +
|
||||
"1119056651,function(d,e){for(e=(e%d.length+d.length)%d.length;e--;)d.unshift(d.pop())},\n" +
|
||||
"b,1460920438,135616752,-1807932259,-815823682,-387465417,1979778585,113585E4,function(d,e){d.push(e)},\n" +
|
||||
"-1753359936,-241651400,-386043301,-144139513,null,null,function(d,e){e=(e%d.length+d.length)%d.length;d.splice(e,1)}];\n" +
|
||||
"c[30]=c;c[49]=c;c[50]=c;try{c[51](c[26],c[25]),c[10](c[30],c[17]),c[5](c[28],c[9]),c[18](c[51]),c[14](c[19],c[21]),c[8](c[40],c[22]),c[50](c[35],c[28]),c[24](c[29],c[3]),c[0](c[31],c[19]),c[27](c[26],c[33]),c[29](c[36],c[40]),c[50](c[26]),c[27](c[32],c[9]),c[8](c[10],c[14]),c[35](c[44],c[28]),c[22](c[44],c[1]),c[8](c[11],c[3]),c[29](c[44]),c[21](c[41],c[45]),c[16](c[32],c[4]),c[17](c[14],c[26]),c[36](c[20],c[45]),c[43](c[35],c[39]),c[43](c[20],c[23]),c[43](c[10],c[51]),c[43](c[34],c[32]),c[29](c[34],\n" +
|
||||
"c[49]),c[43](c[20],c[44]),c[49](c[20]),c[19](c[15],c[8]),c[36](c[15],c[46]),c[17](c[20],c[37]),c[18](c[10]),c[17](c[34],c[31]),c[19](c[10],c[30]),c[19](c[20],c[2]),c[36](c[20],c[21]),c[43](c[35],c[16]),c[19](c[35],c[5]),c[18](c[46],c[34])}catch(d){return\"enhanced_except_lJMB6-z-_w8_\"+a}return b.join(\"\")}";
|
||||
String postNoise = "Hr=function(a){this.i=a}";
|
||||
|
||||
String substring = matchToClosingParenthesis(preNoise + '\n' + signature + body + ";" + postNoise, signature);
|
||||
|
||||
assertEquals(body, substring);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void moreClosing__success() {
|
||||
String expected = "{{{}}}";
|
||||
String string = "a" + expected + "}}";
|
||||
|
||||
String substring = matchToClosingParenthesis(string, "a");
|
||||
|
||||
assertEquals(expected, substring);
|
||||
}
|
||||
|
||||
@Disabled("Functionality currently not needed")
|
||||
@Test
|
||||
public void lessClosing__success() {
|
||||
String expected = "{{{}}}";
|
||||
String string = "a{{" + expected;
|
||||
|
||||
String substring = matchToClosingParenthesis(string, "a");
|
||||
|
||||
assertEquals(expected, substring);
|
||||
}
|
||||
|
||||
@Test
|
||||
void find_closing_with_quotes() {
|
||||
final String expected = "{return \",}\\\"/\"}";
|
||||
final String string = "function(d){return \",}\\\"/\"}";
|
||||
|
||||
final String substring = matchToClosingParenthesis(string, "function(d)");
|
||||
|
||||
assertEquals(expected, substring);
|
||||
}
|
||||
}
|
182
extractor/src/test/resources/es5.js
Normal file
182
extractor/src/test/resources/es5.js
Normal file
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
Source: https://github.com/michaelficarra/everything.js
|
||||
|
||||
Copyright (c) 2014, Michael Ficarra.
|
||||
BSD-3-Clause license: https://opensource.org/licenses/BSD-3-Clause
|
||||
*/
|
||||
/* this file contains all grammatical productions in ECMA-262 edition 5.1 ** * **/
|
||||
|
||||
// whitespace
|
||||
tab:for(;;)break tab;
|
||||
verticalTab:for(;;)breakverticalTab;
|
||||
formFeed:for(;;)breakformFeed;
|
||||
space:for(;;)break space;
|
||||
nbsp:for(;;)break nbsp;
|
||||
bom:for(;;)breakbom;
|
||||
|
||||
// line terminators
|
||||
lineFeed:0
|
||||
0;
|
||||
carriageReturn:0
|
||||
0;
|
||||
carriageReturnLineFeed:0
|
||||
0;
|
||||
lineSeparator:0
0;
|
||||
paragraphSeparator:0
0;
|
||||
|
||||
// identifier names
|
||||
var $, _, \u0078, x$, x_, x\u0030, xa, x0, x0a, x0123456789,
|
||||
qwertyuiopasdfghjklzxcvbnm, QWERTYUIOPASDFGHJKLZXCVBNM;
|
||||
// a representative sample of unicode letters and numbers
|
||||
var œ一, ǻ둘, ɤ〩, φ, fiⅷ, ユニコード, x;
|
||||
|
||||
null; true; false;
|
||||
|
||||
0; 00; 1234567890; 01234567;
|
||||
0.; 0.00; 10.00; .0; .00
|
||||
0e0; 0E0; 0.e0; 0.00e+0; .00e-0;
|
||||
0x0; 0X0; 0x0123456789abcdefABCDEF;
|
||||
2e308;
|
||||
|
||||
""; "'"; "\'\"\\\b\f\n\r\t\v\0";
|
||||
"\1\00\400\000";
|
||||
"\x01\x23\x45\x67\x89\xAB\xCD\xEF";
|
||||
"\u0123\u4567\u89AB\uCDEF"; "\
|
||||
";
|
||||
|
||||
''; '"'; '\'\"\\\b\f\n\r\t\v\0';
|
||||
'\1\00\400\000';
|
||||
'\x01\x23\x45\x67\x89\xAB\xCD\xEF';
|
||||
'\u0123\u4567\u89AB\uCDEF'; '\
|
||||
';
|
||||
|
||||
/x/; /|/; /|||/;
|
||||
/^$\b\B/; /(?=(?!(?:(.))))/;
|
||||
/a.\f\n\r\t\v\0\[\-\/\\\x00\u0000/; /\d\D\s\S\w\W/;
|
||||
/\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz/;
|
||||
/\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ/;
|
||||
/[a-z-]/; /[^\b\-^]/; /[/\]\\]/;
|
||||
/./i; /./g; /./m; /./igm;
|
||||
/.*/; /.*?/; /.+/; /.+?/; /.?/; /.??/;
|
||||
/.{0}/; /.{0,}/; /.{0,0}/;
|
||||
|
||||
this;
|
||||
|
||||
x;
|
||||
|
||||
[]; [,]; [0]; [0,]; [,0]; [0,0]; [0,0,]; [0,,0]; [,,];
|
||||
|
||||
({}); ({x:0}); ({x:0,y:0}); ({x:0,}); ({'x':0,"y":0,var:0,});
|
||||
({0:0}); ({0.:0}); ({0.0:0}); ({.0:0}); ({0e0:0}); ({0x0:0});
|
||||
({
|
||||
get x(){}, set x(a){}, get 'y'(){}, set "y"(a){},
|
||||
get 0(){}, set 0(a){}, get var(){}, set var(x){},
|
||||
});
|
||||
|
||||
0..a;
|
||||
|
||||
0[0];
|
||||
|
||||
// this function makes the NewExpression and CallExpression tests not throw at runtime
|
||||
x = function f(){ return f; }; x[0] = x; x.a = x;
|
||||
|
||||
new x(); new new x()();
|
||||
new x[0](); new x.a(); new x[0].a(); new x.a[0]();
|
||||
new x; new new x; new new x();
|
||||
new new x().a; new new x()[0];
|
||||
|
||||
x(); x()(); x(x); x(x, x);
|
||||
x.a().a(); x[0]()[0](); x().a[0]();
|
||||
|
||||
x++; x--;
|
||||
|
||||
delete void typeof+-~!x; ++x; --x;
|
||||
|
||||
0*0; 0/0; 0%0;
|
||||
|
||||
0+0; 0-0;
|
||||
|
||||
0<<0; 0>>0; 0>>>0;
|
||||
|
||||
0<0; 0>0; 0<=0; 0>=0;
|
||||
0 instanceof function(){};
|
||||
0 in{};
|
||||
|
||||
0==0; 0!=0; 0===0; 0!==0;
|
||||
|
||||
0&0; 0^0; 0|0; 0&&0; 0||0;
|
||||
|
||||
0?0:0; 0?0?0:0:0; 0||0?x=0:x=0;
|
||||
|
||||
x=0; x*=0; x/=0; x%=0; x+=0; x-=0;
|
||||
x<<=0; x>>=0; x>>>=0; x&=0; x^=0; x|=0;
|
||||
|
||||
0,0; 0,0,0; x=0,x=0;
|
||||
|
||||
|
||||
{} {;} {0} {0;} {0;0} {0;0;}
|
||||
|
||||
var x; var x,y; var x,y,z;
|
||||
var x=0; var x=0,y; var x,y=0; var x=0,y=0;
|
||||
|
||||
;
|
||||
|
||||
if(0); if(0);else;
|
||||
|
||||
do;while(0);
|
||||
while(0);
|
||||
for(;;)break; for(0;0;0); for((0 in[]);0;);
|
||||
for(var a;;)break; for(var a,b;0;0);
|
||||
for(var a=0;;)break; for(var a=(0 in[]);0;);
|
||||
for(x in{}); for(var x in{});
|
||||
for(var x=[]in{}); for(var x=(0 in[])in{});
|
||||
|
||||
for(;0;)continue; x:for(;0;)continue x;
|
||||
|
||||
for(;;)break; x:for(;;)break x;
|
||||
switch(0){case 0:break;}
|
||||
|
||||
function f(){ return; }
|
||||
function f(){ return 0; }
|
||||
|
||||
with(0);
|
||||
|
||||
switch(0){} switch(0){case 0:} switch(0){case 0:case 0:}
|
||||
switch(0){default:} switch(0){case 0:default:case 0:}
|
||||
switch(0){case 0:;} switch(0){case 0:;;}
|
||||
switch(0){default:;} switch(0){default:;;}
|
||||
|
||||
x:; x:y:;
|
||||
|
||||
try { throw 0; }catch(x){}
|
||||
|
||||
try{}catch(x){}
|
||||
try{}finally{}
|
||||
try{}catch(x){}finally{}
|
||||
|
||||
debugger;
|
||||
|
||||
function f(){}
|
||||
function f(x){}
|
||||
function f(x,y){}
|
||||
function f(){ function f(){} }
|
||||
|
||||
function f(){ "use strict" }
|
||||
function f(){ 'use strict' }
|
||||
function f(){ "other directive" }
|
||||
function f(){ 'other directive' }
|
||||
function f(){ ("string") }
|
||||
function f(){ ('string') }
|
||||
function f(){
|
||||
'string'
|
||||
+0
|
||||
}
|
||||
|
||||
(function(){});
|
||||
(function(x){});
|
||||
(function(x,y){});
|
||||
(function(){ function f(){} });
|
||||
(function f(){});
|
||||
(function f(x){});
|
||||
(function f(x,y){});
|
||||
(function f(){ function f(){} });
|
Loading…
Reference in a new issue