Refactor frames extraction

This commit is contained in:
Vasiliy 2019-09-10 19:38:51 +03:00
parent a6c94c7a9d
commit f084cfec24
No known key found for this signature in database
GPG key ID: 9F74C4D2874D7523
5 changed files with 140 additions and 145 deletions

View file

@ -1036,28 +1036,60 @@ public class YoutubeStreamExtractor extends StreamExtractor {
};
}
/**
 * Builds the {@link StreamFrames} description from the player configuration embedded in the page.
 * <p>
 * The inline script next to {@code #player-api} is searched for the JSON object that follows
 * {@code ytplayer.config} and ends before {@code ytplayer.load}. Its {@code args.player_response}
 * string is parsed again as JSON, and the storyboard {@code spec} is split on {@code '|'}: the first
 * element is handed to {@link StreamFrames} as the base URL, the remaining ones as its parameters.
 *
 * @return the parsed frames, or {@code null} when either marker is absent or anything fails
 */
@Nullable
public StreamFrames getFrames() {
    try {
        final String scriptHtml = doc.select("#player-api").first()
                .siblingElements()
                .select("script")
                .html();

        final int configPos = scriptHtml.indexOf("ytplayer.config");
        if (configPos == -1) {
            return null;
        }
        final int jsonStart = scriptHtml.indexOf('{', configPos);
        final int loadPos = scriptHtml.indexOf("ytplayer.load", jsonStart);
        if (loadPos == -1) {
            return null;
        }

        final JsonObject config = JsonParser.object().from(scriptHtml.substring(jsonStart, loadPos - 1));
        final String playerResponseJson = config.getObject("args").getString("player_response");
        final JsonObject playerResponse = JsonParser.object().from(playerResponseJson);
        final String rawSpec = playerResponse
                .getObject("storyboards")
                .getObject("playerStoryboardSpecRenderer")
                .getString("spec");

        final String[] segments = rawSpec.split("\\|");
        final List<String> variantParams = Arrays.asList(segments).subList(1, segments.length);
        return new StreamFrames(segments[0], variantParams);
    } catch (Exception ignored) {
        // Best effort: every failure (missing element, bad JSON, ...) is reported as "no frames".
        return null;
    }
}
/**
 * Extracts the storyboard framesets advertised in the player configuration embedded in the page.
 * <p>
 * The storyboard {@code spec} string is split on {@code '|'}: the first element is a URL template,
 * every following element describes one frameset as eight {@code '#'}-separated fields. Elements
 * with a different field count are skipped. Paged elements (URL contains {@code $M}) whose grid
 * size is not positive or whose frame count is negative are skipped as well, because no valid
 * page count can be derived from them.
 *
 * @return the parsed framesets; an empty list when the page has no {@code ytplayer.config} or
 *         {@code ytplayer.load} marker
 * @throws ExtractionException wrapping any exception raised while locating or parsing the data
 */
@Nonnull
@Override
public List<Frameset> getFrames() throws ExtractionException {
    try {
        final String script = doc.select("#player-api").first().siblingElements().select("script").html();
        final int configPos = script.indexOf("ytplayer.config");
        if (configPos == -1) {
            return Collections.emptyList();
        }
        final int jsonStart = script.indexOf('{', configPos);
        final int jsonEnd = script.indexOf("ytplayer.load", jsonStart);
        if (jsonEnd == -1) {
            return Collections.emptyList();
        }
        JsonObject jo = JsonParser.object().from(script.substring(jsonStart, jsonEnd - 1));
        final String resp = jo.getObject("args").getString("player_response");
        jo = JsonParser.object().from(resp);
        final String[] spec = jo.getObject("storyboards").getObject("playerStoryboardSpecRenderer").getString("spec").split("\\|");
        final String url = spec[0];
        final ArrayList<Frameset> result = new ArrayList<>(spec.length - 1);
        for (int i = 1; i < spec.length; ++i) {
            final String[] parts = spec[i].split("#");
            if (parts.length != 8) {
                continue;
            }
            final int frameWidth = Integer.parseInt(parts[0]);
            final int frameHeight = Integer.parseInt(parts[1]);
            final int totalCount = Integer.parseInt(parts[2]);
            final int framesPerPageX = Integer.parseInt(parts[3]);
            final int framesPerPageY = Integer.parseInt(parts[4]);
            // $L is the zero-based index of the frameset, $N and the signature come from the entry itself.
            final String baseUrl = url.replace("$L", String.valueOf(i - 1)).replace("$N", parts[6]) + "&sigh=" + parts[7];
            final List<String> urls;
            if (baseUrl.contains("$M")) {
                // A zero grid would turn the division below into Infinity, i.e. Integer.MAX_VALUE
                // pages and an OutOfMemoryError that the catch clause does not handle.
                if (totalCount < 0 || framesPerPageX <= 0 || framesPerPageY <= 0) {
                    continue;
                }
                // long product: two large ints must not overflow into a zero or negative page size
                final long framesPerPage = (long) framesPerPageX * framesPerPageY;
                final int totalPages = (int) Math.ceil(totalCount / (double) framesPerPage);
                urls = new ArrayList<>(totalPages);
                for (int j = 0; j < totalPages; j++) {
                    urls.add(baseUrl.replace("$M", String.valueOf(j)));
                }
            } else {
                urls = Collections.singletonList(baseUrl);
            }
            result.add(new Frameset(
                    urls,
                    frameWidth,
                    frameHeight,
                    totalCount,
                    framesPerPageX,
                    framesPerPageY
            ));
        }
        // Skipped entries leave unused capacity behind.
        result.trimToSize();
        return result;
    } catch (Exception e) {
        throw new ExtractionException(e);
    }
}
}

View file

@ -0,0 +1,63 @@
package org.schabi.newpipe.extractor.stream;
import javax.annotation.Nullable;
import java.util.Collection;
import java.util.List;
/**
 * Immutable description of one set of frame images: the image URLs plus the geometry needed to
 * locate a single frame inside them.
 */
public final class Frameset {

    private final List<String> urls;
    private final int frameWidth;
    private final int frameHeight;
    private final int totalCount;
    private final int framesPerPageX;
    private final int framesPerPageY;

    /**
     * @param urls           URLs of the images of this frameset; the list is stored as given, not copied
     * @param frameWidth     width of one frame, in pixels
     * @param frameHeight    height of one frame, in pixels
     * @param totalCount     total count of frames
     * @param framesPerPageX maximum frames count by x
     * @param framesPerPageY maximum frames count by y
     */
    public Frameset(List<String> urls, int frameWidth, int frameHeight, int totalCount, int framesPerPageX, int framesPerPageY) {
        this.urls = urls;
        this.totalCount = totalCount;
        this.frameWidth = frameWidth;
        this.frameHeight = frameHeight;
        this.framesPerPageX = framesPerPageX;
        this.framesPerPageY = framesPerPageY;
    }

    /**
     * @return the list of image URLs passed to the constructor (the same instance)
     */
    public List<String> getUrls() {
        return urls;
    }

    /**
     * @return total count of frames
     */
    public int getTotalCount() {
        return totalCount;
    }

    /**
     * @return maximum frames count by x
     */
    public int getFramesPerPageX() {
        return framesPerPageX;
    }

    /**
     * @return maximum frames count by y
     */
    public int getFramesPerPageY() {
        return framesPerPageY;
    }

    /**
     * @return width of one frame, in pixels
     */
    public int getFrameWidth() {
        return frameWidth;
    }

    /**
     * @return height of one frame, in pixels
     */
    public int getFrameHeight() {
        return frameHeight;
    }
}

View file

@ -30,7 +30,10 @@ import org.schabi.newpipe.extractor.utils.Localization;
import org.schabi.newpipe.extractor.utils.Parser;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
@ -255,6 +258,15 @@ public abstract class StreamExtractor extends Extractor {
*/
public abstract StreamInfoItemsCollector getRelatedStreams() throws IOException, ExtractionException;
/**
* Returns the {@link Frameset}s available for this stream.
* <p>
* This default implementation always returns an empty list; extractors that can provide
* frames override it.
*
* @return a list of framesets, never {@code null}; empty in this default implementation
* @throws IOException declared for overriding implementations; not thrown by this default
* @throws ExtractionException declared for overriding implementations; not thrown by this default
*/
@Nonnull
public List<Frameset> getFrames() throws IOException, ExtractionException {
return Collections.emptyList();
}
/**
* Should analyse the webpage's document and extracts any error message there might be. (e.g. GEMA block)
*

View file

@ -1,113 +0,0 @@
package org.schabi.newpipe.extractor.stream;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.List;
/**
 * Collection of frameset variants parsed from a base URL template and a list of
 * {@code '#'}-separated parameter strings.
 */
public class StreamFrames {

    /** Number of '#'-separated fields the constructor needs in every parameter string. */
    private static final int PARAM_FIELD_COUNT = 8;

    private final List<Frameset> frames;

    /**
     * Parses every parameter string into a {@link Frameset}.
     * <p>
     * Parameter strings with fewer than eight fields are skipped instead of failing with an
     * {@link ArrayIndexOutOfBoundsException}; the {@code $L} placeholder is still replaced with the
     * position of the string inside {@code params}, so skipped entries do not renumber the others.
     *
     * @param baseUrl URL template containing the {@code $L} and {@code $N} placeholders
     * @param params  one parameter string per variant
     * @throws NumberFormatException if one of the numeric fields is not an integer
     */
    public StreamFrames(String baseUrl, List<String> params) {
        frames = new ArrayList<>(params.size());
        for (int i = 0; i < params.size(); i++) {
            final String[] parts = params.get(i).split("#");
            if (parts.length < PARAM_FIELD_COUNT) {
                continue;
            }
            frames.add(new Frameset(
                    baseUrl.replace("$L", String.valueOf(i)).replace("$N", parts[6]) + "&sigh=" + parts[7],
                    Integer.parseInt(parts[0]),
                    Integer.parseInt(parts[1]),
                    Integer.parseInt(parts[2]),
                    Integer.parseInt(parts[3]),
                    Integer.parseInt(parts[4])
            ));
        }
    }

    /**
     * @return number of successfully parsed variants
     */
    public int getVariantsCount() {
        return frames.size();
    }

    /**
     * @param index zero-based position among the parsed variants
     * @return the variant at that position
     * @throws IndexOutOfBoundsException if {@code index} is out of range
     */
    public Frameset getVariant(int index) {
        return frames.get(index);
    }

    /**
     * @return the first variant whose URL contains {@code default.jpg}, or {@code null} if none does
     */
    @Nullable
    public Frameset getDefaultVariant() {
        for (final Frameset f : frames) {
            if (f.getUrl().contains("default.jpg")) {
                return f;
            }
        }
        return null;
    }

    /**
     * One variant: a URL (possibly still containing the {@code $M} page placeholder) and the
     * geometry of the frames it holds.
     */
    public static class Frameset {

        private final String url;
        private final int frameWidth;
        private final int frameHeight;
        private final int totalCount;
        private final int framesPerPageX;
        private final int framesPerPageY;

        private Frameset(String url, int frameWidth, int frameHeight, int totalCount, int framesPerPageX, int framesPerPageY) {
            this.url = url;
            this.totalCount = totalCount;
            this.frameWidth = frameWidth;
            this.frameHeight = frameHeight;
            this.framesPerPageX = framesPerPageX;
            this.framesPerPageY = framesPerPageY;
        }

        /**
         * @return the URL as built by the constructor, with {@code $M} left unreplaced if present
         */
        public String getUrl() {
            return url;
        }

        /**
         * @param page page number substituted for the {@code $M} placeholder
         * @return the URL of the given page
         */
        public String getUrl(int page) {
            return url.replace("$M", String.valueOf(page));
        }

        /**
         * @return number of pages, or 0 when the URL is not paged or the grid size / frame count
         *         is not positive
         */
        public int getTotalPages() {
            if (!url.contains("$M")) {
                return 0;
            }
            // A zero grid would make the division below infinite and the cast yield Integer.MAX_VALUE.
            if (totalCount <= 0 || framesPerPageX <= 0 || framesPerPageY <= 0) {
                return 0;
            }
            final long framesPerPage = (long) framesPerPageX * framesPerPageY;
            return (int) Math.ceil(totalCount / (double) framesPerPage);
        }

        /**
         * @return total count of frames
         */
        public int getTotalCount() {
            return totalCount;
        }

        /**
         * @return maximum frames count by x
         */
        public int getFramesPerPageX() {
            return framesPerPageX;
        }

        /**
         * @return maximum frames count by y
         */
        public int getFramesPerPageY() {
            return framesPerPageY;
        }

        /**
         * @return width of one frame, in pixels
         */
        public int getFrameWidth() {
            return frameWidth;
        }

        /**
         * @return height of one frame, in pixels
         */
        public int getFrameHeight() {
            return frameHeight;
        }
    }
}

View file

@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube;
import org.junit.BeforeClass;
import org.junit.Test;
import org.schabi.newpipe.Downloader;
import org.schabi.newpipe.extractor.ExtractorAsserts;
import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -13,6 +14,7 @@ import org.schabi.newpipe.extractor.utils.Localization;
import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException;
import java.util.List;
import static org.junit.Assert.*;
import static org.schabi.newpipe.extractor.ExtractorAsserts.assertIsSecureUrl;
@ -244,15 +246,14 @@ public class YoutubeStreamExtractorDefaultTest {
}
// Frames extraction test.
// NOTE(review): this span appears to interleave the removed (StreamFrames-based) and the added
// (List<Frameset>-based) lines of the diff — confirm against the real file. The List-based lines
// assert that the result is non-null and non-empty and that every URL of every frameset passes
// ExtractorAsserts.assertIsValidUrl.
@Test
public void testGetFrames() {
final StreamFrames frames = extractor.getFrames();
public void testGetFrames() throws ExtractionException {
final List<Frameset> frames = extractor.getFrames();
assertNotNull(frames);
assertNotNull(frames.getDefaultVariant());
for (int i=0;i<frames.getVariantsCount();i++) {
final StreamFrames.Frameset frameset = frames.getVariant(i);
final int pages = frameset.getTotalPages();
final String url = pages == 0 ? frameset.getUrl() : frameset.getUrl(pages - 1);
assertNotNull(url);
assertFalse(frames.isEmpty());
for (final Frameset f : frames) {
for (final String url : f.getUrls()) {
ExtractorAsserts.assertIsValidUrl(url);
}
}
}
}