[YouTube] Do not treat a failure to extract videoPrimaryInfoRenderer and/or videoSecondaryInfoRenderer as fatal
Also de-duplicate the common code used to obtain these video info renderers. This change allows extraction of videos without visual metadata.
This commit is contained in:
parent
eb07d70a2c
commit
aa9a8ca23c
1 changed files with 56 additions and 73 deletions
|
@ -204,45 +204,48 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"))
|
final String videoPrimaryInfoRendererDateText =
|
||||||
.startsWith("Premiered")) {
|
getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"));
|
||||||
final String time = getTextFromObject(
|
|
||||||
getVideoPrimaryInfoRenderer().getObject("dateText")).substring(13);
|
|
||||||
|
|
||||||
try { // Premiered 20 hours ago
|
if (videoPrimaryInfoRendererDateText != null) {
|
||||||
final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(
|
if (videoPrimaryInfoRendererDateText.startsWith("Premiered")) {
|
||||||
Localization.fromLocalizationCode("en"));
|
final String time = videoPrimaryInfoRendererDateText.substring(13);
|
||||||
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
|
|
||||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime);
|
try { // Premiered 20 hours ago
|
||||||
} catch (final Exception ignored) {
|
final TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(
|
||||||
|
Localization.fromLocalizationCode("en"));
|
||||||
|
final OffsetDateTime parsedTime = timeAgoParser.parse(time).offsetDateTime();
|
||||||
|
return DateTimeFormatter.ISO_LOCAL_DATE.format(parsedTime);
|
||||||
|
} catch (final Exception ignored) {
|
||||||
|
}
|
||||||
|
|
||||||
|
try { // Premiered Feb 21, 2020
|
||||||
|
final LocalDate localDate = LocalDate.parse(time,
|
||||||
|
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
|
||||||
|
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
||||||
|
} catch (final Exception ignored) {
|
||||||
|
}
|
||||||
|
|
||||||
|
try { // Premiered on 21 Feb 2020
|
||||||
|
final LocalDate localDate = LocalDate.parse(time,
|
||||||
|
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
|
||||||
|
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
||||||
|
} catch (final Exception ignored) {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try { // Premiered Feb 21, 2020
|
try {
|
||||||
final LocalDate localDate = LocalDate.parse(time,
|
// TODO: this parses English formatted dates only, we need a better approach to
|
||||||
DateTimeFormatter.ofPattern("MMM dd, yyyy", Locale.ENGLISH));
|
// parse the textual date
|
||||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
final LocalDate localDate = LocalDate.parse(videoPrimaryInfoRendererDateText,
|
||||||
} catch (final Exception ignored) {
|
|
||||||
}
|
|
||||||
|
|
||||||
try { // Premiered on 21 Feb 2020
|
|
||||||
final LocalDate localDate = LocalDate.parse(time,
|
|
||||||
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
|
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
|
||||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
||||||
} catch (final Exception ignored) {
|
} catch (final Exception e) {
|
||||||
|
throw new ParsingException("Could not get upload date", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
throw new ParsingException("Could not get upload date");
|
||||||
// TODO: this parses English formatted dates only, we need a better approach to parse
|
|
||||||
// the textual date
|
|
||||||
final LocalDate localDate = LocalDate.parse(getTextFromObject(
|
|
||||||
getVideoPrimaryInfoRenderer().getObject("dateText")),
|
|
||||||
DateTimeFormatter.ofPattern("dd MMM yyyy", Locale.ENGLISH));
|
|
||||||
return DateTimeFormatter.ISO_LOCAL_DATE.format(localDate);
|
|
||||||
} catch (final Exception e) {
|
|
||||||
throw new ParsingException("Could not get upload date", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -565,19 +568,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
public String getUploaderAvatarUrl() throws ParsingException {
|
public String getUploaderAvatarUrl() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
|
||||||
String url = null;
|
final String url = getVideoSecondaryInfoRenderer()
|
||||||
|
.getObject("owner")
|
||||||
try {
|
.getObject("videoOwnerRenderer")
|
||||||
url = getVideoSecondaryInfoRenderer()
|
.getObject("thumbnail")
|
||||||
.getObject("owner")
|
.getArray("thumbnails")
|
||||||
.getObject("videoOwnerRenderer")
|
.getObject(0)
|
||||||
.getObject("thumbnail")
|
.getString("url");
|
||||||
.getArray("thumbnails")
|
|
||||||
.getObject(0)
|
|
||||||
.getString("url");
|
|
||||||
} catch (final ParsingException ignored) {
|
|
||||||
// Age-restricted videos cause a ParsingException here
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isNullOrEmpty(url)) {
|
if (isNullOrEmpty(url)) {
|
||||||
if (ageLimit == NO_AGE_LIMIT) {
|
if (ageLimit == NO_AGE_LIMIT) {
|
||||||
|
@ -1212,40 +1209,29 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
// Utils
|
// Utils
|
||||||
//////////////////////////////////////////////////////////////////////////*/
|
//////////////////////////////////////////////////////////////////////////*/
|
||||||
|
|
||||||
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
|
@Nonnull
|
||||||
|
private JsonObject getVideoPrimaryInfoRenderer() {
|
||||||
if (videoPrimaryInfoRenderer != null) {
|
if (videoPrimaryInfoRenderer != null) {
|
||||||
return videoPrimaryInfoRenderer;
|
return videoPrimaryInfoRenderer;
|
||||||
}
|
}
|
||||||
|
|
||||||
final JsonArray contents = nextResponse.getObject("contents")
|
videoPrimaryInfoRenderer = getVideoInfoRenderer("videoPrimaryInfoRenderer");
|
||||||
.getObject("twoColumnWatchNextResults").getObject("results").getObject("results")
|
return videoPrimaryInfoRenderer;
|
||||||
.getArray("contents");
|
|
||||||
JsonObject theVideoPrimaryInfoRenderer = null;
|
|
||||||
|
|
||||||
for (final Object content : contents) {
|
|
||||||
if (((JsonObject) content).has("videoPrimaryInfoRenderer")) {
|
|
||||||
theVideoPrimaryInfoRenderer = ((JsonObject) content)
|
|
||||||
.getObject("videoPrimaryInfoRenderer");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isNullOrEmpty(theVideoPrimaryInfoRenderer)) {
|
|
||||||
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
|
|
||||||
}
|
|
||||||
|
|
||||||
videoPrimaryInfoRenderer = theVideoPrimaryInfoRenderer;
|
|
||||||
return theVideoPrimaryInfoRenderer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException {
|
private JsonObject getVideoSecondaryInfoRenderer() {
|
||||||
if (videoSecondaryInfoRenderer != null) {
|
if (videoSecondaryInfoRenderer != null) {
|
||||||
return videoSecondaryInfoRenderer;
|
return videoSecondaryInfoRenderer;
|
||||||
}
|
}
|
||||||
|
|
||||||
videoSecondaryInfoRenderer = nextResponse
|
videoSecondaryInfoRenderer = getVideoInfoRenderer("videoSecondaryInfoRenderer");
|
||||||
.getObject("contents")
|
return videoSecondaryInfoRenderer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Nonnull
|
||||||
|
private JsonObject getVideoInfoRenderer(@Nonnull final String videoRendererName) {
|
||||||
|
return nextResponse.getObject("contents")
|
||||||
.getObject("twoColumnWatchNextResults")
|
.getObject("twoColumnWatchNextResults")
|
||||||
.getObject("results")
|
.getObject("results")
|
||||||
.getObject("results")
|
.getObject("results")
|
||||||
|
@ -1253,13 +1239,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
.stream()
|
.stream()
|
||||||
.filter(JsonObject.class::isInstance)
|
.filter(JsonObject.class::isInstance)
|
||||||
.map(JsonObject.class::cast)
|
.map(JsonObject.class::cast)
|
||||||
.filter(content -> content.has("videoSecondaryInfoRenderer"))
|
.filter(content -> content.has(videoRendererName))
|
||||||
.map(content -> content.getObject("videoSecondaryInfoRenderer"))
|
.map(content -> content.getObject(videoRendererName))
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.orElseThrow(
|
.orElse(new JsonObject());
|
||||||
() -> new ParsingException("Could not find videoSecondaryInfoRenderer"));
|
|
||||||
|
|
||||||
return videoSecondaryInfoRenderer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
|
|
Loading…
Reference in a new issue