Fix YouTube subscriber count
Modify test to fail on too small subscriber count
This commit is contained in:
parent
dbdd9ed083
commit
06016d1ae3
3 changed files with 35 additions and 3 deletions
|
@ -49,7 +49,7 @@ import java.util.ArrayList;
|
|||
public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
|
||||
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
||||
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
|
||||
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";
|
||||
|
||||
private Document doc;
|
||||
|
||||
|
@ -135,10 +135,11 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
|
||||
@Override
|
||||
public long getSubscriberCount() throws ParsingException {
|
||||
final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
|
||||
final String el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]")
|
||||
.first().attr("title");
|
||||
if (el != null) {
|
||||
try {
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(el.text()));
|
||||
return Utils.mixedNumberWordToLong(el);
|
||||
} catch (NumberFormatException e) {
|
||||
throw new ParsingException("Could not get subscriber count", e);
|
||||
}
|
||||
|
|
|
@ -27,6 +27,35 @@ public class Utils {
|
|||
return toRemove.replaceAll("\\D+", "");
|
||||
}
|
||||
|
||||
/**
|
||||
* <p>Convert a mixed number word to a long.</p>
|
||||
* <p>Examples:</p>
|
||||
* <ul>
|
||||
* <li>123 -> 123</li>
|
||||
* <li>1.23K -> 1230</li>
|
||||
* <li>1.23M -> 1230000</li>
|
||||
* </ul>
|
||||
* @param numberWord string to be converted to a long
|
||||
* @return a long
|
||||
* @throws NumberFormatException
|
||||
* @throws ParsingException
|
||||
*/
|
||||
public static long mixedNumberWordToLong(String numberWord) throws NumberFormatException, ParsingException {
|
||||
String multiplier = "";
|
||||
try {
|
||||
multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMkm])+", numberWord, 2);
|
||||
} catch(ParsingException ignored) {}
|
||||
double count = Double.parseDouble(Parser.matchGroup1("([\\d]+([\\.,][\\d]+)?)", numberWord));
|
||||
switch (multiplier.toUpperCase()) {
|
||||
case "K":
|
||||
return (long) (count * 1e3);
|
||||
case "M":
|
||||
return (long) (count * 1e6);
|
||||
default:
|
||||
return (long) (count);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the url matches the pattern.
|
||||
*
|
||||
|
|
|
@ -105,6 +105,7 @@ public class YoutubeChannelExtractorTest {
|
|||
@Test
|
||||
public void testSubscriberCount() throws Exception {
|
||||
assertTrue("Wrong subscriber count", extractor.getSubscriberCount() >= 0);
|
||||
assertTrue("Subscriber count too small", extractor.getSubscriberCount() >= 4e6);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -195,6 +196,7 @@ public class YoutubeChannelExtractorTest {
|
|||
@Test
|
||||
public void testSubscriberCount() throws Exception {
|
||||
assertTrue("Wrong subscriber count", extractor.getSubscriberCount() >= 0);
|
||||
assertTrue("Subscriber count too small", extractor.getSubscriberCount() >= 10e6);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue