added CommentsInfo
This commit is contained in:
		
							parent
							
								
									4ca23ab5c3
								
							
						
					
					
						commit
						ee239985ae
					
				
					 4 changed files with 135 additions and 75 deletions
				
			
		|  | @ -1,12 +1,16 @@ | |||
| package org.schabi.newpipe.extractor.comments; | ||||
| 
 | ||||
| import java.io.IOException; | ||||
| import java.util.ArrayList; | ||||
| import java.util.List; | ||||
| 
 | ||||
| import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; | ||||
| import org.schabi.newpipe.extractor.ListInfo; | ||||
| import org.schabi.newpipe.extractor.NewPipe; | ||||
| import org.schabi.newpipe.extractor.StreamingService; | ||||
| import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||
| import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; | ||||
| import org.schabi.newpipe.extractor.utils.ExtractorHelper; | ||||
| 
 | ||||
| public class CommentsInfo extends ListInfo<CommentsInfoItem>{ | ||||
| 
 | ||||
|  | @ -19,9 +23,88 @@ public class CommentsInfo extends ListInfo<CommentsInfoItem>{ | |||
|         return getInfo(NewPipe.getServiceByUrl(url), url); | ||||
|     } | ||||
| 
 | ||||
| 	private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) { | ||||
| 		// TODO Auto-generated method stub | ||||
| 		return null; | ||||
| 	private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) throws ExtractionException, IOException { | ||||
| 	    return getInfo(serviceByUrl.getCommentsExtractor(url)); | ||||
| 	} | ||||
| 
 | ||||
|     private static CommentsInfo getInfo(CommentsExtractor commentsExtractor) throws IOException, ExtractionException { | ||||
|         //for services which do not have a comments extractor | ||||
|         if(null == commentsExtractor) { | ||||
|             return null; | ||||
|         } | ||||
|          | ||||
|         commentsExtractor.fetchPage(); | ||||
|         String name = commentsExtractor.getName(); | ||||
|         int serviceId = commentsExtractor.getServiceId(); | ||||
|         ListLinkHandler listUrlIdHandler = commentsExtractor.getUIHandler(); | ||||
|         CommentsInfo commentsInfo = new CommentsInfo(serviceId, listUrlIdHandler, name); | ||||
|         commentsInfo.setCommentsExtractor(commentsExtractor); | ||||
|             InfoItemsPage<CommentsInfoItem> initialCommentsPage = ExtractorHelper.getItemsPageOrLogError(commentsInfo, | ||||
|                     commentsExtractor); | ||||
|         commentsInfo.setComments(new ArrayList<>()); | ||||
|         commentsInfo.getComments().addAll(initialCommentsPage.getItems()); | ||||
|         commentsInfo.setHasMoreComments(initialCommentsPage.hasNextPage()); | ||||
|         commentsInfo.setNextCommentsPageUrl(initialCommentsPage.getNextPageUrl()); | ||||
|         return commentsInfo; | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * Loads the next page of comments into {@code commentsInfo}, appending the new items | ||||
|      * and updating the has-more flag and next page url. Does nothing when | ||||
|      * {@link #hasMoreComments()} is {@code false}. Failures are recorded with | ||||
|      * {@code addError} on the info object instead of being thrown. | ||||
|      * | ||||
|      * @param commentsInfo the info object to extend in place | ||||
|      */ | ||||
|     public static void loadMoreComments(CommentsInfo commentsInfo) { | ||||
|         if (!commentsInfo.hasMoreComments()) { | ||||
|             return; | ||||
|         } | ||||
|         // No extractor attached: obtain one from the service using the stored id and url. | ||||
|         if (commentsInfo.getCommentsExtractor() == null) { | ||||
|             try { | ||||
|                 commentsInfo.setCommentsExtractor(NewPipe.getService(commentsInfo.getServiceId()).getCommentsExtractor(commentsInfo.getUrl())); | ||||
|             } catch (ExtractionException e) { | ||||
|                 commentsInfo.addError(e); | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|         try { | ||||
|             final CommentsExtractor extractor = commentsInfo.getCommentsExtractor(); | ||||
|             final InfoItemsPage<CommentsInfoItem> nextPage = | ||||
|                     extractor.getPage(commentsInfo.getNextCommentsPageUrl()); | ||||
|             commentsInfo.getComments().addAll(nextPage.getItems()); | ||||
|             commentsInfo.setHasMoreComments(nextPage.hasNextPage()); | ||||
|             commentsInfo.setNextCommentsPageUrl(nextPage.getNextPageUrl()); | ||||
|         } catch (IOException | ExtractionException e) { | ||||
|             commentsInfo.addError(e); | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     private transient CommentsExtractor commentsExtractor; | ||||
|     private List<CommentsInfoItem> comments; | ||||
|     private boolean hasMoreComments; | ||||
|     private String nextCommentsPageUrl; | ||||
|      | ||||
|     public List<CommentsInfoItem> getComments() { | ||||
|         return comments; | ||||
|     } | ||||
| 
 | ||||
|     public void setComments(List<CommentsInfoItem> comments) { | ||||
|         this.comments = comments; | ||||
|     } | ||||
| 
 | ||||
|     public boolean hasMoreComments() { | ||||
|         return hasMoreComments; | ||||
|     } | ||||
| 
 | ||||
|     public void setHasMoreComments(boolean hasMoreComments) { | ||||
|         this.hasMoreComments = hasMoreComments; | ||||
|     } | ||||
| 
 | ||||
|     public CommentsExtractor getCommentsExtractor() { | ||||
|         return commentsExtractor; | ||||
|     } | ||||
| 
 | ||||
|     public void setCommentsExtractor(CommentsExtractor commentsExtractor) { | ||||
|         this.commentsExtractor = commentsExtractor; | ||||
|     } | ||||
| 
 | ||||
|     public String getNextCommentsPageUrl() { | ||||
|         return nextCommentsPageUrl; | ||||
|     } | ||||
| 
 | ||||
|     public void setNextCommentsPageUrl(String nextCommentsPageUrl) { | ||||
|         this.nextCommentsPageUrl = nextCommentsPageUrl; | ||||
|     } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -31,13 +31,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { | |||
| 
 | ||||
|     private List<String> cookies; | ||||
|     private String sessionToken; | ||||
|     private String commentsToken; | ||||
|     private String title; | ||||
|     private InfoItemsPage<CommentsInfoItem> initPage; | ||||
| 
 | ||||
|     private ObjectMapper mapper = new ObjectMapper(); | ||||
| 
 | ||||
|     public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { | ||||
|         super(service, uiHandler); | ||||
|         // TODO Auto-generated constructor stub | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|  | @ -45,12 +45,16 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { | |||
|         // initial page does not load any comments but is required to get session token | ||||
|         // and cookies | ||||
|         super.fetchPage(); | ||||
|         return getPage(getNextPageUrl()); | ||||
|         return initPage; | ||||
|     } | ||||
| 
 | ||||
|     // Isn't this method redundant? Callers could simply call getNextPageUrl() on the page returned by getInitialPage(). | ||||
|     @Override | ||||
|     public String getNextPageUrl() throws IOException, ExtractionException { | ||||
|         return getNextPageUrl(commentsToken); | ||||
|         // initial page does not load any comments but is required to get session token | ||||
|         // and cookies | ||||
|         super.fetchPage(); | ||||
|         return initPage.getNextPageUrl(); | ||||
|     } | ||||
| 
 | ||||
|     private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { | ||||
|  | @ -91,6 +95,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { | |||
|     } | ||||
| 
 | ||||
|     private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { | ||||
|          | ||||
|         fetchTitle(ajaxJson); | ||||
|          | ||||
|         List<JsonNode> comments = ajaxJson.findValues("commentRenderer"); | ||||
|         comments.stream().forEach(c -> { | ||||
|             CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { | ||||
|  | @ -192,19 +199,29 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { | |||
| 
 | ||||
|     } | ||||
| 
 | ||||
|     private void fetchTitle(JsonNode ajaxJson) { | ||||
|         if(null == title) { | ||||
|             try { | ||||
|                 title = ajaxJson.findValue("commentTargetTitle").get("simpleText").asText(); | ||||
|             } catch (Exception e) { | ||||
|                 title = "Youtube Comments"; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { | ||||
|         DownloadResponse response = downloader.get(getUrl()); | ||||
|         String responseBody = response.getResponseBody(); | ||||
|         cookies = response.getResponseHeaders().get("Set-Cookie"); | ||||
|         sessionToken = findValue(responseBody, "XSRF_TOKEN"); | ||||
|         commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); | ||||
|         String commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); | ||||
|         initPage = getPage(getNextPageUrl(commentsToken)); | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public String getName() throws ParsingException { | ||||
|         // TODO Auto-generated method stub | ||||
|         return null; | ||||
|         return title; | ||||
|     } | ||||
| 
 | ||||
|     private String makeAjaxRequest(String siteUrl) throws IOException, ReCaptchaException { | ||||
|  |  | |||
|  | @ -1,20 +1,11 @@ | |||
| package org.schabi.newpipe.extractor.services.youtube.linkHandler; | ||||
| 
 | ||||
| import java.io.IOException; | ||||
| import java.io.UnsupportedEncodingException; | ||||
| import java.net.URI; | ||||
| import java.net.URISyntaxException; | ||||
| import java.net.URLDecoder; | ||||
| import java.util.List; | ||||
| 
 | ||||
| import org.jsoup.Jsoup; | ||||
| import org.jsoup.nodes.Document; | ||||
| import org.jsoup.nodes.Element; | ||||
| import org.schabi.newpipe.extractor.Downloader; | ||||
| import org.schabi.newpipe.extractor.NewPipe; | ||||
| import org.schabi.newpipe.extractor.exceptions.FoundAdException; | ||||
| import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||
| import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; | ||||
| import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; | ||||
| import org.schabi.newpipe.extractor.utils.Parser; | ||||
| 
 | ||||
|  | @ -27,6 +18,11 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { | |||
|         return instance; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Builds the YouTube watch url for a video id. | ||||
|      * | ||||
|      * @param id the video id to append | ||||
|      * @return {@code "https://www.youtube.com/watch?v="} followed by {@code id} | ||||
|      */ | ||||
|     @Override | ||||
|     public String getUrl(String id) { | ||||
|         final StringBuilder watchUrl = new StringBuilder("https://www.youtube.com/watch?v="); | ||||
|         return watchUrl.append(id).toString(); | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public String getId(String url) throws ParsingException, IllegalArgumentException { | ||||
|         if (url.isEmpty()) { | ||||
|  | @ -44,8 +40,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { | |||
|                 } catch (UnsupportedEncodingException uee) { | ||||
|                     throw new ParsingException("Could not parse attribution_link", uee); | ||||
|                 } | ||||
|             } else if (lowercaseUrl.contains("youtube.com/shared?ci=")) { | ||||
|                 return getRealIdFromSharedLink(url); | ||||
|             } else if (url.contains("vnd.youtube")) { | ||||
|                 id = Parser.matchGroup1(ID_PATTERN, url); | ||||
|             } else if (url.contains("embed")) { | ||||
|  | @ -86,56 +80,6 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { | |||
|         } | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Get the real id from a shared uri. | ||||
|      * <p> | ||||
|      * Shared URI's look like this: | ||||
|      * <pre> | ||||
|      *     * https://www.youtube.com/shared?ci=PJICrTByb3E | ||||
|      *     * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link | ||||
|      * </pre> | ||||
|      * The shared page is downloaded and the id is read from its canonical link, falling | ||||
|      * back to the {@code og:url} meta tag. | ||||
|      * | ||||
|      * @param url The shared url | ||||
|      * @return the id of the stream | ||||
|      * @throws ParsingException if the url is not a valid shared link, the page cannot be | ||||
|      *                          downloaded, neither tag is present, or the resolved id equals | ||||
|      *                          the shared id | ||||
|      */ | ||||
|     private String getRealIdFromSharedLink(String url) throws ParsingException { | ||||
|         URI uri; | ||||
|         try { | ||||
|             uri = new URI(url); | ||||
|         } catch (URISyntaxException e) { | ||||
|             throw new ParsingException("Invalid shared link", e); | ||||
|         } | ||||
|         String sharedId = getSharedId(uri); | ||||
|         Downloader downloader = NewPipe.getDownloader(); | ||||
|         String content; | ||||
|         try { | ||||
|             content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId); | ||||
|         } catch (IOException | ReCaptchaException e) { | ||||
|             throw new ParsingException("Unable to resolve shared link", e); | ||||
|         } | ||||
|         final Document document = Jsoup.parse(content); | ||||
| 
 | ||||
|         final String urlWithRealId; | ||||
|         final Element canonical = document.select("link[rel=\"canonical\"]").first(); | ||||
|         if (canonical != null) { | ||||
|             urlWithRealId = canonical.attr("abs:href"); | ||||
|         } else { | ||||
|             // first() yields null when nothing matches; fail with a ParsingException instead of an NPE. | ||||
|             final Element ogUrl = document.select("meta[property=\"og:url\"]").first(); | ||||
|             if (ogUrl == null) { | ||||
|                 throw new ParsingException("Unable to resolve shared link: no canonical link or og:url found for " + sharedId); | ||||
|             } | ||||
|             urlWithRealId = ogUrl.attr("abs:content"); | ||||
|         } | ||||
| 
 | ||||
|         String realId = Parser.matchGroup1(ID_PATTERN, urlWithRealId); | ||||
|         if (sharedId.equals(realId)) { | ||||
|             throw new ParsingException("Got same id for as shared info_id: " + sharedId); | ||||
|         } | ||||
|         return realId; | ||||
|     } | ||||
| 
 | ||||
|     private String getSharedId(URI uri) throws ParsingException { | ||||
|         if (!"/shared".equals(uri.getPath())) { | ||||
|             throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")"); | ||||
|         } | ||||
|         return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery()); | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public boolean onAcceptUrl(final String url) throws FoundAdException { | ||||
|         final String lowercaseUrl = url.toLowerCase(); | ||||
|  | @ -156,8 +100,8 @@ public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { | |||
|         } | ||||
|     } | ||||
| 
 | ||||
| 	@Override | ||||
| 	public String getUrl(String id, List<String> contentFilter, String sortFilter) throws ParsingException { | ||||
| 		return "https://www.youtube.com/watch?v=" + id; | ||||
| 	} | ||||
|     @Override | ||||
|     public String getUrl(String id, List<String> contentFilter, String sortFilter) throws ParsingException { | ||||
|         return "https://www.youtube.com/watch?v=" + id; | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ import org.junit.Test; | |||
| import org.schabi.newpipe.Downloader; | ||||
| import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; | ||||
| import org.schabi.newpipe.extractor.NewPipe; | ||||
| import org.schabi.newpipe.extractor.comments.CommentsInfo; | ||||
| import org.schabi.newpipe.extractor.comments.CommentsInfoItem; | ||||
| import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||
| import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; | ||||
|  | @ -55,6 +56,21 @@ public class YoutubeCommentsExtractorTest { | |||
| 
 | ||||
|         assertTrue(result); | ||||
|     } | ||||
|      | ||||
|     /** | ||||
|      * Loads comments through {@link CommentsInfo} and checks the name plus the presence of | ||||
|      * a known comment, paging with {@code loadMoreComments} until it is found. | ||||
|      */ | ||||
|     @Test | ||||
|     public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException { | ||||
|         final String expectedComment = "i should really be in the top comment.lol"; | ||||
|         CommentsInfo commentsInfo = CommentsInfo.getInfo("https://www.youtube.com/watch?v=rrgFN3AxGfs"); | ||||
|         assertTrue("what the fuck am i doing with my life.wmv".equals(commentsInfo.getName())); | ||||
|         boolean result = findInComments(commentsInfo.getComments(), expectedComment); | ||||
| 
 | ||||
|         while (commentsInfo.hasMoreComments() && !result) { | ||||
|             final int loadedBefore = commentsInfo.getComments().size(); | ||||
|             CommentsInfo.loadMoreComments(commentsInfo); | ||||
|             // loadMoreComments records failures instead of throwing and then leaves | ||||
|             // hasMoreComments() unchanged; stop when nothing was added so the test fails | ||||
|             // instead of looping forever. | ||||
|             if (commentsInfo.getComments().size() == loadedBefore) { | ||||
|                 break; | ||||
|             } | ||||
|             result = findInComments(commentsInfo.getComments(), expectedComment); | ||||
|         } | ||||
| 
 | ||||
|         assertTrue(result); | ||||
|     } | ||||
| 
 | ||||
|     private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) { | ||||
|         return findInComments(comments.getItems(), comment); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue