added comments extractor
This commit is contained in:
		
							parent
							
								
									1e7bcfbd66
								
							
						
					
					
						commit
						53d3997904
					
				
					 17 changed files with 1007 additions and 159 deletions
				
			
		|  | @ -1,7 +1,7 @@ | ||||||
| allprojects { | allprojects { | ||||||
|     apply plugin: 'java-library' |     apply plugin: 'java-library' | ||||||
|     sourceCompatibility = 1.7 |     sourceCompatibility = 1.8 | ||||||
|     targetCompatibility = 1.7 |     targetCompatibility = 1.8 | ||||||
| 
 | 
 | ||||||
|     version 'v0.13.0' |     version 'v0.13.0' | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -6,6 +6,7 @@ dependencies { | ||||||
|     implementation 'org.mozilla:rhino:1.7.7.1' |     implementation 'org.mozilla:rhino:1.7.7.1' | ||||||
|     implementation 'com.github.spotbugs:spotbugs-annotations:3.1.0' |     implementation 'com.github.spotbugs:spotbugs-annotations:3.1.0' | ||||||
|     implementation 'org.nibor.autolink:autolink:0.8.0' |     implementation 'org.nibor.autolink:autolink:0.8.0' | ||||||
|  |     implementation 'com.fasterxml.jackson.core:jackson-databind:2.9.5' | ||||||
| 
 | 
 | ||||||
|     testImplementation 'junit:junit:4.12' |     testImplementation 'junit:junit:4.12' | ||||||
| } | } | ||||||
|  | @ -0,0 +1,26 @@ | ||||||
|  | package org.schabi.newpipe.extractor; | ||||||
|  | 
 | ||||||
|  | import java.util.List; | ||||||
|  | import java.util.Map; | ||||||
|  | 
 | ||||||
/**
 * Result of an HTTP download: the response body together with the response
 * headers.
 *
 * <p>The headers are exposed as a read-only view of the map handed to the
 * constructor, so callers cannot modify them through this object.</p>
 */
public class DownloadResponse {
	private final String responseBody;
	private final Map<String, List<String>> responseHeaders;

	/**
	 * Creates a response holder.
	 *
	 * @param responseBody the body of the response; stored as-is
	 * @param headers      the response headers keyed by header name; {@code null}
	 *                     is treated as "no headers"
	 */
	public DownloadResponse(String responseBody, Map<String, List<String>> headers) {
		this.responseBody = responseBody;
		if (headers == null) {
			this.responseHeaders = java.util.Collections.<String, List<String>>emptyMap();
		} else {
			// Wrap instead of copying so the lookup behaviour of the supplied map
			// (for example a case-insensitive comparator) is kept.
			this.responseHeaders = java.util.Collections.unmodifiableMap(headers);
		}
	}

	/**
	 * @return the response body exactly as passed to the constructor
	 */
	public String getResponseBody() {
		return responseBody;
	}

	/**
	 * @return a read-only view of the response headers; never {@code null}
	 */
	public Map<String, List<String>> getResponseHeaders() {
		return responseHeaders;
	}

}
|  | @ -1,10 +1,11 @@ | ||||||
| package org.schabi.newpipe.extractor; | package org.schabi.newpipe.extractor; | ||||||
| 
 | 
 | ||||||
| import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; |  | ||||||
| 
 |  | ||||||
| import java.io.IOException; | import java.io.IOException; | ||||||
|  | import java.util.List; | ||||||
| import java.util.Map; | import java.util.Map; | ||||||
| 
 | 
 | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; | ||||||
|  | 
 | ||||||
| /* | /* | ||||||
|  * Created by Christian Schabesberger on 28.01.16. |  * Created by Christian Schabesberger on 28.01.16. | ||||||
|  * |  * | ||||||
|  | @ -28,19 +29,20 @@ import java.util.Map; | ||||||
| public interface Downloader { | public interface Downloader { | ||||||
| 
 | 
 | ||||||
| 	/** | 	/** | ||||||
|      * Download the text file at the supplied URL as in download(String), | 	 * Download the text file at the supplied URL as in download(String), but set | ||||||
|      * but set the HTTP header field "Accept-Language" to the supplied string. | 	 * the HTTP header field "Accept-Language" to the supplied string. | ||||||
| 	 * | 	 * | ||||||
| 	 * @param siteUrl  the URL of the text file to return the contents of | 	 * @param siteUrl  the URL of the text file to return the contents of | ||||||
|      * @param language the language (usually a 2-character code) to set as the preferred language | 	 * @param language the language (usually a 2-character code) to set as the | ||||||
|  | 	 *                 preferred language | ||||||
| 	 * @return the contents of the specified text file | 	 * @return the contents of the specified text file | ||||||
| 	 * @throws IOException | 	 * @throws IOException | ||||||
| 	 */ | 	 */ | ||||||
| 	String download(String siteUrl, String language) throws IOException, ReCaptchaException; | 	String download(String siteUrl, String language) throws IOException, ReCaptchaException; | ||||||
| 
 | 
 | ||||||
| 	/** | 	/** | ||||||
|      * Download the text file at the supplied URL as in download(String), | 	 * Download the text file at the supplied URL as in download(String), but set | ||||||
|      * but set the HTTP header field "Accept-Language" to the supplied string. | 	 * the HTTP header field "Accept-Language" to the supplied string. | ||||||
| 	 * | 	 * | ||||||
| 	 * @param siteUrl          the URL of the text file to return the contents of | 	 * @param siteUrl          the URL of the text file to return the contents of | ||||||
| 	 * @param customProperties set request header properties | 	 * @param customProperties set request header properties | ||||||
|  | @ -50,12 +52,17 @@ public interface Downloader { | ||||||
| 	String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException; | 	String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException; | ||||||
| 
 | 
 | ||||||
| 	/** | 	/** | ||||||
|      * Download (via HTTP) the text file located at the supplied URL, and return its contents. | 	 * Download (via HTTP) the text file located at the supplied URL, and return its | ||||||
|      * Primarily intended for downloading web pages. | 	 * contents. Primarily intended for downloading web pages. | ||||||
| 	 * | 	 * | ||||||
| 	 * @param siteUrl the URL of the text file to download | 	 * @param siteUrl the URL of the text file to download | ||||||
| 	 * @return the contents of the specified text file | 	 * @return the contents of the specified text file | ||||||
| 	 * @throws IOException | 	 * @throws IOException | ||||||
| 	 */ | 	 */ | ||||||
| 	String download(String siteUrl) throws IOException, ReCaptchaException; | 	String download(String siteUrl) throws IOException, ReCaptchaException; | ||||||
|  | 
 | ||||||
|  | 	DownloadResponse downloadWithHeaders(String siteUrl, Map<String, List<String>> requestHeaders) | ||||||
|  | 			throws IOException, ReCaptchaException; | ||||||
|  | 
 | ||||||
|  | 	DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -68,6 +68,7 @@ public abstract class InfoItem implements Serializable { | ||||||
|     public enum InfoType { |     public enum InfoType { | ||||||
|         STREAM, |         STREAM, | ||||||
|         PLAYLIST, |         PLAYLIST, | ||||||
|         CHANNEL |         CHANNEL, | ||||||
|  |         COMMENT | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -1,18 +1,24 @@ | ||||||
| package org.schabi.newpipe.extractor; | package org.schabi.newpipe.extractor; | ||||||
| 
 | 
 | ||||||
| import org.schabi.newpipe.extractor.channel.ChannelExtractor; |  | ||||||
| import org.schabi.newpipe.extractor.exceptions.ExtractionException; |  | ||||||
| import org.schabi.newpipe.extractor.exceptions.ParsingException; |  | ||||||
| import org.schabi.newpipe.extractor.kiosk.KioskList; |  | ||||||
| import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; |  | ||||||
| import org.schabi.newpipe.extractor.search.SearchExtractor; |  | ||||||
| import org.schabi.newpipe.extractor.linkhandler.*; |  | ||||||
| import org.schabi.newpipe.extractor.stream.StreamExtractor; |  | ||||||
| import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; |  | ||||||
| 
 |  | ||||||
| import java.util.Collections; | import java.util.Collections; | ||||||
| import java.util.List; | import java.util.List; | ||||||
| 
 | 
 | ||||||
|  | import org.schabi.newpipe.extractor.channel.ChannelExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.comments.CommentsExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||||
|  | import org.schabi.newpipe.extractor.kiosk.KioskList; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.LinkHandler; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.search.SearchExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.stream.StreamExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; | ||||||
|  | 
 | ||||||
| public abstract class StreamingService { | public abstract class StreamingService { | ||||||
|     public static class ServiceInfo { |     public static class ServiceInfo { | ||||||
|         private final String name; |         private final String name; | ||||||
|  | @ -71,6 +77,7 @@ public abstract class StreamingService { | ||||||
|     public abstract ListLinkHandlerFactory getChannelLHFactory(); |     public abstract ListLinkHandlerFactory getChannelLHFactory(); | ||||||
|     public abstract ListLinkHandlerFactory getPlaylistLHFactory(); |     public abstract ListLinkHandlerFactory getPlaylistLHFactory(); | ||||||
|     public abstract SearchQueryHandlerFactory getSearchQHFactory(); |     public abstract SearchQueryHandlerFactory getSearchQHFactory(); | ||||||
|  |     public abstract ListLinkHandlerFactory getCommentsLHFactory(); | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|     //////////////////////////////////////////// |     //////////////////////////////////////////// | ||||||
|  | @ -84,6 +91,7 @@ public abstract class StreamingService { | ||||||
|     public abstract ChannelExtractor getChannelExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; |     public abstract ChannelExtractor getChannelExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; | ||||||
|     public abstract PlaylistExtractor getPlaylistExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; |     public abstract PlaylistExtractor getPlaylistExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; | ||||||
|     public abstract StreamExtractor getStreamExtractor(LinkHandler UIHFactory) throws ExtractionException; |     public abstract StreamExtractor getStreamExtractor(LinkHandler UIHFactory) throws ExtractionException; | ||||||
|  |     public abstract CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException; | ||||||
| 
 | 
 | ||||||
|     public SearchExtractor getSearchExtractor(String query, List<String> contentFilter, String sortFilter, String contentCountry) throws ExtractionException { |     public SearchExtractor getSearchExtractor(String query, List<String> contentFilter, String sortFilter, String contentCountry) throws ExtractionException { | ||||||
|         return getSearchExtractor(getSearchQHFactory().fromQuery(query, contentFilter, sortFilter), contentCountry); |         return getSearchExtractor(getSearchQHFactory().fromQuery(query, contentFilter, sortFilter), contentCountry); | ||||||
|  | @ -113,6 +121,12 @@ public abstract class StreamingService { | ||||||
|         return getStreamExtractor(getStreamLHFactory().fromUrl(url)); |         return getStreamExtractor(getStreamLHFactory().fromUrl(url)); | ||||||
|     } |     } | ||||||
|      |      | ||||||
|  |     public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException { | ||||||
|  |         return getCommentsExtractor(getCommentsLHFactory().fromUrl(url)); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|   |   | ||||||
| 
 | 
 | ||||||
| 	/** | 	/** | ||||||
|  |  | ||||||
|  | @ -0,0 +1,14 @@ | ||||||
|  | package org.schabi.newpipe.extractor.comments; | ||||||
|  | 
 | ||||||
|  | import org.schabi.newpipe.extractor.ListExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.StreamingService; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; | ||||||
|  | 
 | ||||||
|  | public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> { | ||||||
|  | 
 | ||||||
|  | 	public CommentsExtractor(StreamingService service, ListLinkHandler uiHandler) { | ||||||
|  | 		super(service, uiHandler); | ||||||
|  | 		// TODO Auto-generated constructor stub | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | } | ||||||
|  | @ -0,0 +1,27 @@ | ||||||
|  | package org.schabi.newpipe.extractor.comments; | ||||||
|  | 
 | ||||||
|  | import java.io.IOException; | ||||||
|  | 
 | ||||||
|  | import org.schabi.newpipe.extractor.ListInfo; | ||||||
|  | import org.schabi.newpipe.extractor.NewPipe; | ||||||
|  | import org.schabi.newpipe.extractor.StreamingService; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; | ||||||
|  | 
 | ||||||
|  | public class CommentsInfo extends ListInfo<CommentsInfoItem>{ | ||||||
|  | 
 | ||||||
|  | 	private CommentsInfo(int serviceId, ListLinkHandler listUrlIdHandler, String name) { | ||||||
|  | 		super(serviceId, listUrlIdHandler, name); | ||||||
|  | 		// TODO Auto-generated constructor stub | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
|  | 	public static CommentsInfo getInfo(String url) throws IOException, ExtractionException { | ||||||
|  |         return getInfo(NewPipe.getServiceByUrl(url), url); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | 	private static CommentsInfo getInfo(StreamingService serviceByUrl, String url) { | ||||||
|  | 		// TODO Auto-generated method stub | ||||||
|  | 		return null; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | } | ||||||
|  | @ -0,0 +1,76 @@ | ||||||
|  | package org.schabi.newpipe.extractor.comments; | ||||||
|  | 
 | ||||||
|  | import org.schabi.newpipe.extractor.InfoItem; | ||||||
|  | 
 | ||||||
|  | public class CommentsInfoItem extends InfoItem{ | ||||||
|  | 
 | ||||||
|  | 	private String commentId; | ||||||
|  | 	private String commentText; | ||||||
|  | 	private String authorName; | ||||||
|  | 	private String authorThumbnail; | ||||||
|  | 	private String authorEndpoint; | ||||||
|  | 	private String publishedTime; | ||||||
|  | 	private Integer likeCount; | ||||||
|  | 	 | ||||||
|  | 	public CommentsInfoItem(int serviceId, String url, String name) { | ||||||
|  | 		super(InfoType.COMMENT, serviceId, url, name); | ||||||
|  | 		// TODO Auto-generated constructor stub | ||||||
|  | 	} | ||||||
|  | 	 | ||||||
|  | 	public String getCommentText() { | ||||||
|  | 		return commentText; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public void setCommentText(String contentText) { | ||||||
|  | 		this.commentText = contentText; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public String getAuthorName() { | ||||||
|  | 		return authorName; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public void setAuthorName(String authorName) { | ||||||
|  | 		this.authorName = authorName; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public String getAuthorThumbnail() { | ||||||
|  | 		return authorThumbnail; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public void setAuthorThumbnail(String authorThumbnail) { | ||||||
|  | 		this.authorThumbnail = authorThumbnail; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public String getAuthorEndpoint() { | ||||||
|  | 		return authorEndpoint; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public void setAuthorEndpoint(String authorEndpoint) { | ||||||
|  | 		this.authorEndpoint = authorEndpoint; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public String getPublishedTime() { | ||||||
|  | 		return publishedTime; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public void setPublishedTime(String publishedTime) { | ||||||
|  | 		this.publishedTime = publishedTime; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public Integer getLikeCount() { | ||||||
|  | 		return likeCount; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public void setLikeCount(Integer likeCount) { | ||||||
|  | 		this.likeCount = likeCount; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public String getCommentId() { | ||||||
|  | 		return commentId; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public void setCommentId(String commentId) { | ||||||
|  | 		this.commentId = commentId; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | } | ||||||
|  | @ -0,0 +1,22 @@ | ||||||
|  | package org.schabi.newpipe.extractor.comments; | ||||||
|  | 
 | ||||||
|  | import org.schabi.newpipe.extractor.InfoItemExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||||
|  | 
 | ||||||
|  | public interface CommentsInfoItemExtractor extends InfoItemExtractor { | ||||||
|  | 
 | ||||||
|  | 	String getCommentId() throws ParsingException; | ||||||
|  | 
 | ||||||
|  | 	String getCommentText() throws ParsingException; | ||||||
|  | 
 | ||||||
|  | 	String getAuthorName() throws ParsingException; | ||||||
|  | 
 | ||||||
|  | 	String getAuthorThumbnail() throws ParsingException; | ||||||
|  | 
 | ||||||
|  | 	String getAuthorEndpoint() throws ParsingException; | ||||||
|  | 
 | ||||||
|  | 	String getPublishedTime() throws ParsingException; | ||||||
|  | 
 | ||||||
|  | 	Integer getLikeCount() throws ParsingException; | ||||||
|  | 
 | ||||||
|  | } | ||||||
|  | @ -0,0 +1,103 @@ | ||||||
|  | package org.schabi.newpipe.extractor.comments; | ||||||
|  | 
 | ||||||
|  | import java.util.List; | ||||||
|  | import java.util.Vector; | ||||||
|  | 
 | ||||||
|  | import org.schabi.newpipe.extractor.InfoItem; | ||||||
|  | import org.schabi.newpipe.extractor.InfoItemsCollector; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  |  * Created by Christian Schabesberger on 28.02.16. | ||||||
|  |  * | ||||||
|  |  * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org> | ||||||
|  |  * CommentsInfoItemsCollector.java is part of NewPipe. | ||||||
|  |  * | ||||||
|  |  * NewPipe is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * NewPipe is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoItem, CommentsInfoItemExtractor> { | ||||||
|  | 
 | ||||||
|  | 	public CommentsInfoItemsCollector(int serviceId) { | ||||||
|  | 		super(serviceId); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public CommentsInfoItem extract(CommentsInfoItemExtractor extractor) throws ParsingException { | ||||||
|  | 
 | ||||||
|  | 		// important information | ||||||
|  | 		int serviceId = getServiceId(); | ||||||
|  | 		String url = extractor.getUrl(); | ||||||
|  | 		String name = extractor.getName(); | ||||||
|  | 
 | ||||||
|  | 		CommentsInfoItem resultItem = new CommentsInfoItem(serviceId, url, name); | ||||||
|  | 
 | ||||||
|  | 		// optional information | ||||||
|  | 		try { | ||||||
|  | 			resultItem.setCommentId(extractor.getCommentId()); | ||||||
|  | 		} catch (Exception e) { | ||||||
|  | 			addError(e); | ||||||
|  | 		} | ||||||
|  | 		try { | ||||||
|  | 			resultItem.setCommentText(extractor.getCommentText()); | ||||||
|  | 		} catch (Exception e) { | ||||||
|  | 			addError(e); | ||||||
|  | 		} | ||||||
|  | 		try { | ||||||
|  | 			resultItem.setAuthorName(extractor.getAuthorName()); | ||||||
|  | 		} catch (Exception e) { | ||||||
|  | 			addError(e); | ||||||
|  | 		} | ||||||
|  | 		try { | ||||||
|  | 			resultItem.setAuthorThumbnail(extractor.getAuthorThumbnail()); | ||||||
|  | 		} catch (Exception e) { | ||||||
|  | 			addError(e); | ||||||
|  | 		} | ||||||
|  | 		try { | ||||||
|  | 			resultItem.setAuthorEndpoint(extractor.getAuthorEndpoint()); | ||||||
|  | 		} catch (Exception e) { | ||||||
|  | 			addError(e); | ||||||
|  | 		} | ||||||
|  | 		try { | ||||||
|  | 			resultItem.setPublishedTime(extractor.getPublishedTime()); | ||||||
|  | 		} catch (Exception e) { | ||||||
|  | 			addError(e); | ||||||
|  | 		} | ||||||
|  | 		try { | ||||||
|  | 			resultItem.setLikeCount(extractor.getLikeCount()); | ||||||
|  | 		} catch (Exception e) { | ||||||
|  | 			addError(e); | ||||||
|  | 		} | ||||||
|  | 		return resultItem; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public void commit(CommentsInfoItemExtractor extractor) { | ||||||
|  | 		try { | ||||||
|  | 			addItem(extract(extractor)); | ||||||
|  | 		} catch (Exception e) { | ||||||
|  | 			addError(e); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	public List<CommentsInfoItem> getCommentsInfoItemList() { | ||||||
|  | 		List<CommentsInfoItem> siiList = new Vector<>(); | ||||||
|  | 		for (InfoItem ii : super.getItems()) { | ||||||
|  | 			if (ii instanceof CommentsInfoItem) { | ||||||
|  | 				siiList.add((CommentsInfoItem) ii); | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		return siiList; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | @ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.soundcloud; | ||||||
| import org.schabi.newpipe.extractor.*; | import org.schabi.newpipe.extractor.*; | ||||||
| import org.schabi.newpipe.extractor.linkhandler.*; | import org.schabi.newpipe.extractor.linkhandler.*; | ||||||
| import org.schabi.newpipe.extractor.channel.ChannelExtractor; | import org.schabi.newpipe.extractor.channel.ChannelExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.comments.CommentsExtractor; | ||||||
| import org.schabi.newpipe.extractor.exceptions.ExtractionException; | import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||||
| import org.schabi.newpipe.extractor.kiosk.KioskExtractor; | import org.schabi.newpipe.extractor.kiosk.KioskExtractor; | ||||||
| import org.schabi.newpipe.extractor.kiosk.KioskList; | import org.schabi.newpipe.extractor.kiosk.KioskList; | ||||||
|  | @ -98,4 +99,16 @@ public class SoundcloudService extends StreamingService { | ||||||
|     public SubscriptionExtractor getSubscriptionExtractor() { |     public SubscriptionExtractor getSubscriptionExtractor() { | ||||||
|         return new SoundcloudSubscriptionExtractor(this); |         return new SoundcloudSubscriptionExtractor(this); | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public ListLinkHandlerFactory getCommentsLHFactory() { | ||||||
|  | 		// TODO Auto-generated method stub | ||||||
|  | 		return null; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { | ||||||
|  | 		// TODO Auto-generated method stub | ||||||
|  | 		return null; | ||||||
|  | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -1,21 +1,42 @@ | ||||||
| package org.schabi.newpipe.extractor.services.youtube; | package org.schabi.newpipe.extractor.services.youtube; | ||||||
| 
 | 
 | ||||||
| import org.schabi.newpipe.extractor.*; | import static java.util.Arrays.asList; | ||||||
| import org.schabi.newpipe.extractor.linkhandler.*; | import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.AUDIO; | ||||||
|  | import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.LIVE; | ||||||
|  | import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.VIDEO; | ||||||
|  | 
 | ||||||
|  | import org.schabi.newpipe.extractor.StreamingService; | ||||||
|  | import org.schabi.newpipe.extractor.SuggestionExtractor; | ||||||
| import org.schabi.newpipe.extractor.channel.ChannelExtractor; | import org.schabi.newpipe.extractor.channel.ChannelExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.comments.CommentsExtractor; | ||||||
| import org.schabi.newpipe.extractor.exceptions.ExtractionException; | import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||||
| import org.schabi.newpipe.extractor.kiosk.KioskExtractor; | import org.schabi.newpipe.extractor.kiosk.KioskExtractor; | ||||||
| import org.schabi.newpipe.extractor.kiosk.KioskList; | import org.schabi.newpipe.extractor.kiosk.KioskList; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.LinkHandler; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; | ||||||
| import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; | import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; | ||||||
| import org.schabi.newpipe.extractor.search.SearchExtractor; | import org.schabi.newpipe.extractor.search.SearchExtractor; | ||||||
| import org.schabi.newpipe.extractor.services.youtube.extractors.*; | import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeChannelExtractor; | ||||||
| import org.schabi.newpipe.extractor.services.youtube.linkHandler.*; | import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubePlaylistExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeStreamExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSubscriptionExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSuggestionExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeTrendingExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeCommentsLinkHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeTrendingLinkHandlerFactory; | ||||||
| import org.schabi.newpipe.extractor.stream.StreamExtractor; | import org.schabi.newpipe.extractor.stream.StreamExtractor; | ||||||
| import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; | import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; | ||||||
| 
 | 
 | ||||||
| import static java.util.Arrays.asList; |  | ||||||
| import static org.schabi.newpipe.extractor.StreamingService.ServiceInfo.MediaCapability.*; |  | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| /* | /* | ||||||
|  * Created by Christian Schabesberger on 23.08.15. |  * Created by Christian Schabesberger on 23.08.15. | ||||||
|  | @ -115,4 +136,14 @@ public class YoutubeService extends StreamingService { | ||||||
|         return new YoutubeSubscriptionExtractor(this); |         return new YoutubeSubscriptionExtractor(this); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public ListLinkHandlerFactory getCommentsLHFactory() { | ||||||
|  | 		return YoutubeCommentsLinkHandlerFactory.getInstance(); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { | ||||||
|  | 		return new YoutubeCommentsExtractor(this, urlIdHandler); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -0,0 +1,262 @@ | ||||||
|  | package org.schabi.newpipe.extractor.services.youtube.extractors; | ||||||
|  | 
 | ||||||
|  | import java.io.BufferedReader; | ||||||
|  | import java.io.IOException; | ||||||
|  | import java.io.InputStreamReader; | ||||||
|  | import java.io.UnsupportedEncodingException; | ||||||
|  | import java.net.URL; | ||||||
|  | import java.net.URLEncoder; | ||||||
|  | import java.util.HashMap; | ||||||
|  | import java.util.List; | ||||||
|  | import java.util.Map; | ||||||
|  | 
 | ||||||
|  | import javax.net.ssl.HttpsURLConnection; | ||||||
|  | 
 | ||||||
|  | import org.schabi.newpipe.extractor.DownloadResponse; | ||||||
|  | import org.schabi.newpipe.extractor.Downloader; | ||||||
|  | import org.schabi.newpipe.extractor.StreamingService; | ||||||
|  | import org.schabi.newpipe.extractor.comments.CommentsExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.comments.CommentsInfoItem; | ||||||
|  | import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; | ||||||
|  | import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; | ||||||
|  | 
 | ||||||
|  | import com.fasterxml.jackson.databind.JsonNode; | ||||||
|  | import com.fasterxml.jackson.databind.ObjectMapper; | ||||||
|  | 
 | ||||||
|  | public class YoutubeCommentsExtractor extends CommentsExtractor { | ||||||
|  | 
 | ||||||
|  | 	private static final String USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0"; | ||||||
|  | 
 | ||||||
|  | 	private List<String> cookies; | ||||||
|  | 	private String sessionToken; | ||||||
|  | 	private String commentsToken; | ||||||
|  | 
 | ||||||
|  | 	private ObjectMapper mapper = new ObjectMapper(); | ||||||
|  | 
 | ||||||
/**
 * Forwards both arguments unchanged to the {@link CommentsExtractor} constructor.
 *
 * @param service   the streaming service this extractor belongs to
 * @param uiHandler the link handler describing whose comments to extract
 */
public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHandler) {
	super(service, uiHandler);
}
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException { | ||||||
|  | 		// initial page does not load any comments but is required to get session token | ||||||
|  | 		// and cookies | ||||||
|  | 		return getPage(getNextPageUrl()); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public String getNextPageUrl() throws IOException, ExtractionException { | ||||||
|  | 		return getNextPageUrl(commentsToken); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	private String getNextPageUrl(JsonNode ajaxJson) throws IOException, ExtractionException { | ||||||
|  | 		String continuation; | ||||||
|  | 		try { | ||||||
|  | 			continuation = ajaxJson.findValue("itemSectionContinuation").get("continuations").findValue("continuation") | ||||||
|  | 					.asText(); | ||||||
|  | 		} catch (Exception e) { | ||||||
|  | 			// no more comments | ||||||
|  | 			return ""; | ||||||
|  | 		} | ||||||
|  | 		return getNextPageUrl(continuation); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	private String getNextPageUrl(String continuation) throws ParsingException { | ||||||
|  | 		Map<String, String> params = new HashMap<>(); | ||||||
|  | 		params.put("action_get_comments", "1"); | ||||||
|  | 		params.put("pbj", "1"); | ||||||
|  | 		params.put("ctoken", continuation); | ||||||
|  | 		params.put("continuation", continuation); | ||||||
|  | 		try { | ||||||
|  | 			return "https://www.youtube.com/comment_service_ajax?" + getDataString(params); | ||||||
|  | 		} catch (UnsupportedEncodingException e) { | ||||||
|  | 			throw new ParsingException("Could not get next page url", e); | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public InfoItemsPage<CommentsInfoItem> getPage(String pageUrl) throws IOException, ExtractionException { | ||||||
|  | 		if (pageUrl == null || pageUrl.isEmpty()) { | ||||||
|  | 			throw new ExtractionException(new IllegalArgumentException("Page url is empty or null")); | ||||||
|  | 		} | ||||||
|  | 		String ajaxResponse = makeAjaxRequest(pageUrl); | ||||||
|  | 		JsonNode ajaxJson = mapper.readTree(ajaxResponse); | ||||||
|  | 		CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId()); | ||||||
|  | 		collectCommentsFrom(collector, ajaxJson, pageUrl); | ||||||
|  | 		return new InfoItemsPage<>(collector, getNextPageUrl(ajaxJson)); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	private void collectCommentsFrom(CommentsInfoItemsCollector collector, JsonNode ajaxJson, String pageUrl) { | ||||||
|  | 		List<JsonNode> comments = ajaxJson.findValues("commentRenderer"); | ||||||
|  | 		comments.stream().forEach(c -> { | ||||||
|  | 			CommentsInfoItemExtractor extractor = new CommentsInfoItemExtractor() { | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public String getUrl() throws ParsingException { | ||||||
|  | 					return pageUrl; | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public String getThumbnailUrl() throws ParsingException { | ||||||
|  | 					try { | ||||||
|  | 						return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); | ||||||
|  | 					} catch (Exception e) { | ||||||
|  | 						throw new ParsingException("Could not get thumbnail url", e); | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public String getName() throws ParsingException { | ||||||
|  | 					try { | ||||||
|  | 						return c.get("authorText").get("simpleText").asText(); | ||||||
|  | 					} catch (Exception e) { | ||||||
|  | 						throw new ParsingException("Could not get thumbnail url", e); | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public String getPublishedTime() throws ParsingException { | ||||||
|  | 					try { | ||||||
|  | 						return c.get("publishedTimeText").get("runs").get(0).get("text").asText(); | ||||||
|  | 					} catch (Exception e) { | ||||||
|  | 						throw new ParsingException("Could not get thumbnail url", e); | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public Integer getLikeCount() throws ParsingException { | ||||||
|  | 					try { | ||||||
|  | 						return c.get("likeCount").intValue(); | ||||||
|  | 					} catch (Exception e) { | ||||||
|  | 						throw new ParsingException("Could not get thumbnail url", e); | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public String getCommentText() throws ParsingException { | ||||||
|  | 					try { | ||||||
|  | 						if (null != c.get("contentText").get("simpleText")) { | ||||||
|  | 							return c.get("contentText").get("simpleText").asText(); | ||||||
|  | 						} else { | ||||||
|  | 							return c.get("contentText").get("runs").get(0).get("text").asText(); | ||||||
|  | 						} | ||||||
|  | 					} catch (Exception e) { | ||||||
|  | 						throw new ParsingException("Could not get thumbnail url", e); | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public String getCommentId() throws ParsingException { | ||||||
|  | 					try { | ||||||
|  | 						return c.get("commentId").asText(); | ||||||
|  | 					} catch (Exception e) { | ||||||
|  | 						throw new ParsingException("Could not get thumbnail url", e); | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public String getAuthorThumbnail() throws ParsingException { | ||||||
|  | 					try { | ||||||
|  | 						return c.get("authorThumbnail").get("thumbnails").get(0).get("url").asText(); | ||||||
|  | 					} catch (Exception e) { | ||||||
|  | 						throw new ParsingException("Could not get thumbnail url", e); | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public String getAuthorName() throws ParsingException { | ||||||
|  | 					try { | ||||||
|  | 						return c.get("authorText").get("simpleText").asText(); | ||||||
|  | 					} catch (Exception e) { | ||||||
|  | 						throw new ParsingException("Could not get thumbnail url", e); | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 
 | ||||||
|  | 				@Override | ||||||
|  | 				public String getAuthorEndpoint() throws ParsingException { | ||||||
|  | 					try { | ||||||
|  | 						return "https://youtube.com" | ||||||
|  | 								+ c.get("authorEndpoint").get("browseEndpoint").get("canonicalBaseUrl").asText(); | ||||||
|  | 					} catch (Exception e) { | ||||||
|  | 						throw new ParsingException("Could not get thumbnail url", e); | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 			}; | ||||||
|  | 
 | ||||||
|  | 			collector.commit(extractor); | ||||||
|  | 		}); | ||||||
|  | 
 | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public void onFetchPage(Downloader downloader) throws IOException, ExtractionException { | ||||||
|  | 		DownloadResponse response = downloader.downloadWithHeaders(getUrl()); | ||||||
|  | 		String responseBody = response.getResponseBody(); | ||||||
|  | 		cookies = response.getResponseHeaders().get("Set-Cookie"); | ||||||
|  | 		sessionToken = findValue(responseBody, "XSRF_TOKEN"); | ||||||
|  | 		commentsToken = findValue(responseBody, "COMMENTS_TOKEN"); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public String getName() throws ParsingException { | ||||||
|  | 		// TODO Auto-generated method stub | ||||||
|  | 		return null; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	private String makeAjaxRequest(String siteUrl) throws IOException { | ||||||
|  | 
 | ||||||
|  | 		StringBuilder postData = new StringBuilder(); | ||||||
|  | 		postData.append(URLEncoder.encode("session_token", "UTF-8")); | ||||||
|  | 		postData.append('='); | ||||||
|  | 		postData.append(URLEncoder.encode(sessionToken, "UTF-8")); | ||||||
|  | 		byte[] postDataBytes = postData.toString().getBytes("UTF-8"); | ||||||
|  | 
 | ||||||
|  | 		URL url = new URL(siteUrl); | ||||||
|  | 		HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); | ||||||
|  | 		con.setRequestMethod("POST"); | ||||||
|  | 		con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); | ||||||
|  | 		con.setRequestProperty("Content-Length", String.valueOf(postDataBytes.length)); | ||||||
|  | 		con.setRequestProperty("Accept", "*/*"); | ||||||
|  | 		con.setRequestProperty("User-Agent", USER_AGENT); | ||||||
|  | 		con.setRequestProperty("X-YouTube-Client-Version", "2.20180815"); | ||||||
|  | 		con.setRequestProperty("X-YouTube-Client-Name", "1"); | ||||||
|  | 		// set cookies | ||||||
|  | 		cookies.stream().forEach(c -> con.addRequestProperty("Cookie", c)); | ||||||
|  | 		con.setDoOutput(true); | ||||||
|  | 		con.getOutputStream().write(postDataBytes); | ||||||
|  | 
 | ||||||
|  | 		BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), "UTF-8")); | ||||||
|  | 		StringBuilder sb = new StringBuilder(); | ||||||
|  | 		String inputLine; | ||||||
|  | 		while ((inputLine = in.readLine()) != null) { | ||||||
|  | 			sb.append(inputLine); | ||||||
|  | 		} | ||||||
|  | 		return sb.toString(); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	private String getDataString(Map<String, String> params) throws UnsupportedEncodingException { | ||||||
|  | 		StringBuilder result = new StringBuilder(); | ||||||
|  | 		boolean first = true; | ||||||
|  | 		for (Map.Entry<String, String> entry : params.entrySet()) { | ||||||
|  | 			if (first) | ||||||
|  | 				first = false; | ||||||
|  | 			else | ||||||
|  | 				result.append("&"); | ||||||
|  | 			result.append(URLEncoder.encode(entry.getKey(), "UTF-8")); | ||||||
|  | 			result.append("="); | ||||||
|  | 			result.append(URLEncoder.encode(entry.getValue(), "UTF-8")); | ||||||
|  | 		} | ||||||
|  | 		return result.toString(); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	private String findValue(String doc, String key) { | ||||||
|  | 		int beginIndex = doc.indexOf(key) + key.length() + 4; | ||||||
|  | 		int endIndex = doc.indexOf("\"", beginIndex); | ||||||
|  | 		return doc.substring(beginIndex, endIndex); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | } | ||||||
|  | @ -0,0 +1,183 @@ | ||||||
|  | package org.schabi.newpipe.extractor.services.youtube.linkHandler; | ||||||
|  | 
 | ||||||
|  | import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory; | ||||||
|  | import org.jsoup.Jsoup; | ||||||
|  | import org.jsoup.nodes.Document; | ||||||
|  | import org.jsoup.nodes.Element; | ||||||
|  | import org.schabi.newpipe.extractor.Downloader; | ||||||
|  | import org.schabi.newpipe.extractor.NewPipe; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.FoundAdException; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; | ||||||
|  | import org.schabi.newpipe.extractor.utils.Parser; | ||||||
|  | 
 | ||||||
|  | import java.io.IOException; | ||||||
|  | import java.io.UnsupportedEncodingException; | ||||||
|  | import java.net.URI; | ||||||
|  | import java.net.URISyntaxException; | ||||||
|  | import java.net.URLDecoder; | ||||||
|  | import java.util.List; | ||||||
|  | 
 | ||||||
|  | /* | ||||||
|  |  * Created by Christian Schabesberger on 25.07.16. | ||||||
|  |  * | ||||||
|  |  * Copyright (C) Christian Schabesberger 2018 <chris.schabesberger@mailbox.org> | ||||||
|  |  * YoutubeCommentsLinkHandlerFactory.java is part of NewPipe. | ||||||
|  |  * | ||||||
|  |  * NewPipe is free software: you can redistribute it and/or modify | ||||||
|  |  * it under the terms of the GNU General Public License as published by | ||||||
|  |  * the Free Software Foundation, either version 3 of the License, or | ||||||
|  |  * (at your option) any later version. | ||||||
|  |  * | ||||||
|  |  * NewPipe is distributed in the hope that it will be useful, | ||||||
|  |  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  |  * GNU General Public License for more details. | ||||||
|  |  * | ||||||
|  |  * You should have received a copy of the GNU General Public License | ||||||
|  |  * along with NewPipe.  If not, see <http://www.gnu.org/licenses/>. | ||||||
|  |  */ | ||||||
|  | 
 | ||||||
|  | public class YoutubeCommentsLinkHandlerFactory extends ListLinkHandlerFactory { | ||||||
|  | 
 | ||||||
|  |     private static final YoutubeCommentsLinkHandlerFactory instance = new YoutubeCommentsLinkHandlerFactory(); | ||||||
|  |     private static final String ID_PATTERN = "([\\-a-zA-Z0-9_]{11})"; | ||||||
|  | 
 | ||||||
|  |     public static YoutubeCommentsLinkHandlerFactory getInstance() { | ||||||
|  |         return instance; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     @Override | ||||||
|  |     public String getId(String url) throws ParsingException, IllegalArgumentException { | ||||||
|  |         if (url.isEmpty()) { | ||||||
|  |             throw new IllegalArgumentException("The url parameter should not be empty"); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         String id; | ||||||
|  |         String lowercaseUrl = url.toLowerCase(); | ||||||
|  |         if (lowercaseUrl.contains("youtube")) { | ||||||
|  |             if (url.contains("attribution_link")) { | ||||||
|  |                 try { | ||||||
|  |                     String escapedQuery = Parser.matchGroup1("u=(.[^&|$]*)", url); | ||||||
|  |                     String query = URLDecoder.decode(escapedQuery, "UTF-8"); | ||||||
|  |                     id = Parser.matchGroup1("v=" + ID_PATTERN, query); | ||||||
|  |                 } catch (UnsupportedEncodingException uee) { | ||||||
|  |                     throw new ParsingException("Could not parse attribution_link", uee); | ||||||
|  |                 } | ||||||
|  |             } else if (lowercaseUrl.contains("youtube.com/shared?ci=")) { | ||||||
|  |                 return getRealIdFromSharedLink(url); | ||||||
|  |             } else if (url.contains("vnd.youtube")) { | ||||||
|  |                 id = Parser.matchGroup1(ID_PATTERN, url); | ||||||
|  |             } else if (url.contains("embed")) { | ||||||
|  |                 id = Parser.matchGroup1("embed/" + ID_PATTERN, url); | ||||||
|  |             } else if (url.contains("googleads")) { | ||||||
|  |                 throw new FoundAdException("Error found add: " + url); | ||||||
|  |             } else { | ||||||
|  |                 id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); | ||||||
|  |             } | ||||||
|  |         } else if (lowercaseUrl.contains("youtu.be")) { | ||||||
|  |             if (url.contains("v=")) { | ||||||
|  |                 id = Parser.matchGroup1("v=" + ID_PATTERN, url); | ||||||
|  |             } else { | ||||||
|  |                 id = Parser.matchGroup1("[Yy][Oo][Uu][Tt][Uu]\\.[Bb][Ee]/" + ID_PATTERN, url); | ||||||
|  |             } | ||||||
|  |         } else if(lowercaseUrl.contains("hooktube")) { | ||||||
|  |             if(lowercaseUrl.contains("&v=") | ||||||
|  |                     || lowercaseUrl.contains("?v=")) { | ||||||
|  |                 id = Parser.matchGroup1("[?&]v=" + ID_PATTERN, url); | ||||||
|  |             } else if (url.contains("/embed/")) { | ||||||
|  |                 id = Parser.matchGroup1("embed/" + ID_PATTERN, url); | ||||||
|  |             } else if (url.contains("/v/")) { | ||||||
|  |                 id = Parser.matchGroup1("v/" + ID_PATTERN, url); | ||||||
|  |             } else if (url.contains("/watch/")) { | ||||||
|  |                 id = Parser.matchGroup1("watch/" + ID_PATTERN, url); | ||||||
|  |             } else { | ||||||
|  |                 throw new ParsingException("Error no suitable url: " + url); | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             throw new ParsingException("Error no suitable url: " + url); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |         if (!id.isEmpty()) { | ||||||
|  |             return id; | ||||||
|  |         } else { | ||||||
|  |             throw new ParsingException("Error could not parse url: " + url); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /** | ||||||
|  |      * Get the real url from a shared uri. | ||||||
|  |      * <p> | ||||||
|  |      * Shared URI's look like this: | ||||||
|  |      * <pre> | ||||||
|  |      *     * https://www.youtube.com/shared?ci=PJICrTByb3E | ||||||
|  |      *     * vnd.youtube://www.youtube.com/shared?ci=PJICrTByb3E&feature=twitter-deep-link | ||||||
|  |      * </pre> | ||||||
|  |      * | ||||||
|  |      * @param url The shared url | ||||||
|  |      * @return the id of the stream | ||||||
|  |      * @throws ParsingException | ||||||
|  |      */ | ||||||
|  |     private String getRealIdFromSharedLink(String url) throws ParsingException { | ||||||
|  |         URI uri; | ||||||
|  |         try { | ||||||
|  |             uri = new URI(url); | ||||||
|  |         } catch (URISyntaxException e) { | ||||||
|  |             throw new ParsingException("Invalid shared link", e); | ||||||
|  |         } | ||||||
|  |         String sharedId = getSharedId(uri); | ||||||
|  |         Downloader downloader = NewPipe.getDownloader(); | ||||||
|  |         String content; | ||||||
|  |         try { | ||||||
|  |             content = downloader.download("https://www.youtube.com/shared?ci=" + sharedId); | ||||||
|  |         } catch (IOException | ReCaptchaException e) { | ||||||
|  |             throw new ParsingException("Unable to resolve shared link", e); | ||||||
|  |         } | ||||||
|  |         final Document document = Jsoup.parse(content); | ||||||
|  | 
 | ||||||
|  |         final Element element = document.select("link[rel=\"canonical\"]").first(); | ||||||
|  |         final String urlWithRealId = (element != null) | ||||||
|  |                 ? element.attr("abs:href") | ||||||
|  |                 : document.select("meta[property=\"og:url\"]").first() | ||||||
|  |                     .attr("abs:content"); | ||||||
|  | 
 | ||||||
|  |         String realId = Parser.matchGroup1(ID_PATTERN, urlWithRealId); | ||||||
|  |         if (sharedId.equals(realId)) { | ||||||
|  |             throw new ParsingException("Got same id for as shared info_id: " + sharedId); | ||||||
|  |         } | ||||||
|  |         return realId; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     private String getSharedId(URI uri) throws ParsingException { | ||||||
|  |         if (!"/shared".equals(uri.getPath())) { | ||||||
|  |             throw new ParsingException("Not a shared link: " + uri.toString() + " (path != " + uri.getPath() + ")"); | ||||||
|  |         } | ||||||
|  |         return Parser.matchGroup1("ci=" + ID_PATTERN, uri.getQuery()); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     @Override | ||||||
|  |     public boolean onAcceptUrl(final String url) throws FoundAdException { | ||||||
|  |         final String lowercaseUrl = url.toLowerCase(); | ||||||
|  |         if (lowercaseUrl.contains("youtube") | ||||||
|  |                 || lowercaseUrl.contains("youtu.be") | ||||||
|  |                 || lowercaseUrl.contains("hooktube")) { | ||||||
|  |             // bad programming I know | ||||||
|  |             try { | ||||||
|  |                 getId(url); | ||||||
|  |                 return true; | ||||||
|  |             } catch (FoundAdException fe) { | ||||||
|  |                 throw fe; | ||||||
|  |             } catch (ParsingException e) { | ||||||
|  |                 return false; | ||||||
|  |             } | ||||||
|  |         } else { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public String getUrl(String id, List<String> contentFilter, String sortFilter) throws ParsingException { | ||||||
|  | 		return "https://www.youtube.com/watch?v=" + id; | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  | @ -1,16 +1,18 @@ | ||||||
| package org.schabi.newpipe; | package org.schabi.newpipe; | ||||||
| 
 | 
 | ||||||
| import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; |  | ||||||
| 
 |  | ||||||
| import javax.net.ssl.HttpsURLConnection; |  | ||||||
| import java.io.BufferedReader; | import java.io.BufferedReader; | ||||||
| import java.io.IOException; | import java.io.IOException; | ||||||
| import java.io.InputStreamReader; | import java.io.InputStreamReader; | ||||||
| import java.net.URL; | import java.net.URL; | ||||||
| import java.net.UnknownHostException; | import java.net.UnknownHostException; | ||||||
| import java.util.HashMap; | import java.util.HashMap; | ||||||
|  | import java.util.List; | ||||||
| import java.util.Map; | import java.util.Map; | ||||||
| 
 | 
 | ||||||
|  | import javax.net.ssl.HttpsURLConnection; | ||||||
|  | 
 | ||||||
|  | import org.schabi.newpipe.extractor.DownloadResponse; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; | ||||||
| 
 | 
 | ||||||
| /* | /* | ||||||
|  * Created by Christian Schabesberger on 28.01.16. |  * Created by Christian Schabesberger on 28.01.16. | ||||||
|  | @ -62,11 +64,12 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	/** | 	/** | ||||||
|      * Download the text file at the supplied URL as in download(String), | 	 * Download the text file at the supplied URL as in download(String), but set | ||||||
|      * but set the HTTP header field "Accept-Language" to the supplied string. | 	 * the HTTP header field "Accept-Language" to the supplied string. | ||||||
| 	 * | 	 * | ||||||
| 	 * @param siteUrl  the URL of the text file to return the contents of | 	 * @param siteUrl  the URL of the text file to return the contents of | ||||||
|      * @param language the language (usually a 2-character code) to set as the preferred language | 	 * @param language the language (usually a 2-character code) to set as the | ||||||
|  | 	 *                 preferred language | ||||||
| 	 * @return the contents of the specified text file | 	 * @return the contents of the specified text file | ||||||
| 	 */ | 	 */ | ||||||
| 	public String download(String siteUrl, String language) throws IOException, ReCaptchaException { | 	public String download(String siteUrl, String language) throws IOException, ReCaptchaException { | ||||||
|  | @ -75,27 +78,28 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { | ||||||
| 		return download(siteUrl, requestProperties); | 		return download(siteUrl, requestProperties); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| 	/** | 	/** | ||||||
|      * Download the text file at the supplied URL as in download(String), | 	 * Download the text file at the supplied URL as in download(String), but set | ||||||
|      * but set the HTTP header field "Accept-Language" to the supplied string. | 	 * the HTTP header field "Accept-Language" to the supplied string. | ||||||
| 	 * | 	 * | ||||||
| 	 * @param siteUrl          the URL of the text file to return the contents of | 	 * @param siteUrl          the URL of the text file to return the contents of | ||||||
| 	 * @param customProperties set request header properties | 	 * @param customProperties set request header properties | ||||||
| 	 * @return the contents of the specified text file | 	 * @return the contents of the specified text file | ||||||
| 	 * @throws IOException | 	 * @throws IOException | ||||||
| 	 */ | 	 */ | ||||||
|     public String download(String siteUrl, Map<String, String> customProperties) throws IOException, ReCaptchaException { | 	public String download(String siteUrl, Map<String, String> customProperties) | ||||||
|  | 			throws IOException, ReCaptchaException { | ||||||
| 		URL url = new URL(siteUrl); | 		URL url = new URL(siteUrl); | ||||||
| 		HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); | 		HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); | ||||||
|         for (Map.Entry<String, String> pair: customProperties.entrySet()) { | 		for (Map.Entry<String, String> pair : customProperties.entrySet()) { | ||||||
| 			con.setRequestProperty(pair.getKey(), pair.getValue()); | 			con.setRequestProperty(pair.getKey(), pair.getValue()); | ||||||
| 		} | 		} | ||||||
| 		return dl(con); | 		return dl(con); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	/** | 	/** | ||||||
|      * Common functionality between download(String url) and download(String url, String language) | 	 * Common functionality between download(String url) and download(String url, | ||||||
|  | 	 * String language) | ||||||
| 	 */ | 	 */ | ||||||
| 	private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { | 	private static String dl(HttpsURLConnection con) throws IOException, ReCaptchaException { | ||||||
| 		StringBuilder response = new StringBuilder(); | 		StringBuilder response = new StringBuilder(); | ||||||
|  | @ -108,24 +112,22 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { | ||||||
| 			con.setRequestProperty("User-Agent", USER_AGENT); | 			con.setRequestProperty("User-Agent", USER_AGENT); | ||||||
| 
 | 
 | ||||||
| 			if (getCookies().length() > 0) { | 			if (getCookies().length() > 0) { | ||||||
|                 con.setRequestProperty("Cookie", getCookies()); | 				con.addRequestProperty("Cookie", getCookies()); | ||||||
| 			} | 			} | ||||||
| 
 | 
 | ||||||
|             in = new BufferedReader( | 			in = new BufferedReader(new InputStreamReader(con.getInputStream())); | ||||||
|                     new InputStreamReader(con.getInputStream())); |  | ||||||
| 			String inputLine; | 			String inputLine; | ||||||
| 
 | 
 | ||||||
| 			while ((inputLine = in.readLine()) != null) { | 			while ((inputLine = in.readLine()) != null) { | ||||||
| 				response.append(inputLine); | 				response.append(inputLine); | ||||||
| 			} | 			} | ||||||
|         } catch (UnknownHostException uhe) {//thrown when there's no internet connection | 		} catch (UnknownHostException uhe) {// thrown when there's no internet connection | ||||||
| 			throw new IOException("unknown host or no network", uhe); | 			throw new IOException("unknown host or no network", uhe); | ||||||
|             //Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); | 			// Toast.makeText(getActivity(), uhe.getMessage(), Toast.LENGTH_LONG).show(); | ||||||
| 		} catch (Exception e) { | 		} catch (Exception e) { | ||||||
| 			/* | 			/* | ||||||
|              * HTTP 429 == Too Many Request | 			 * HTTP 429 == Too Many Request Receive from Youtube.com = ReCaptcha challenge | ||||||
|              * Receive from Youtube.com = ReCaptcha challenge request | 			 * request See : https://github.com/rg3/youtube-dl/issues/5138 | ||||||
|              * See : https://github.com/rg3/youtube-dl/issues/5138 |  | ||||||
| 			 */ | 			 */ | ||||||
| 			if (con.getResponseCode() == 429) { | 			if (con.getResponseCode() == 429) { | ||||||
| 				throw new ReCaptchaException("reCaptcha Challenge requested"); | 				throw new ReCaptchaException("reCaptcha Challenge requested"); | ||||||
|  | @ -142,8 +144,8 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	/** | 	/** | ||||||
|      * Download (via HTTP) the text file located at the supplied URL, and return its contents. | 	 * Download (via HTTP) the text file located at the supplied URL, and return its | ||||||
|      * Primarily intended for downloading web pages. | 	 * contents. Primarily intended for downloading web pages. | ||||||
| 	 * | 	 * | ||||||
| 	 * @param siteUrl the URL of the text file to download | 	 * @param siteUrl the URL of the text file to download | ||||||
| 	 * @return the contents of the specified text file | 	 * @return the contents of the specified text file | ||||||
|  | @ -151,7 +153,27 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { | ||||||
| 	public String download(String siteUrl) throws IOException, ReCaptchaException { | 	public String download(String siteUrl) throws IOException, ReCaptchaException { | ||||||
| 		URL url = new URL(siteUrl); | 		URL url = new URL(siteUrl); | ||||||
| 		HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); | 		HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); | ||||||
|         //HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); | 		// HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); | ||||||
| 		return dl(con); | 		return dl(con); | ||||||
| 	} | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public DownloadResponse downloadWithHeaders(String siteUrl, Map<String, List<String>> requestHeaders) | ||||||
|  | 			throws IOException, ReCaptchaException { | ||||||
|  | 		URL url = new URL(siteUrl); | ||||||
|  | 		HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); | ||||||
|  | 		for (Map.Entry<String, List<String>> pair : requestHeaders.entrySet()) { | ||||||
|  | 			pair.getValue().stream().forEach(value -> con.addRequestProperty(pair.getKey(), value)); | ||||||
|  | 		} | ||||||
|  | 		String responseBody = dl(con); | ||||||
|  | 		return new DownloadResponse(responseBody, con.getHeaderFields()); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Override | ||||||
|  | 	public DownloadResponse downloadWithHeaders(String siteUrl) throws IOException, ReCaptchaException { | ||||||
|  | 		URL url = new URL(siteUrl); | ||||||
|  | 		HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); | ||||||
|  | 		String responseBody = dl(con); | ||||||
|  | 		return new DownloadResponse(responseBody, con.getHeaderFields()); | ||||||
|  | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -0,0 +1,46 @@ | ||||||
|  | package org.schabi.newpipe.extractor.services.youtube; | ||||||
|  | 
 | ||||||
|  | import static org.junit.Assert.assertTrue; | ||||||
|  | import static org.schabi.newpipe.extractor.ServiceList.YouTube; | ||||||
|  | 
 | ||||||
|  | import java.io.IOException; | ||||||
|  | 
 | ||||||
|  | import org.junit.BeforeClass; | ||||||
|  | import org.junit.Test; | ||||||
|  | import org.schabi.newpipe.Downloader; | ||||||
|  | import org.schabi.newpipe.extractor.ListExtractor.InfoItemsPage; | ||||||
|  | import org.schabi.newpipe.extractor.NewPipe; | ||||||
|  | import org.schabi.newpipe.extractor.comments.CommentsInfoItem; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsExtractor; | ||||||
|  | 
 | ||||||
|  | public class YoutubeCommentsExtractorTest { | ||||||
|  | 
 | ||||||
|  | 	private static YoutubeCommentsExtractor extractor; | ||||||
|  | 
 | ||||||
|  | 	@BeforeClass | ||||||
|  | 	public static void setUp() throws Exception { | ||||||
|  | 		NewPipe.init(Downloader.getInstance()); | ||||||
|  | 		extractor = (YoutubeCommentsExtractor) YouTube | ||||||
|  | 				.getCommentsExtractor("https://www.youtube.com/watch?v=rrgFN3AxGfs"); | ||||||
|  | 		extractor.fetchPage(); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	@Test | ||||||
|  | 	public void testGetComments() throws IOException, ExtractionException { | ||||||
|  | 		boolean result = false; | ||||||
|  | 		InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage(); | ||||||
|  | 		result = findInComments(comments, "i should really be in the top comment.lol"); | ||||||
|  | 
 | ||||||
|  | 		while (comments.hasNextPage()) { | ||||||
|  | 			comments = extractor.getPage(comments.getNextPageUrl()); | ||||||
|  | 			result = findInComments(comments, "i should really be in the top comment.lol"); | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		assertTrue(result); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) { | ||||||
|  | 		return comments.getItems().stream().filter(c -> c.getCommentText().contains(comment)).findAny().isPresent(); | ||||||
|  | 	} | ||||||
|  | } | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue