Base implementation: parse the upload date of StreamInfoItems
given in the format '2 days ago' (in English) on a YouTube channel page. (The parser is extensible to other pages.)
This commit is contained in:
		
							parent
							
								
									514ed7bdc1
								
							
						
					
					
						commit
						180836c180
					
				
					 16 changed files with 316 additions and 44 deletions
				
			
		|  | @ -17,6 +17,7 @@ import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory; | |||
| import org.schabi.newpipe.extractor.playlist.PlaylistExtractor; | ||||
| import org.schabi.newpipe.extractor.search.SearchExtractor; | ||||
| import org.schabi.newpipe.extractor.stream.StreamExtractor; | ||||
| import org.schabi.newpipe.extractor.stream.TimeAgoParser; | ||||
| import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor; | ||||
| import org.schabi.newpipe.extractor.utils.Localization; | ||||
| 
 | ||||
|  | @ -222,7 +223,7 @@ public abstract class StreamingService { | |||
|     public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException { | ||||
|         return getChannelExtractor(linkHandler, NewPipe.getPreferredLocalization()); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException { | ||||
|         return getPlaylistExtractor(linkHandler, NewPipe.getPreferredLocalization()); | ||||
|     } | ||||
|  | @ -230,7 +231,7 @@ public abstract class StreamingService { | |||
|     public StreamExtractor getStreamExtractor(LinkHandler linkHandler) throws ExtractionException { | ||||
|         return getStreamExtractor(linkHandler, NewPipe.getPreferredLocalization()); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException { | ||||
|         return getCommentsExtractor(urlIdHandler, NewPipe.getPreferredLocalization()); | ||||
|     } | ||||
|  | @ -287,7 +288,7 @@ public abstract class StreamingService { | |||
|     public StreamExtractor getStreamExtractor(String url) throws ExtractionException { | ||||
|         return getStreamExtractor(getStreamLHFactory().fromUrl(url), NewPipe.getPreferredLocalization()); | ||||
|     } | ||||
|      | ||||
| 
 | ||||
|     public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException { | ||||
|         ListLinkHandlerFactory llhf = getCommentsLHFactory(); | ||||
|         if(null == llhf) { | ||||
|  | @ -296,6 +297,9 @@ public abstract class StreamingService { | |||
|         return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization()); | ||||
|     } | ||||
| 
 | ||||
|     public TimeAgoParser getTimeAgoParser() { | ||||
|         return new TimeAgoParser(TimeAgoParser.DEFAULT_AGO_PHRASES); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Figures out where the link is pointing to (a channel, a video, a playlist, etc.) | ||||
|  |  | |||
|  | @ -79,23 +79,22 @@ public class SoundcloudParsingHelper { | |||
|         return dl.head(apiUrl).getResponseCode() == 200; | ||||
|     } | ||||
| 
 | ||||
|     public static String toDateString(String time) throws ParsingException { | ||||
|     static Date parseDate(String time) throws ParsingException { | ||||
|         try { | ||||
|             Date date; | ||||
|             // There are two date formats: one for 'api.soundc...' and the other for 'api-v2.soundc...'. | ||||
|             return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time); | ||||
|         } catch (ParseException e1) { | ||||
|             try { | ||||
|                 date = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time); | ||||
|             } catch (Exception e) { | ||||
|                 date = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time); | ||||
|                 return new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time); | ||||
|             } catch (ParseException e2) { | ||||
|                 throw new ParsingException(e1.getMessage(), e2); | ||||
|             } | ||||
| 
 | ||||
|             SimpleDateFormat newDateFormat = new SimpleDateFormat("yyyy-MM-dd"); | ||||
|             return newDateFormat.format(date); | ||||
|         } catch (ParseException e) { | ||||
|             throw new ParsingException(e.getMessage(), e); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     static String toTextualDate(String time) throws ParsingException { | ||||
|         return new SimpleDateFormat("yyyy-MM-dd").format(parseDate(time)); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Call the endpoint "/resolve" of the api.<p> | ||||
|      *  | ||||
|  |  | |||
|  | @ -51,7 +51,7 @@ public class SoundcloudStreamExtractor extends StreamExtractor { | |||
|     @Nonnull | ||||
|     @Override | ||||
|     public String getUploadDate() throws ParsingException { | ||||
|         return SoundcloudParsingHelper.toDateString(track.getString("created_at")); | ||||
|         return SoundcloudParsingHelper.toTextualDate(track.getString("created_at")); | ||||
|     } | ||||
| 
 | ||||
|     @Nonnull | ||||
|  |  | |||
|  | @ -5,6 +5,8 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; | |||
| import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; | ||||
| import org.schabi.newpipe.extractor.stream.StreamType; | ||||
| 
 | ||||
| import java.util.Calendar; | ||||
| 
 | ||||
| import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps; | ||||
| 
 | ||||
| public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtractor { | ||||
|  | @ -41,8 +43,19 @@ public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtracto | |||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public String getUploadDate() throws ParsingException { | ||||
|         return SoundcloudParsingHelper.toDateString(itemObject.getString("created_at")); | ||||
|     public String getTextualUploadDate() throws ParsingException { | ||||
|         return SoundcloudParsingHelper.toTextualDate(getCreatedAt()); | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public Calendar getUploadDate() throws ParsingException { | ||||
|         Calendar uploadTime = Calendar.getInstance(); | ||||
|         uploadTime.setTime(SoundcloudParsingHelper.parseDate(getCreatedAt())); | ||||
|         return uploadTime; | ||||
|     } | ||||
| 
 | ||||
|     private String getCreatedAt() { | ||||
|         return itemObject.getString("created_at"); | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; | |||
| import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; | ||||
| import org.schabi.newpipe.extractor.stream.StreamInfoItem; | ||||
| import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; | ||||
| import org.schabi.newpipe.extractor.stream.TimeAgoParser; | ||||
| import org.schabi.newpipe.extractor.utils.DonationLinkHelper; | ||||
| import org.schabi.newpipe.extractor.utils.Localization; | ||||
| import org.schabi.newpipe.extractor.utils.Parser; | ||||
|  | @ -53,6 +54,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor { | |||
|     private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id="; | ||||
|     private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en"; | ||||
| 
 | ||||
|     private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser(); | ||||
| 
 | ||||
|     private Document doc; | ||||
| 
 | ||||
|     public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) { | ||||
|  | @ -230,7 +233,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor { | |||
|         final String uploaderUrl = getUrl(); | ||||
|         for (final Element li : element.children()) { | ||||
|             if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) { | ||||
|                 collector.commit(new YoutubeStreamInfoItemExtractor(li) { | ||||
|                 collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { | ||||
|                     @Override | ||||
|                     public String getUrl() throws ParsingException { | ||||
|                         try { | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH | |||
| import org.schabi.newpipe.extractor.stream.StreamInfoItem; | ||||
| import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; | ||||
| import org.schabi.newpipe.extractor.stream.StreamType; | ||||
| import org.schabi.newpipe.extractor.stream.TimeAgoParser; | ||||
| import org.schabi.newpipe.extractor.utils.Localization; | ||||
| import org.schabi.newpipe.extractor.utils.Utils; | ||||
| 
 | ||||
|  | @ -28,6 +29,8 @@ import java.io.IOException; | |||
| @SuppressWarnings("WeakerAccess") | ||||
| public class YoutubePlaylistExtractor extends PlaylistExtractor { | ||||
| 
 | ||||
|     private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser(); | ||||
| 
 | ||||
|     private Document doc; | ||||
| 
 | ||||
|     public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) { | ||||
|  | @ -192,7 +195,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { | |||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             collector.commit(new YoutubeStreamInfoItemExtractor(li) { | ||||
|             collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { | ||||
|                 public Element uploaderLink; | ||||
| 
 | ||||
|                 @Override | ||||
|  | @ -258,7 +261,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor { | |||
|                 } | ||||
| 
 | ||||
|                 @Override | ||||
|                 public String getUploadDate() throws ParsingException { | ||||
|                 public String getTextualUploadDate() throws ParsingException { | ||||
|                     return ""; | ||||
|                 } | ||||
| 
 | ||||
|  |  | |||
|  | @ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.InfoItem; | |||
| import org.schabi.newpipe.extractor.StreamingService; | ||||
| import org.schabi.newpipe.extractor.exceptions.ExtractionException; | ||||
| import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||
| import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; | ||||
| import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; | ||||
| import org.schabi.newpipe.extractor.search.SearchExtractor; | ||||
| import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; | ||||
|  | @ -129,7 +130,7 @@ public class YoutubeSearchExtractor extends SearchExtractor { | |||
| 
 | ||||
|                 // video item type | ||||
|             } else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) { | ||||
|                 collector.commit(new YoutubeStreamInfoItemExtractor(el)); | ||||
|                 collector.commit(new YoutubeStreamInfoItemExtractor(el, getService().getTimeAgoParser())); | ||||
|             } else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) { | ||||
|                 collector.commit(new YoutubeChannelInfoItemExtractor(el)); | ||||
|             } else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null && | ||||
|  |  | |||
|  | @ -75,6 +75,8 @@ public class YoutubeStreamExtractor extends StreamExtractor { | |||
| 
 | ||||
|     /*//////////////////////////////////////////////////////////////////////////*/ | ||||
| 
 | ||||
|     private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser(); | ||||
| 
 | ||||
|     private Document doc; | ||||
|     @Nullable | ||||
|     private JsonObject playerArgs; | ||||
|  | @ -932,7 +934,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { | |||
|      * This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo. | ||||
|      */ | ||||
|     private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) { | ||||
|         return new YoutubeStreamInfoItemExtractor(li) { | ||||
|         return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { | ||||
| 
 | ||||
|             @Override | ||||
|             public String getUrl() throws ParsingException { | ||||
|  | @ -959,7 +961,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { | |||
|             } | ||||
| 
 | ||||
|             @Override | ||||
|             public String getUploadDate() throws ParsingException { | ||||
|             public String getTextualUploadDate() throws ParsingException { | ||||
|                 return ""; | ||||
|             } | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,12 +1,17 @@ | |||
| package org.schabi.newpipe.extractor.services.youtube.extractors; | ||||
| 
 | ||||
| import org.jsoup.nodes.Element; | ||||
| import org.jsoup.select.Elements; | ||||
| import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||
| import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper; | ||||
| import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor; | ||||
| import org.schabi.newpipe.extractor.stream.StreamType; | ||||
| import org.schabi.newpipe.extractor.stream.TimeAgoParser; | ||||
| import org.schabi.newpipe.extractor.utils.Utils; | ||||
| 
 | ||||
| import javax.annotation.Nullable; | ||||
| import java.util.Calendar; | ||||
| 
 | ||||
| /* | ||||
|  * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org> | ||||
|  * YoutubeStreamInfoItemExtractor.java is part of NewPipe. | ||||
|  | @ -28,9 +33,18 @@ import org.schabi.newpipe.extractor.utils.Utils; | |||
| public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { | ||||
| 
 | ||||
|     private final Element item; | ||||
|     private final TimeAgoParser timeAgoParser; | ||||
| 
 | ||||
|     public YoutubeStreamInfoItemExtractor(Element item) { | ||||
|     private String cachedUploadDate; | ||||
| 
 | ||||
|     /** | ||||
|      * Creates an extractor of StreamInfoItems from a YouTube page. | ||||
|      * @param item          The page element | ||||
|      * @param timeAgoParser A parser of the textual dates or {@code null}. | ||||
|      */ | ||||
|     public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) { | ||||
|         this.item = item; | ||||
|         this.timeAgoParser = timeAgoParser; | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|  | @ -126,20 +140,35 @@ public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor { | |||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public String getUploadDate() throws ParsingException { | ||||
|     public String getTextualUploadDate() throws ParsingException { | ||||
|         if (cachedUploadDate != null) { | ||||
|             return cachedUploadDate; | ||||
|         } | ||||
| 
 | ||||
|         try { | ||||
|             Element meta = item.select("div[class=\"yt-lockup-meta\"]").first(); | ||||
|             if (meta == null) return ""; | ||||
| 
 | ||||
|             Element li = meta.select("li").first(); | ||||
|             if(li == null) return ""; | ||||
|             final Elements li = meta.select("li"); | ||||
|             if (li.isEmpty()) return ""; | ||||
| 
 | ||||
|             return meta.select("li").first().text(); | ||||
|             return cachedUploadDate = li.first().text(); | ||||
|         } catch (Exception e) { | ||||
|             throw new ParsingException("Could not get upload date", e); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public Calendar getUploadDate() throws ParsingException { | ||||
|         String textualUploadDate = getTextualUploadDate(); | ||||
|         if (timeAgoParser != null | ||||
|                 && textualUploadDate != null && !"".equals(textualUploadDate)) { | ||||
|             return timeAgoParser.parse(textualUploadDate); | ||||
|         } else { | ||||
|             return null; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public long getViewCount() throws ParsingException { | ||||
|         String input; | ||||
|  |  | |||
|  | @ -35,12 +35,15 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH | |||
| import org.schabi.newpipe.extractor.stream.StreamInfoItem; | ||||
| import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector; | ||||
| import org.schabi.newpipe.extractor.utils.Localization; | ||||
| import org.schabi.newpipe.extractor.stream.TimeAgoParser; | ||||
| 
 | ||||
| import javax.annotation.Nonnull; | ||||
| import java.io.IOException; | ||||
| 
 | ||||
| public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> { | ||||
| 
 | ||||
|     private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser(); | ||||
| 
 | ||||
|     private Document doc; | ||||
| 
 | ||||
|     public YoutubeTrendingExtractor(StreamingService service, | ||||
|  | @ -93,7 +96,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> { | |||
|         for(Element ul : uls) { | ||||
|             for(final Element li : ul.children()) { | ||||
|                 final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first(); | ||||
|                 collector.commit(new YoutubeStreamInfoItemExtractor(li) { | ||||
|                 collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) { | ||||
|                     @Override | ||||
|                     public String getUrl() throws ParsingException { | ||||
|                         try { | ||||
|  |  | |||
|  | @ -22,6 +22,8 @@ package org.schabi.newpipe.extractor.stream; | |||
| 
 | ||||
| import org.schabi.newpipe.extractor.InfoItem; | ||||
| 
 | ||||
| import java.util.Calendar; | ||||
| 
 | ||||
| /** | ||||
|  * Info object for previews of unopened videos, eg search results, related videos | ||||
|  */ | ||||
|  | @ -29,7 +31,8 @@ public class StreamInfoItem extends InfoItem { | |||
|     private final StreamType streamType; | ||||
| 
 | ||||
|     private String uploaderName; | ||||
|     private String uploadDate; | ||||
|     private String textualUploadDate; | ||||
|     private Calendar uploadDate; | ||||
|     private long viewCount = -1; | ||||
|     private long duration = -1; | ||||
| 
 | ||||
|  | @ -52,14 +55,6 @@ public class StreamInfoItem extends InfoItem { | |||
|         this.uploaderName = uploader_name; | ||||
|     } | ||||
| 
 | ||||
|     public String getUploadDate() { | ||||
|         return uploadDate; | ||||
|     } | ||||
| 
 | ||||
|     public void setUploadDate(String upload_date) { | ||||
|         this.uploadDate = upload_date; | ||||
|     } | ||||
| 
 | ||||
|     public long getViewCount() { | ||||
|         return viewCount; | ||||
|     } | ||||
|  | @ -84,12 +79,36 @@ public class StreamInfoItem extends InfoItem { | |||
|         this.uploaderUrl = uploaderUrl; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * @return The original textual upload date as returned by the streaming service. | ||||
|      * @see #getUploadDate() | ||||
|      */ | ||||
|     public String getTextualUploadDate() { | ||||
|         return textualUploadDate; | ||||
|     } | ||||
| 
 | ||||
|     public void setTextualUploadDate(String upload_date) { | ||||
|         this.textualUploadDate = upload_date; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * @return The (approximated) date and time this item was uploaded or {@code null}. | ||||
|      * @see #getTextualUploadDate() | ||||
|      */ | ||||
|     public Calendar getUploadDate() { | ||||
|         return uploadDate; | ||||
|     } | ||||
| 
 | ||||
|     public void setUploadDate(Calendar uploadDate) { | ||||
|         this.uploadDate = uploadDate; | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public String toString() { | ||||
|         return "StreamInfoItem{" + | ||||
|                 "streamType=" + streamType + | ||||
|                 ", uploaderName='" + uploaderName + '\'' + | ||||
|                 ", uploadDate='" + uploadDate + '\'' + | ||||
|                 ", textualUploadDate='" + textualUploadDate + '\'' + | ||||
|                 ", viewCount=" + viewCount + | ||||
|                 ", duration=" + duration + | ||||
|                 ", uploaderUrl='" + uploaderUrl + '\'' + | ||||
|  |  | |||
|  | @ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.stream; | |||
| import org.schabi.newpipe.extractor.InfoItemExtractor; | ||||
| import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||
| 
 | ||||
| import java.util.Calendar; | ||||
| 
 | ||||
| /* | ||||
|  * Created by Christian Schabesberger on 28.02.16. | ||||
|  * | ||||
|  | @ -64,10 +66,30 @@ public interface StreamInfoItemExtractor extends InfoItemExtractor { | |||
|     String getUploaderUrl() throws ParsingException; | ||||
| 
 | ||||
|     /** | ||||
|      * Extract the uploader name | ||||
|      * @return the uploader name | ||||
|      * @throws ParsingException thrown if there is an error in the extraction | ||||
|      * Extract the textual upload date of this item. | ||||
|      * The original textual date provided by the service may be used if it is short; | ||||
|      * otherwise the format "yyyy-MM-dd" or a locale-specific version is preferred. | ||||
|      * | ||||
|      * @return The original textual upload date. | ||||
|      * @throws ParsingException if there is an error in the extraction | ||||
|      * @see #getUploadDate() | ||||
|      */ | ||||
|     String getUploadDate() throws ParsingException; | ||||
|     String getTextualUploadDate() throws ParsingException; | ||||
| 
 | ||||
|     /** | ||||
|      * Extracts the upload date and time of this item and parses it. | ||||
|      * <p> | ||||
|      *     If the service doesn't provide an exact time, an approximation can be returned. | ||||
|      *     The approximation should be marked by setting seconds and milliseconds to zero. | ||||
|      *     <br> | ||||
|      *     If the service doesn't provide any date at all, then {@code null} should be returned. | ||||
|      * </p> | ||||
|      * | ||||
|      * @return The (approximated) date and time this item was uploaded or {@code null}. | ||||
|      * @throws ParsingException if there is an error in the extraction | ||||
|      *                          or the extracted date couldn't be parsed. | ||||
|      * @see #getTextualUploadDate() | ||||
|      */ | ||||
|     Calendar getUploadDate() throws ParsingException; | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -61,10 +61,15 @@ public class StreamInfoItemsCollector extends InfoItemsCollector<StreamInfoItem, | |||
|             addError(e); | ||||
|         } | ||||
|         try { | ||||
|             resultItem.setUploadDate(extractor.getUploadDate()); | ||||
|             resultItem.setTextualUploadDate(extractor.getTextualUploadDate()); | ||||
|         } catch (Exception e) { | ||||
|             addError(e); | ||||
|         } | ||||
|         try { | ||||
|             resultItem.setUploadDate(extractor.getUploadDate()); | ||||
|         } catch (ParsingException e) { | ||||
|             addError(e); | ||||
|         } | ||||
|         try { | ||||
|             resultItem.setViewCount(extractor.getViewCount()); | ||||
|         } catch (Exception e) { | ||||
|  |  | |||
|  | @ -0,0 +1,158 @@ | |||
| package org.schabi.newpipe.extractor.stream; | ||||
| 
 | ||||
| /* | ||||
|  * Created by wojcik.online on 2018-01-25. | ||||
|  */ | ||||
| 
 | ||||
| import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||
| 
 | ||||
| import java.util.Calendar; | ||||
| import java.util.Collection; | ||||
| import java.util.Collections; | ||||
| import java.util.EnumMap; | ||||
| import java.util.Map; | ||||
| 
 | ||||
| /** | ||||
|  * A helper class that is meant to be used by services that need to parse upload dates in the | ||||
|  * format '2 days ago' or similar. | ||||
|  */ | ||||
| public class TimeAgoParser { | ||||
| 
 | ||||
|     /** | ||||
|      * A set of English phrases that are contained in the time units. | ||||
|      * (e.g. '7 minutes ago' contains 'min') | ||||
|      */ | ||||
|     public static Map<TimeAgoUnit, Collection<String>> DEFAULT_AGO_PHRASES = | ||||
|             new EnumMap<>(TimeAgoUnit.class); | ||||
| 
 | ||||
|     private final Map<TimeAgoUnit, Collection<String>> agoPhrases; | ||||
| 
 | ||||
|     private final Calendar consistentNow; | ||||
| 
 | ||||
|     /** | ||||
|      * Creates a helper to parse upload dates in the format '2 days ago'. | ||||
|      * <p> | ||||
|      *     Instantiate a new {@link TimeAgoParser} every time you extract a new batch of items. | ||||
|      * </p> | ||||
|      * @param agoPhrases The phrases used to recognize each time unit in a given language. | ||||
|      */ | ||||
|     public TimeAgoParser(Map<TimeAgoUnit, Collection<String>> agoPhrases) { | ||||
|         this.agoPhrases = agoPhrases; | ||||
|         consistentNow = Calendar.getInstance(); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Parses a textual date in the format '2 days ago' into a Calendar representation. | ||||
|      * For time units of days or larger, marks the date as approximated by setting minutes, | ||||
|      * seconds and milliseconds to 0. | ||||
|      * @param textualDate The original date as provided by the streaming service | ||||
|      * @return The parsed (approximated) time | ||||
|      * @throws ParsingException if the time unit could not be recognized | ||||
|      */ | ||||
|     public Calendar parse(String textualDate) throws ParsingException { | ||||
|         int timeAgoAmount; | ||||
|         try { | ||||
|             timeAgoAmount = parseTimeAgoAmount(textualDate); | ||||
|         } catch (NumberFormatException e) { | ||||
|             // If there is no valid number in the textual date, | ||||
|             // assume it is 1 (as in 'a second ago'). | ||||
|             timeAgoAmount = 1; | ||||
|         } | ||||
| 
 | ||||
|         TimeAgoUnit timeAgoUnit = parseTimeAgoUnit(textualDate); | ||||
|         return getCalendar(timeAgoAmount, timeAgoUnit); | ||||
|     } | ||||
| 
 | ||||
|     private int parseTimeAgoAmount(String textualDate) throws NumberFormatException { | ||||
|         String timeValueStr = textualDate.replaceAll("\\D+", ""); | ||||
|         return Integer.parseInt(timeValueStr); | ||||
|     } | ||||
| 
 | ||||
|     private TimeAgoUnit parseTimeAgoUnit(String textualDate) throws ParsingException { | ||||
|         for (TimeAgoUnit timeAgoUnit : agoPhrases.keySet()) { | ||||
|             for (String agoPhrase : agoPhrases.get(timeAgoUnit)) { | ||||
|                 if (textualDate.toLowerCase().contains(agoPhrase.toLowerCase())){ | ||||
|                     return timeAgoUnit; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         throw new ParsingException("Unable to parse the date: " + textualDate); | ||||
|     } | ||||
| 
 | ||||
|     private Calendar getCalendar(int timeAgoAmount, TimeAgoUnit timeAgoUnit) { | ||||
|         Calendar calendarTime = getNow(); | ||||
| 
 | ||||
|         switch (timeAgoUnit) { | ||||
|             case SECONDS: | ||||
|                 calendarTime.add(Calendar.SECOND, -timeAgoAmount); | ||||
|                 break; | ||||
| 
 | ||||
|             case MINUTES: | ||||
|                 calendarTime.add(Calendar.MINUTE, -timeAgoAmount); | ||||
|                 break; | ||||
| 
 | ||||
|             case HOURS: | ||||
|                 calendarTime.add(Calendar.HOUR_OF_DAY, -timeAgoAmount); | ||||
|                 break; | ||||
| 
 | ||||
|             case DAYS: | ||||
|                 calendarTime.add(Calendar.DAY_OF_MONTH, -timeAgoAmount); | ||||
|                 markApproximatedTime(calendarTime); | ||||
|                 break; | ||||
| 
 | ||||
|             case WEEKS: | ||||
|                 calendarTime.add(Calendar.WEEK_OF_YEAR, -timeAgoAmount); | ||||
|                 markApproximatedTime(calendarTime); | ||||
|                 break; | ||||
| 
 | ||||
|             case MONTHS: | ||||
|                 calendarTime.add(Calendar.MONTH, -timeAgoAmount); | ||||
|                 markApproximatedTime(calendarTime); | ||||
|                 break; | ||||
| 
 | ||||
|             case YEARS: | ||||
|                 calendarTime.add(Calendar.YEAR, -timeAgoAmount); | ||||
|                 // Prevent `PrettyTime` from showing '12 months ago'. | ||||
|                 calendarTime.add(Calendar.DAY_OF_MONTH, -1); | ||||
|                 markApproximatedTime(calendarTime); | ||||
|                 break; | ||||
|         } | ||||
| 
 | ||||
|         return calendarTime; | ||||
|     } | ||||
| 
 | ||||
|     private Calendar getNow() { | ||||
|         return (Calendar) consistentNow.clone(); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Marks the time as approximated by setting minutes, seconds and milliseconds to 0. | ||||
|      * @param calendarTime Time to be marked as approximated | ||||
|      */ | ||||
|     private void markApproximatedTime(Calendar calendarTime) { | ||||
|         calendarTime.set(Calendar.MINUTE, 0); | ||||
|         calendarTime.set(Calendar.SECOND, 0); | ||||
|         calendarTime.set(Calendar.MILLISECOND, 0); | ||||
|     } | ||||
| 
 | ||||
|     static { | ||||
|         DEFAULT_AGO_PHRASES.put(TimeAgoUnit.SECONDS, Collections.singleton("sec")); | ||||
|         DEFAULT_AGO_PHRASES.put(TimeAgoUnit.MINUTES, Collections.singleton("min")); | ||||
|         DEFAULT_AGO_PHRASES.put(TimeAgoUnit.HOURS, Collections.singleton("hour")); | ||||
|         DEFAULT_AGO_PHRASES.put(TimeAgoUnit.DAYS, Collections.singleton("day")); | ||||
|         DEFAULT_AGO_PHRASES.put(TimeAgoUnit.WEEKS, Collections.singleton("week")); | ||||
|         DEFAULT_AGO_PHRASES.put(TimeAgoUnit.MONTHS, Collections.singleton("month")); | ||||
|         DEFAULT_AGO_PHRASES.put(TimeAgoUnit.YEARS, Collections.singleton("year")); | ||||
|     } | ||||
| 
 | ||||
|     public enum TimeAgoUnit { | ||||
|         SECONDS, | ||||
|         MINUTES, | ||||
|         HOURS, | ||||
|         DAYS, | ||||
|         WEEKS, | ||||
|         MONTHS, | ||||
|         YEARS, | ||||
|     } | ||||
| } | ||||
|  | @ -41,6 +41,7 @@ import static java.util.Collections.singletonList; | |||
| public class Downloader implements org.schabi.newpipe.extractor.Downloader { | ||||
| 
 | ||||
|     private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0"; | ||||
|     private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en"; | ||||
|     private static String mCookies = ""; | ||||
| 
 | ||||
|     private static Downloader instance = null; | ||||
|  | @ -171,6 +172,7 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader { | |||
|         URL url = new URL(siteUrl); | ||||
|         HttpsURLConnection con = (HttpsURLConnection) url.openConnection(); | ||||
|         // HttpsURLConnection con = NetCipher.getHttpsURLConnection(url); | ||||
|         con.setRequestProperty("Accept-Language", DEFAULT_HTTP_ACCEPT_LANGUAGE); | ||||
|         return dl(con); | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -4,6 +4,7 @@ import org.schabi.newpipe.extractor.InfoItem; | |||
| import org.schabi.newpipe.extractor.ListExtractor; | ||||
| import org.schabi.newpipe.extractor.stream.StreamInfoItem; | ||||
| 
 | ||||
| import java.util.Calendar; | ||||
| import java.util.List; | ||||
| 
 | ||||
| import static org.junit.Assert.*; | ||||
|  | @ -27,6 +28,14 @@ public final class DefaultTests { | |||
|                 StreamInfoItem streamInfoItem = (StreamInfoItem) item; | ||||
|                 assertNotEmpty("Uploader name not set: " + item, streamInfoItem.getUploaderName()); | ||||
|                 assertNotEmpty("Uploader url not set: " + item, streamInfoItem.getUploaderUrl()); | ||||
| 
 | ||||
|                 final String textualUploadDate = streamInfoItem.getTextualUploadDate(); | ||||
|                 if (textualUploadDate != null && !textualUploadDate.isEmpty()) { | ||||
|                     final Calendar uploadDate = streamInfoItem.getUploadDate(); | ||||
|                     assertNotNull("No parsed upload date", uploadDate); | ||||
|                     assertTrue("Upload date not in the past", uploadDate.before(Calendar.getInstance())); | ||||
|                 } | ||||
| 
 | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue