Implement time ago parser and improve localization handling

- Handle special cases for languages where the number is not shown
- Rework the Downloader base implementation, allowing for more
advanced things to be done
- Separate the localization from the content country (just like
YouTube lets the user choose both).
This commit is contained in:
Mauricio Colli 2019-04-28 17:03:16 -03:00
parent 180836c180
commit 3638f0e0ea
No known key found for this signature in database
GPG key ID: F200BFD6F29DDD85
274 changed files with 4770 additions and 3468 deletions

View file

@ -0,0 +1,116 @@
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.timeago.PatternsHolder;
import org.schabi.newpipe.extractor.timeago.TimeAgoUnit;
import java.io.*;
import java.util.*;
/**
 * Generates one {@code PatternsHolder} subclass per language from the
 * {@code unique_patterns.json} file.
 *
 * <p>Every top-level key of the JSON file is a language code; its value holds a
 * {@code word_separator} string and one array per time unit. Array elements are either
 * plain pattern strings or JSON objects describing special cases (amount mapped to text),
 * which are emitted as {@code putSpecialCase(...)} calls in the generated constructor.
 *
 * <p>All paths are relative to the working directory the generator is started from.
 */
public class GeneratePatternClasses {
    /** JSON file holding the patterns of every language. */
    private static final String PATTERNS_FILE = "timeago-parser/raw/unique_patterns.json";

    /** Directory (with trailing slash) receiving the generated {@code <languageCode>.java} files. */
    private static final String OUTPUT_DIRECTORY =
            "timeago-parser/src/main/java/org/schabi/newpipe/extractor/timeago/patterns/";

    /** Header placed at the top of every generated class. */
    private static final String INFO_CLASS_GENERATED = "/**/// DO NOT MODIFY THIS FILE MANUALLY\n" +
            "/**/// This class was automatically generated by \"GeneratePatternClasses.java\",\n" +
            "/**/// modify the \"unique_patterns.json\" and re-generate instead.";

    /**
     * Reads the patterns file and writes one pattern class per language.
     *
     * <p>A failure to write a single class is reported and does not stop the generation of
     * the remaining languages.
     *
     * @param args ignored
     * @throws FileNotFoundException if the patterns file does not exist or cannot be opened
     * @throws JsonParserException   if the patterns file is not valid JSON
     */
    public static void main(String[] args) throws FileNotFoundException, JsonParserException {
        // TreeMap: process the languages in a stable, sorted order.
        final TreeMap<String, Object> map = new TreeMap<>(loadPatterns());

        for (Map.Entry<String, Object> entry : map.entrySet()) {
            // '-' is not allowed in a Java identifier, and the code is used as the class name.
            final String languageCode = entry.getKey().replace('-', '_');
            final JsonObject unitsList = (JsonObject) entry.getValue();

            final String wordSeparator = (String) unitsList.get("word_separator");

            final JsonArray seconds = (JsonArray) unitsList.get("seconds");
            final JsonArray minutes = (JsonArray) unitsList.get("minutes");
            final JsonArray hours = (JsonArray) unitsList.get("hours");
            final JsonArray days = (JsonArray) unitsList.get("days");
            final JsonArray weeks = (JsonArray) unitsList.get("weeks");
            final JsonArray months = (JsonArray) unitsList.get("months");
            final JsonArray years = (JsonArray) unitsList.get("years");

            // Must run before join(): these calls strip the special-case objects out of the
            // arrays, leaving only the plain pattern strings behind.
            final StringBuilder specialCasesString = new StringBuilder();
            specialCasesConstruct(TimeAgoUnit.SECONDS, seconds, specialCasesString);
            specialCasesConstruct(TimeAgoUnit.MINUTES, minutes, specialCasesString);
            specialCasesConstruct(TimeAgoUnit.HOURS, hours, specialCasesString);
            specialCasesConstruct(TimeAgoUnit.DAYS, days, specialCasesString);
            specialCasesConstruct(TimeAgoUnit.WEEKS, weeks, specialCasesString);
            specialCasesConstruct(TimeAgoUnit.MONTHS, months, specialCasesString);
            specialCasesConstruct(TimeAgoUnit.YEARS, years, specialCasesString);

            System.out.println("Generating \"" + languageCode + "\" pattern class...");

            // The patterns contain non-ASCII text, so the charset is fixed to UTF-8 instead of
            // relying on the platform default.
            try (final Writer fileOut = new OutputStreamWriter(
                    new FileOutputStream(OUTPUT_DIRECTORY + languageCode + ".java"),
                    java.nio.charset.StandardCharsets.UTF_8)) {
                final String classSource = INFO_CLASS_GENERATED + "\n" +
                        "\n" +
                        "package org.schabi.newpipe.extractor.timeago.patterns;\n\n" +
                        "import org.schabi.newpipe.extractor.timeago.PatternsHolder;\n" +
                        (specialCasesString.length() > 0 ? "import org.schabi.newpipe.extractor.timeago.TimeAgoUnit;\n" : "") +
                        "\n" +
                        "public class " + languageCode + " extends PatternsHolder {\n" +
                        " private static final String WORD_SEPARATOR = \"" + wordSeparator + "\";\n" +
                        " private static final String[]\n" +
                        " SECONDS /**/ = {" + join(seconds) + "},\n" +
                        " MINUTES /**/ = {" + join(minutes) + "},\n" +
                        " HOURS /**/ = {" + join(hours) + "},\n" +
                        " DAYS /**/ = {" + join(days) + "},\n" +
                        " WEEKS /**/ = {" + join(weeks) + "},\n" +
                        " MONTHS /**/ = {" + join(months) + "},\n" +
                        " YEARS /**/ = {" + join(years) + "};\n" +
                        "\n" +
                        " private static final " + languageCode + " INSTANCE = new " + languageCode + "();\n" +
                        "\n" +
                        " public static " + languageCode + " getInstance() {\n" +
                        " return INSTANCE;\n" +
                        " }\n" +
                        "\n" +
                        " private " + languageCode + "() {\n" +
                        " super(WORD_SEPARATOR, SECONDS, MINUTES, HOURS, DAYS, WEEKS, MONTHS, YEARS);\n" +
                        specialCasesString.toString() +
                        " }\n" +
                        "}";
                fileOut.write(classSource);
            } catch (IOException e) {
                // Best effort: report the failure and carry on with the next language.
                e.printStackTrace();
            }
        }
    }

    /**
     * Parses the patterns file, making sure the underlying stream is closed afterwards.
     *
     * @return the parsed root object, keyed by language code
     * @throws FileNotFoundException if the patterns file does not exist or cannot be opened
     * @throws JsonParserException   if the patterns file is not valid JSON
     */
    private static JsonObject loadPatterns() throws FileNotFoundException, JsonParserException {
        try (InputStream patternsStream = new FileInputStream(PATTERNS_FILE)) {
            return JsonParser.object().from(patternsStream);
        } catch (FileNotFoundException e) {
            throw e;
        } catch (IOException e) {
            // Only reachable when closing the stream fails; main()'s signature is kept as is.
            throw new UncheckedIOException("Could not close " + PATTERNS_FILE, e);
        }
    }

    /**
     * Moves the special cases of one time unit out of {@code array} and into generated code.
     *
     * <p>Every JSON object found in {@code array} is removed from it, and each of its entries
     * (integer amount as key, text as value) is appended to {@code stringBuilder} as a
     * {@code putSpecialCase(TimeAgoUnit.<unit>, "<text>", <amount>);} line.
     *
     * @param unit          the time unit the array belongs to
     * @param array         the patterns of that unit; special-case objects are removed from it
     * @param stringBuilder receives the generated {@code putSpecialCase} lines
     * @throws NumberFormatException if a special-case key is not an integer
     */
    private static void specialCasesConstruct(TimeAgoUnit unit, JsonArray array, StringBuilder stringBuilder) {
        final Iterator<Object> iterator = array.iterator();
        while (iterator.hasNext()) {
            final Object o = iterator.next();
            if (!(o instanceof JsonObject)) {
                continue;
            }

            // Remove exactly once per element: Iterator.remove() may not be called twice for
            // the same next(), and an (empty) object must never reach join().
            iterator.remove();

            final JsonObject caseObject = (JsonObject) o;
            for (Map.Entry<String, Object> caseEntry : caseObject.entrySet()) {
                final int caseAmount = Integer.parseInt(caseEntry.getKey());
                final String caseText = (String) caseEntry.getValue();

                stringBuilder.append(" ")
                        .append("putSpecialCase(TimeAgoUnit.").append(unit.name())
                        .append(", \"").append(caseText).append("\"")
                        .append(", ").append(caseAmount).append(");").append("\n");
            }
        }
    }

    /**
     * Renders the elements of {@code list} as double-quoted values separated by {@code ", "}.
     *
     * @param list the values to render
     * @return the joined values, or an empty string for an empty list
     */
    private static String join(List<Object> list) {
        final StringBuilder toReturn = new StringBuilder();

        for (Object o : list) {
            toReturn.append('"').append(o).append('"').append(", ");
        }
        // Drop the trailing ", " (no-op on an empty builder).
        toReturn.setLength(Math.max(toReturn.length() - 2, 0));

        return toReturn.toString();
    }
}

View file

@ -1,59 +0,0 @@
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import static org.schabi.newpipe.extractor.timeago.TimeAgoPatternsManager.RESOURCE_BUNDLE_ARRAY_SEPARATOR;
/**
 * Generates one {@code time_units_<language>.properties} file per language found in
 * {@code timeago-parser/raw/unique_patterns.json}, writing them to
 * {@code timeago-parser/outBundle}.
 *
 * <p>Each time unit becomes one property whose value is the list of its patterns, joined
 * with {@code RESOURCE_BUNDLE_ARRAY_SEPARATOR} and spread over continuation lines.
 */
public class GenerateResourceBundles {

    /**
     * Reads the patterns file and writes the properties files.
     *
     * @param args ignored
     * @throws Exception if the output directory cannot be created, the patterns file cannot
     *                   be read or parsed, or a properties file cannot be written
     */
    public static void main(String[] args) throws Exception {
        File outDir = new File("timeago-parser/outBundle");
        // Fail right away with a clear message rather than on the first file write.
        if (!outDir.isDirectory() && !outDir.mkdir()) {
            throw new IllegalStateException("Could not create output directory: " + outDir);
        }

        // try-with-resources so the input file is closed once it has been parsed.
        final JsonObject object;
        try (FileInputStream patternsStream = new FileInputStream(new File("timeago-parser/raw/unique_patterns.json"))) {
            object = JsonParser.object().from(patternsStream);
        }

        // TreeMap: process the languages in sorted order.
        for (Map.Entry<String, Object> langTimeEntry : new TreeMap<>(object).entrySet()) {
            final String langName = langTimeEntry.getKey();
            StringBuilder outString = new StringBuilder();

            // Units are ordered by the project's unit-name comparator.
            final TreeMap<String, Object> sortedMap = new TreeMap<>(Utils.compareByUnitName());
            sortedMap.putAll((JsonObject) langTimeEntry.getValue());

            final Iterator<Map.Entry<String, Object>> unitEntriesIterator = sortedMap.entrySet().iterator();
            while (unitEntriesIterator.hasNext()) {
                final Map.Entry<String, Object> unitEntry = unitEntriesIterator.next();
                final String unitName = unitEntry.getKey();
                final List<Object> unitList = (JsonArray) unitEntry.getValue();

                // "unit=\" followed by one pattern per continuation line.
                outString.append(unitName).append("=\\\n");
                for (int i = 0; i < unitList.size(); i++) {
                    final String s = unitList.get(i).toString();
                    outString.append(" ").append(s);

                    // Separator and line continuation after every pattern except the last.
                    if (i < unitList.size() - 1) {
                        outString.append(RESOURCE_BUNDLE_ARRAY_SEPARATOR).append("\\").append("\n");
                    }
                }

                // Blank line between units, none after the last one.
                if (unitEntriesIterator.hasNext()) {
                    outString.append("\n\n");
                }
            }

            String fileName = "time_units_" + langName.replaceAll("-", "_") + ".properties";
            System.out.println("Writing " + fileName + "...");
            try (OutputStream out = new FileOutputStream(new File(outDir, fileName))) {
                out.write(outString.toString().getBytes("UTF-8"));
            }
        }
    }
}

View file

@ -1,5 +1,6 @@
{
"af": {
"word_separator": " ",
"seconds": [
"sekonde",
"sekondes"
@ -29,6 +30,7 @@
]
},
"am": {
"word_separator": " ",
"seconds": [
"ሰኮንዶች",
"ሴኮንድ"
@ -59,6 +61,7 @@
]
},
"ar": {
"word_separator": " ",
"seconds": [
"ثانية",
"ثانيتين",
@ -97,6 +100,7 @@
]
},
"az": {
"word_separator": " ",
"seconds": [
"saniyə"
],
@ -120,37 +124,48 @@
]
},
"be": {
"word_separator": " ",
"seconds": [
"секунд",
"секунду",
"секунды"
],
"minutes": [
"хвілін",
"хвіліну",
"хвіліны"
],
"hours": [
"гадзін",
"гадзіну",
"гадзіны"
],
"days": [
"дзень",
"дня"
"дзён",
"дня",
"дні"
],
"weeks": [
"тыдзень",
"тыдня"
"тыдня",
"тыдні"
],
"months": [
"месяц",
"месяца"
"месяца",
"месяцы",
"месяцаў"
],
"years": [
"год",
"года"
"года",
"гады",
"гадоў"
]
},
"bg": {
"word_separator": " ",
"seconds": [
"секунда",
"секунди"
@ -181,6 +196,7 @@
]
},
"bn": {
"word_separator": " ",
"seconds": [
"সেকেন্ড"
],
@ -204,17 +220,22 @@
]
},
"bs": {
"word_separator": " ",
"seconds": [
"sekundi",
"sekunde",
"sekundu"
],
"minutes": [
"minuta",
"minute",
"minutu"
],
"hours": [
"h",
"sat"
"sat",
"sata",
"sati"
],
"days": [
"dan",
@ -225,15 +246,18 @@
],
"months": [
"mj.",
"mjesec"
"mjesec",
"mjeseca",
"mjeseci"
],
"years": [
"godine",
"godina",
"godine",
"godinu"
]
},
"ca": {
"word_separator": " ",
"seconds": [
"segon",
"segons"
@ -264,6 +288,7 @@
]
},
"cs": {
"word_separator": " ",
"seconds": [
"sekundami",
"sekundou"
@ -290,10 +315,12 @@
],
"years": [
"rokem",
"roky"
"roky",
"lety"
]
},
"da": {
"word_separator": " ",
"seconds": [
"sekund",
"sekunder"
@ -323,6 +350,7 @@
]
},
"de": {
"word_separator": " ",
"seconds": [
"Sekunde",
"Sekunden"
@ -353,6 +381,7 @@
]
},
"el": {
"word_separator": " ",
"seconds": [
"δευτερόλεπτα",
"δευτερόλεπτο"
@ -383,6 +412,7 @@
]
},
"en": {
"word_separator": " ",
"seconds": [
"second",
"seconds"
@ -413,6 +443,7 @@
]
},
"en-GB": {
"word_separator": " ",
"seconds": [
"second",
"seconds"
@ -443,6 +474,7 @@
]
},
"es": {
"word_separator": " ",
"seconds": [
"segundo",
"segundos"
@ -473,6 +505,7 @@
]
},
"es-419": {
"word_separator": " ",
"seconds": [
"segundo",
"segundos"
@ -503,6 +536,7 @@
]
},
"es-US": {
"word_separator": " ",
"seconds": [
"segundo",
"segundos"
@ -533,6 +567,7 @@
]
},
"et": {
"word_separator": " ",
"seconds": [
"sekund",
"sekundit"
@ -563,6 +598,7 @@
]
},
"eu": {
"word_separator": " ",
"seconds": [
"segundo"
],
@ -589,6 +625,7 @@
]
},
"fa": {
"word_separator": " ",
"seconds": [
"ثانیه"
],
@ -612,6 +649,7 @@
]
},
"fi": {
"word_separator": " ",
"seconds": [
"sekunti",
"sekuntia"
@ -642,6 +680,7 @@
]
},
"fil": {
"word_separator": " ",
"seconds": [
"segundo"
],
@ -665,6 +704,7 @@
]
},
"fr": {
"word_separator": " ",
"seconds": [
"seconde",
"secondes"
@ -694,6 +734,7 @@
]
},
"fr-CA": {
"word_separator": " ",
"seconds": [
"seconde",
"secondes"
@ -723,6 +764,7 @@
]
},
"gl": {
"word_separator": " ",
"seconds": [
"segundo",
"segundos"
@ -753,6 +795,7 @@
]
},
"gu": {
"word_separator": " ",
"seconds": [
"સેકંડ"
],
@ -776,6 +819,7 @@
]
},
"hi": {
"word_separator": " ",
"seconds": [
"सेकंड"
],
@ -790,7 +834,8 @@
"दिन"
],
"weeks": [
"सप्ताह"
"सप्ताह",
"हफ़्ते"
],
"months": [
"महीना",
@ -801,6 +846,7 @@
]
},
"hr": {
"word_separator": " ",
"seconds": [
"sekunde",
"sekundi",
@ -836,6 +882,7 @@
]
},
"hu": {
"word_separator": " ",
"seconds": [
"másodperce"
],
@ -859,6 +906,7 @@
]
},
"hy": {
"word_separator": " ",
"seconds": [
"վայրկյան"
],
@ -882,6 +930,7 @@
]
},
"id": {
"word_separator": " ",
"seconds": [
"detik"
],
@ -905,25 +954,41 @@
]
},
"is": {
"word_separator": " ",
"seconds": [
"sekúndu",
"sekúndum"
"sekúndum",
"second",
"seconds"
],
"minutes": [
"mínútu",
"mínútum"
"mínútum",
"minute",
"minutes"
],
"hours": [
"klukkustund",
"klukkustundum"
"klukkustundum",
"hour",
"hours"
],
"days": [
"degi",
"dögum"
"dögum",
"day",
"days"
],
"weeks": [
"viku",
"vikum"
"vikum",
"week",
"weeks"
],
"months": [
"mánuði",
@ -935,6 +1000,7 @@
]
},
"it": {
"word_separator": " ",
"seconds": [
"secondi",
"secondo"
@ -965,6 +1031,7 @@
]
},
"iw": {
"word_separator": " ",
"seconds": [
"שניות",
"שנייה"
@ -975,26 +1042,42 @@
],
"hours": [
"שעה",
"שעות"
"שעות",
{
"2": "שעתיים"
}
],
"days": [
"יום",
"ימים"
"ימים",
{
"2": "יומיים"
}
],
"weeks": [
"שבוע",
"שבועות"
"שבועות",
{
"2": "שבועיים"
}
],
"months": [
"חודש",
"חודשים"
"חודשים",
{
"2": "חודשיים"
}
],
"years": [
"שנה",
"שנים"
"שנים",
{
"2": "שנתיים"
}
]
},
"ja": {
"word_separator": "",
"seconds": [
"秒前"
],
@ -1018,6 +1101,7 @@
]
},
"ka": {
"word_separator": " ",
"seconds": [
"წამის"
],
@ -1041,6 +1125,7 @@
]
},
"kk": {
"word_separator": " ",
"seconds": [
"секунд"
],
@ -1064,6 +1149,7 @@
]
},
"km": {
"word_separator": "",
"seconds": [
"វិនាទី\u200bមុន",
"១វិនាទីមុន"
@ -1094,6 +1180,7 @@
]
},
"kn": {
"word_separator": " ",
"seconds": [
"ಸೆಕೆಂಡುಗಳ",
"ಸೆಕೆಂಡ್"
@ -1124,6 +1211,7 @@
]
},
"ko": {
"word_separator": "",
"seconds": [
"초"
],
@ -1147,6 +1235,7 @@
]
},
"ky": {
"word_separator": " ",
"seconds": [
"секунд"
],
@ -1170,6 +1259,7 @@
]
},
"lo": {
"word_separator": "",
"seconds": [
"ວິນາທີກ່ອນນີ້"
],
@ -1194,6 +1284,7 @@
]
},
"lt": {
"word_separator": " ",
"seconds": [
"sekundes",
"sekundę",
@ -1228,6 +1319,7 @@
]
},
"lv": {
"word_separator": " ",
"seconds": [
"sekundes",
"sekundēm"
@ -1259,6 +1351,7 @@
]
},
"mk": {
"word_separator": " ",
"seconds": [
"секунда",
"секунди"
@ -1289,6 +1382,7 @@
]
},
"ml": {
"word_separator": "",
"seconds": [
"സെക്കന്റ്",
"സെക്കൻഡ്"
@ -1314,6 +1408,7 @@
]
},
"mn": {
"word_separator": " ",
"seconds": [
"секундын"
],
@ -1338,6 +1433,7 @@
]
},
"mr": {
"word_separator": "",
"seconds": [
"सेकंदांपूर्वी",
"सेकंदापूर्वी"
@ -1368,6 +1464,7 @@
]
},
"ms": {
"word_separator": " ",
"seconds": [
"saat"
],
@ -1391,6 +1488,7 @@
]
},
"my": {
"word_separator": " ",
"seconds": [
"စက္ကန့်"
],
@ -1414,6 +1512,7 @@
]
},
"ne": {
"word_separator": " ",
"seconds": [
"सेकेन्ड"
],
@ -1437,6 +1536,7 @@
]
},
"nl": {
"word_separator": " ",
"seconds": [
"seconde",
"seconden"
@ -1465,6 +1565,7 @@
]
},
"no": {
"word_separator": " ",
"seconds": [
"sekund",
"sekunder"
@ -1493,6 +1594,7 @@
]
},
"pa": {
"word_separator": " ",
"seconds": [
"ਸਕਿੰਟ"
],
@ -1519,6 +1621,7 @@
]
},
"pl": {
"word_separator": " ",
"seconds": [
"sekund",
"sekundy",
@ -1554,6 +1657,7 @@
]
},
"pt": {
"word_separator": " ",
"seconds": [
"segundo",
"segundos"
@ -1584,6 +1688,7 @@
]
},
"pt-PT": {
"word_separator": " ",
"seconds": [
"segundo",
"segundos"
@ -1614,6 +1719,7 @@
]
},
"ro": {
"word_separator": " ",
"seconds": [
"secunde",
"secundă"
@ -1644,6 +1750,7 @@
]
},
"ru": {
"word_separator": " ",
"seconds": [
"секунд",
"секунду",
@ -1681,6 +1788,7 @@
]
},
"si": {
"word_separator": " ",
"seconds": [
"තත්පර"
],
@ -1704,6 +1812,7 @@
]
},
"sk": {
"word_separator": " ",
"seconds": [
"sekundami",
"sekundou"
@ -1734,6 +1843,7 @@
]
},
"sl": {
"word_separator": " ",
"seconds": [
"sekundama",
"sekundami",
@ -1771,6 +1881,7 @@
]
},
"sq": {
"word_separator": " ",
"seconds": [
"sekonda",
"sekondë"
@ -1797,6 +1908,7 @@
]
},
"sr": {
"word_separator": " ",
"seconds": [
"секунде",
"секунди"
@ -1810,8 +1922,12 @@
"сати"
],
"days": [
"дан",
"дана"
"Пре 1 дан",
"Пре 2 дана",
"Пре 3 дана",
"Пре 4 дана",
"Пре 5 дана",
"Пре 6 дана"
],
"weeks": [
"недеље",
@ -1829,6 +1945,7 @@
]
},
"sr-Latn": {
"word_separator": " ",
"seconds": [
"sekunde",
"sekundi"
@ -1838,11 +1955,16 @@
],
"hours": [
"sat",
"sati"
"sati",
"sata"
],
"days": [
"dan",
"dana"
"Pre 1 dan",
"Pre 2 dana",
"Pre 3 dana",
"Pre 4 dana",
"Pre 5 dana",
"Pre 6 dana"
],
"weeks": [
"nedelja",
@ -1861,6 +1983,7 @@
]
},
"sv": {
"word_separator": " ",
"seconds": [
"sekund",
"sekunder"
@ -1890,6 +2013,7 @@
]
},
"sw": {
"word_separator": " ",
"seconds": [
"sekunde"
],
@ -1915,6 +2039,7 @@
]
},
"ta": {
"word_separator": " ",
"seconds": [
"வினாடி",
"வினாடிகளுக்கு"
@ -1944,6 +2069,7 @@
]
},
"te": {
"word_separator": " ",
"seconds": [
"సెకను",
"సెకన్ల"
@ -1974,6 +2100,7 @@
]
},
"th": {
"word_separator": "",
"seconds": [
"วินาทีที่ผ่านมา"
],
@ -1997,6 +2124,7 @@
]
},
"tr": {
"word_separator": " ",
"seconds": [
"saniye"
],
@ -2020,6 +2148,7 @@
]
},
"uk": {
"word_separator": " ",
"seconds": [
"секунд",
"секунди",
@ -2056,6 +2185,7 @@
]
},
"ur": {
"word_separator": " ",
"seconds": [
"سیکنڈ",
"سیکنڈز"
@ -2083,6 +2213,7 @@
]
},
"uz": {
"word_separator": " ",
"seconds": [
"soniya"
],
@ -2106,6 +2237,7 @@
]
},
"vi": {
"word_separator": " ",
"seconds": [
"giây"
],
@ -2113,7 +2245,8 @@
"phút"
],
"hours": [
"giờ"
"giờ",
"tiếng"
],
"days": [
"ngày"
@ -2129,6 +2262,7 @@
]
},
"zh-CN": {
"word_separator": "",
"seconds": [
"秒前"
],
@ -2152,6 +2286,7 @@
]
},
"zh-HK": {
"word_separator": "",
"seconds": [
"秒前"
],
@ -2175,6 +2310,7 @@
]
},
"zh-TW": {
"word_separator": "",
"seconds": [
"秒前"
],
@ -2198,6 +2334,7 @@
]
},
"zu": {
"word_separator": " ",
"seconds": [
"amasekhondi",
"isekhondi"