Merge pull request #1 from nikvdp/support-roam-dates

Support roam dates
flancian 2020-11-29 17:42:27 +01:00, committed by GitHub
commit 77f305387e
4 changed files with 64 additions and 4 deletions

View File

@@ -1,7 +1,7 @@
 import os
 import getpass
 
-AGORA_PATH = os.path.join('/home', getpass.getuser(), 'agora')
+AGORA_PATH = os.getenv('AGORA_PATH', os.path.join('/home', getpass.getuser(), 'agora'))
 AGORA_VERSION = '0.5.3'
 
 # With trailing slash.
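
For context on the change above: os.getenv returns its second argument only when the variable is unset, so deployments can now relocate the Agora root without patching this file. A minimal sketch of the behavior (both paths are example values):

    import os
    os.environ['AGORA_PATH'] = '/srv/agora'       # example override
    os.getenv('AGORA_PATH', '/home/alice/agora')  # -> '/srv/agora'
    del os.environ['AGORA_PATH']
    os.getenv('AGORA_PATH', '/home/alice/agora')  # -> '/home/alice/agora'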

View File

@@ -57,6 +57,11 @@ class Node:
         # i.e. if two users contribute subnodes titled [[foo]], they both show up when querying node [[foo]].
         self.wikilink = wikilink
         self.uri = wikilink
+        # ensure wikilinks to journal entries are all shown in iso format
+        # (important to do it after self.uri = wikilink to avoid breaking
+        # links)
+        if util.is_journal(wikilink):
+            self.wikilink = util.canonical_wikilink(wikilink)
         self.url = '/node/' + self.uri
         self.subnodes = []
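
The ordering note in the new comment is the subtle part: self.uri keeps the raw wikilink so existing /node/ URLs still resolve, while self.wikilink is rewritten to the canonical iso date used for display and grouping. A hedged sketch of the intended effect (the constructor call is hypothetical, inferred from the assignments above):

    node = Node('November 29th, 2020')  # hypothetical call; actual signature may differ
    node.uri       # 'November 29th, 2020' -- untouched, so '/node/' + uri stays stable
    node.wikilink  # '2020-11-29' -- canonicalized via util.canonical_wikilink
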
@@ -187,7 +192,7 @@ def all_nodes(include_journals=True):
     # remove journals if so desired.
     if not include_journals:
-        nodes = [node for node in nodes if not re.match('[0-9]+?-[0-9]+?-[0-9]+?', node.wikilink)]
+        nodes = [node for node in nodes if not util.is_journal(node.wikilink)]
     # TODO: experiment with other ranking.
     # return sorted(nodes, key=lambda x: -x.size())
@@ -201,7 +206,7 @@ def all_users():
 def all_journals():
     # hack hack.
     nodes = all_nodes()
-    nodes = [node for node in nodes if re.match('[0-9]+?-[0-9]+?-[0-9]+?', node.wikilink)]
+    nodes = [node for node in nodes if util.is_journal(node.wikilink)]
     return sorted(nodes, key=attrgetter('wikilink'), reverse=True)
 
 def nodes_by_wikilink(wikilink):
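
A side effect worth noting: once journal wikilinks are canonical iso dates, sorting by attrgetter('wikilink') in reverse is also a newest-first chronological sort, because iso date strings order lexicographically. A quick illustration with made-up entries:

    sorted(['2020-01-05', '2019-12-31', '2020-11-29'], reverse=True)
    # -> ['2020-11-29', '2020-01-05', '2019-12-31']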

View File

@@ -11,8 +11,61 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import re
+from dateparser import DateDataParser
+from functools import lru_cache
+
+parser = DateDataParser(languages=['en'])
+
+
 def canonical_wikilink(wikilink):
+    if is_journal(wikilink):
+        try:
+            wikilink = canonical_date(wikilink)
+        except:
+            # TODO: if we add logging, maybe log that we couldn't parse a date here
+            pass
+
     # hack hack
-    wikilink = wikilink.lower().replace(' ', '-').replace('\'', '').replace(',', '')
+    wikilink = (
+        wikilink.lower()
+        .replace(' ', '-')
+        .replace('\'', '')
+        .replace(',', '')
+        .replace('/', '-')
+    )
     return wikilink
+
+
+@lru_cache(maxsize=None)
+def canonical_date(wikilink):
+    date = parser.get_date_data(wikilink).date_obj
+    try:
+        wikilink = date.isoformat().split("T")[0]
+    except:
+        pass
+    return wikilink
+
+
+@lru_cache(maxsize=1)  # memoize this
+def get_combined_date_regex():
+    date_regexes = [
+        # iso format
+        '[0-9]{4}-[0-9]{2}-[0-9]{2}',
+        # roam format (what a monstrosity!)
+        '(January|February|March|April|May|June|July|August|September|October|November|December) [0-9]{1,2}(st|nd|rd|th), [0-9]{4}',
+        # roam format (after filename sanitization)
+        '(january|february|march|april|may|june|july|august|september|october|november|december)-[0-9]{1,2}(st|nd|rd|th)-[0-9]{4}',
+    ]
+    # combine all the date regexes into one super regex
+    # TODO: it'd really be better to compile this regex once rather than on
+    # each request, but as Knuth would say, premature optimization is the
+    # root of all evil, etc. etc.
+    return re.compile(f'^({"|".join(date_regexes)})$')
+
+
+@lru_cache(maxsize=None)
+def is_journal(wikilink):
+    return get_combined_date_regex().match(wikilink)
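
Taken together, the helpers above funnel all three accepted date spellings into a single canonical node name. A sketch of the expected behavior, assuming this file is importable as a top-level util module (as the Node code above suggests); the sample dates are arbitrary:

    import util  # assumption: module name inferred from the util.is_journal calls above
    util.is_journal('2020-11-29')            # truthy match: iso format
    util.is_journal('November 29th, 2020')   # truthy match: roam format
    util.is_journal('november-29th-2020')    # truthy match: sanitized roam format
    util.is_journal('foo')                   # None: a regular node
    util.canonical_wikilink('November 29th, 2020')  # -> '2020-11-29'
    util.canonical_wikilink('Foo Bar')              # -> 'foo-bar' (non-dates are only slugified)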

View File

@@ -20,3 +20,5 @@ webencodings==0.5.1
 Werkzeug==1.0.1
 WTForms==2.3.3
 zipp==3.4.0
+
+dateparser==1.0.0
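
The pin above is the library canonical_date leans on. A quick sanity check that mirrors the exact call pattern from the util diff, runnable after installing the requirements:

    from dateparser import DateDataParser
    parser = DateDataParser(languages=['en'])
    parser.get_date_data('November 29th, 2020').date_obj.isoformat().split('T')[0]
    # expected -> '2020-11-29'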