diff --git a/app/config.py b/app/config.py
index 7bf6075..bb2ec5c 100644
--- a/app/config.py
+++ b/app/config.py
@@ -1,7 +1,8 @@
 import os
 import getpass
 
-AGORA_PATH = os.path.join('/home', getpass.getuser(), 'agora')
+# Honor $AGORA_PATH first; only look up the current user when it is unset.
+AGORA_PATH = os.getenv('AGORA_PATH') or os.path.join('/home', getpass.getuser(), 'agora')
 AGORA_VERSION = '0.5.3'
 
 # With trailing slash.
diff --git a/app/db.py b/app/db.py
index d0b0bc5..01639a0 100644
--- a/app/db.py
+++ b/app/db.py
@@ -57,6 +57,11 @@ class Node:
         # i.e. if two users contribute subnodes titled [[foo]], they both show up when querying node [[foo]].
         self.wikilink = wikilink
         self.uri = wikilink
+        # ensure wikilinks to journal entries are all shown in iso format
+        # (important to do it after self.uri = wikilink to avoid breaking
+        # links)
+        if util.is_journal(wikilink):
+            self.wikilink = util.canonical_wikilink(wikilink)
         self.url = '/node/' + self.uri
         self.subnodes = []
 
@@ -187,7 +192,7 @@ def all_nodes(include_journals=True):
 
     # remove journals if so desired.
     if not include_journals:
-        nodes = [node for node in nodes if not re.match('[0-9]+?-[0-9]+?-[0-9]+?', node.wikilink)]
+        nodes = [node for node in nodes if not util.is_journal(node.wikilink)]
 
     # TODO: experiment with other ranking.
     # return sorted(nodes, key=lambda x: -x.size())
@@ -201,7 +206,7 @@ def all_users():
 def all_journals():
     # hack hack.
     nodes = all_nodes()
-    nodes = [node for node in nodes if re.match('[0-9]+?-[0-9]+?-[0-9]+?', node.wikilink)]
+    nodes = [node for node in nodes if util.is_journal(node.wikilink)]
     return sorted(nodes, key=attrgetter('wikilink'), reverse=True)
 
 def nodes_by_wikilink(wikilink):
diff --git a/app/util.py b/app/util.py
index 25ea671..f70a978 100644
--- a/app/util.py
+++ b/app/util.py
@@ -11,8 +11,72 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+import re
+from functools import lru_cache
+
+from dateparser import DateDataParser
+
+# Shared parser instance, restricted to English date formats.
+parser = DateDataParser(languages=['en'])
+
 
 def canonical_wikilink(wikilink):
+    """Return the normalized form of a wikilink.
+
+    Journal (date) wikilinks are first rewritten as ISO dates; the result
+    is then lowercased, spaces and slashes become dashes, and apostrophes
+    and commas are dropped.
+    """
+    if is_journal(wikilink):
+        # Best effort: canonical_date returns its input when parsing fails.
+        wikilink = canonical_date(wikilink)
+
     # hack hack
-    wikilink = wikilink.lower().replace(' ', '-').replace('\'', '').replace(',', '')
+    wikilink = (
+        wikilink.lower()
+        .replace(' ', '-')
+        .replace('\'', '')
+        .replace(',', '')
+        .replace('/', '-')
+    )
     return wikilink
+
+
+@lru_cache(maxsize=4096)  # bounded so arbitrary wikilinks cannot grow it forever
+def canonical_date(wikilink):
+    """Return wikilink as an ISO date (YYYY-MM-DD).
+
+    The input is returned unchanged when dateparser cannot parse it.
+    """
+    try:
+        date = parser.get_date_data(wikilink).date_obj
+    except Exception:
+        # Deliberately best effort: a parser failure must not break callers.
+        # TODO: if we add logging, maybe log that we couldn't parse a date here
+        return wikilink
+    if date is None:
+        return wikilink
+    return date.isoformat().split("T")[0]
+
+
+@lru_cache(maxsize=1)  # compile the combined pattern only once
+def get_combined_date_regex():
+    """Return one compiled regex matching every supported journal date format."""
+    date_regexes = [
+        # iso format
+        '[0-9]{4}-[0-9]{2}-[0-9]{2}',
+        # roam format, e.g. "January 3rd, 2021"
+        '(January|February|March|April|May|June|July|August|September|October|November|December) [0-9]{1,2}(st|nd|rd|th), [0-9]{4}',
+        # roam format after filename sanitization, e.g. "january-3rd-2021"
+        '(january|february|march|april|may|june|july|august|september|october|november|december)-[0-9]{1,2}(st|nd|rd|th)-[0-9]{4}',
+    ]
+
+    # combine all the date regexes into one anchored super regex
+    return re.compile(f'^({"|".join(date_regexes)})$')
+
+
+@lru_cache(maxsize=4096)
+def is_journal(wikilink):
+    """Return True if wikilink is written in a known journal date format."""
+    return bool(get_combined_date_regex().match(wikilink))
diff --git a/requirements.txt b/requirements.txt
index b32d89e..a56fd0f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,3 +20,4 @@ webencodings==0.5.1
 Werkzeug==1.0.1
 WTForms==2.3.3
 zipp==3.4.0
+dateparser==1.0.0