agora-server/app/util.py

# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from dateparser import DateDataParser
from functools import lru_cache

# Shared dateparser instance, restricted to English. It is created once when
# the module is imported and reused by canonical_date() for every lookup.
parser = DateDataParser(languages=['en'])

def rank(l, user):
    """Return the items of ``l`` as a new list ordered by their ``user`` attribute.

    Args:
        l: iterable of objects exposing a ``user`` attribute.
        user: accepted for the caller's convenience but currently ignored
            (hack hack: the ranking is a plain sort for now).

    Returns:
        A new list; the input is left untouched. Items with equal ``user``
        values keep their original relative order.
    """
    def owner_of(entry):
        return entry.user

    ordered = list(l)
    ordered.sort(key=owner_of)
    return ordered

def canonical_wikilink(wikilink):
    """Return the canonical, filename-safe form of a wikilink.

    If the link looks like a journal date (per ``is_journal``) it is first
    passed through ``canonical_date`` so that the different spellings of a
    date collapse onto one form. The result is then lowercased, spaces and
    slashes become hyphens, and apostrophes and commas are dropped.

    Args:
        wikilink: the raw link text.

    Returns:
        The canonicalized link as a string.
    """
    if is_journal(wikilink):
        try:
            wikilink = canonical_date(wikilink)
        except Exception:
            # Best effort: if date parsing blows up, fall back to sanitizing
            # the original text. Only Exception is caught so that
            # KeyboardInterrupt / SystemExit still propagate.
            # TODO: if we add logging, maybe log that we couldn't parse a date here
            pass

    # hack hack
    # '/' is replaced so that a title can never introduce a subfolder when
    # the link is used as a file name.
    return (
        wikilink.lower()
        .replace(' ', '-')
        .replace('\'', '')
        .replace(',', '')
        .replace('/', '-')
    )


# Bounded so that a long-running process fed arbitrary strings cannot grow
# the cache without limit; results are identical either way.
@lru_cache(maxsize=4096)
def canonical_date(wikilink):
    """Convert a date-like wikilink to its ISO ``YYYY-MM-DD`` form.

    Args:
        wikilink: text to interpret as a date.

    Returns:
        The date portion of the parsed value's ISO representation, or
        ``wikilink`` unchanged when the parser yields no date.

    Errors raised by the parser itself are not handled here and propagate
    to the caller.
    """
    date = parser.get_date_data(wikilink).date_obj
    if date is None:
        # The parser could not make sense of the text; leave it untouched.
        return wikilink

    # isoformat() yields "YYYY-MM-DDTHH:MM:SS"; keep only the date part.
    return date.isoformat().split("T")[0]


@lru_cache(maxsize=1)  # no arguments, so this memoizes the one compiled pattern
def get_combined_date_regex():
    """Return the compiled regex that recognizes journal-style date strings.

    The pattern is anchored at both ends and accepts any of:

    * ISO dates, e.g. ``2020-11-28``
    * Roam dates, e.g. ``November 3rd, 2020``
    * Roam dates after filename sanitization, e.g. ``november-3rd-2020``

    Returns:
        A compiled ``re.Pattern``. Thanks to ``lru_cache`` the pattern is
        compiled on the first call only and the same object is returned
        afterwards.
    """
    date_regexes = [
        # iso format
        r'[0-9]{4}-[0-9]{2}-[0-9]{2}',
        # roam format (what a monstrosity!)
        # The ordinal suffix must include "rd" for the 3rd and 23rd.
        r'(January|February|March|April|May|June|July|August|September|October|November|December) [0-9]{1,2}(st|nd|rd|th), [0-9]{4}',
        # roam format (after filename sanitization)
        r'(january|february|march|april|may|june|july|august|september|october|november|december)-[0-9]{1,2}(st|nd|rd|th)-[0-9]{4}',
    ]

    # combine all the date regexes into one super regex
    return re.compile(f'^({"|".join(date_regexes)})$')


# Bounded so that a long-running process fed arbitrary wikilinks cannot grow
# the cache without limit; results are identical either way.
@lru_cache(maxsize=4096)
def is_journal(wikilink):
    """Tell whether a wikilink is written as a journal date.

    Args:
        wikilink: the link text to test.

    Returns:
        The ``re.Match`` object (truthy) when the whole string matches one of
        the patterns in ``get_combined_date_regex()``, otherwise ``None``.
    """
    return get_combined_date_regex().match(wikilink)
Add util.py, a bag of holding (hack hack). 2020-11-16 14:26:03 +00:00			`# Copyright 2020 Google LLC`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
make journals in roam date format be recognized by journals page 2020-11-28 03:18:51 +00:00			`import re`
make wikilinks for journals all render in iso format (very inefficient implementation for now) 2020-11-28 03:56:19 +00:00			`from dateparser import DateDataParser`
			`from functools import lru_cache`
Add util.py, a bag of holding (hack hack). 2020-11-16 14:26:03 +00:00
use lru_cache to make journal date conversion less inefficient, cleanups 2020-11-28 04:06:40 +00:00			`parser = DateDataParser(languages=['en'])`
make wikilinks for journals all render in iso format (very inefficient implementation for now) 2020-11-28 03:56:19 +00:00
Move index to be just another node. 2020-12-04 21:11:44 +00:00			`def rank(l, user):`
			`# hack hack`
			`return sorted(l, key=lambda x: x.user)`

Add util.py, a bag of holding (hack hack). 2020-11-16 14:26:03 +00:00			`def canonical_wikilink(wikilink):`
make wikilinks for journals all render in iso format (very inefficient implementation for now) 2020-11-28 03:56:19 +00:00
			`if is_journal(wikilink):`
			`try:`
use lru_cache to make journal date conversion less inefficient, cleanups 2020-11-28 04:06:40 +00:00			`wikilink = canonical_date(wikilink)`
make wikilinks for journals all render in iso format (very inefficient implementation for now) 2020-11-28 03:56:19 +00:00			`except:`
			`# TODO: if we add logging, maybe log that we couldn't parse a date here`
			`pass`

Add util.py, a bag of holding (hack hack). 2020-11-16 14:26:03 +00:00			`# hack hack`
protect against / in page title (common in roam exports) roam's namespace feature recommends the use of / in page titles, but since we are using a file-based approach here important to filter them out lest we end up with subfolder madness 2020-11-28 02:32:56 +00:00			`wikilink = (`
			`wikilink.lower()`
			`.replace(' ', '-')`
			`.replace('\'', '')`
			`.replace(',', '')`
			`.replace('/', '-')`
			`)`
Add util.py, a bag of holding (hack hack). 2020-11-16 14:26:03 +00:00			`return wikilink`
make journals in roam date format be recognized by journals page 2020-11-28 03:18:51 +00:00

make wikilinks for journals all render in iso format (very inefficient implementation for now) 2020-11-28 03:56:19 +00:00			`@lru_cache(maxsize=None)`
use lru_cache to make journal date conversion less inefficient, cleanups 2020-11-28 04:06:40 +00:00			`def canonical_date(wikilink):`
			`date = parser.get_date_data(wikilink).date_obj`
			`try:`
remove an inadvertent booby trap 2020-11-29 02:18:48 +00:00			`wikilink = date.isoformat().split("T")[0]`
use lru_cache to make journal date conversion less inefficient, cleanups 2020-11-28 04:06:40 +00:00			`except:`
			`pass`

remove an inadvertent booby trap 2020-11-29 02:18:48 +00:00			`return wikilink`
make wikilinks for journals all render in iso format (very inefficient implementation for now) 2020-11-28 03:56:19 +00:00
use lru_cache to make journal date conversion less inefficient, cleanups 2020-11-28 04:06:40 +00:00
			`@lru_cache(maxsize=1) #memoize this`
			`def get_combined_date_regex():`
make journals in roam date format be recognized by journals page 2020-11-28 03:18:51 +00:00			`date_regexes = [`
			`# iso format`
			`'[0-9]{4}-[0-9]{2}-[0-9]{2}',`
			`# roam format (what a monstrosity!)`
			`'(January\|February\|March\|April\|May\|June\|July\|August\|September\|October\|November\|December) [0-9]{1,2}(st\|nd\|th), [0-9]{4}',`
use lru_cache to make journal date conversion less inefficient, cleanups 2020-11-28 04:06:40 +00:00			`# roam format (after filename sanitization)`
make journals in roam date format be recognized by journals page 2020-11-28 03:18:51 +00:00			`'(january\|february\|march\|april\|may\|june\|july\|august\|september\|october\|november\|december)-[0-9]{1,2}(st\|nd\|th)-[0-9]{4}',`
			`]`

			`# combine all the date regexes into one super regex`
make wikilinks for journals all render in iso format (very inefficient implementation for now) 2020-11-28 03:56:19 +00:00			`# TODO: it'd really be better to compile this regex once rather than on`
			`# each request, but as the knuth would say premature optimization is the`
			`# root of all evil, etc. etc.`
use lru_cache to make journal date conversion less inefficient, cleanups 2020-11-28 04:06:40 +00:00			`return re.compile(f'^({"\|".join(date_regexes)})$')`

make journals in roam date format be recognized by journals page 2020-11-28 03:18:51 +00:00
use lru_cache to make journal date conversion less inefficient, cleanups 2020-11-28 04:06:40 +00:00			`@lru_cache(maxsize=None)`
			`def is_journal(wikilink):`
			`return get_combined_date_regex().match(wikilink)`