Merge pull request #1 from nikvdp/support-roam-dates

Support roam dates
flancian 2020-11-29 17:42:27 +01:00, committed by GitHub
commit 77f305387e
4 changed files with 64 additions and 4 deletions

View File

@@ -1,7 +1,7 @@
 import os
 import getpass
 
-AGORA_PATH = os.path.join('/home', getpass.getuser(), 'agora')
+AGORA_PATH = os.getenv('AGORA_PATH', os.path.join('/home', getpass.getuser(), 'agora'))
 AGORA_VERSION = '0.5.3'
 
 # With trailing slash.
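
For context on the change above: os.getenv returns its second argument only when the variable is unset, so deployments can now relocate the Agora root without patching this file. A minimal sketch of the behavior (both paths are example values):

    import os
    os.environ['AGORA_PATH'] = '/srv/agora'       # example override
    os.getenv('AGORA_PATH', '/home/alice/agora')  # -> '/srv/agora'
    del os.environ['AGORA_PATH']
    os.getenv('AGORA_PATH', '/home/alice/agora')  # -> '/home/alice/agora'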

View File

@@ -57,6 +57,11 @@ class Node:
         # i.e. if two users contribute subnodes titled [[foo]], they both show up when querying node [[foo]].
         self.wikilink = wikilink
         self.uri = wikilink
+        # ensure wikilinks to journal entries are all shown in iso format
+        # (important to do it after self.uri = wikilink to avoid breaking
+        # links)
+        if util.is_journal(wikilink):
+            self.wikilink = util.canonical_wikilink(wikilink)
         self.url = '/node/' + self.uri
         self.subnodes = []
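
The ordering note in the new comment is the subtle part: self.uri keeps the raw wikilink so existing /node/ URLs still resolve, while self.wikilink is rewritten to the canonical iso date used for display and grouping. A hedged sketch of the intended effect (the constructor call is hypothetical, inferred from the assignments above):

    node = Node('November 29th, 2020')  # hypothetical call; actual signature may differ
    node.uri       # 'November 29th, 2020' -- untouched, so '/node/' + uri stays stable
    node.wikilink  # '2020-11-29' -- canonicalized via util.canonical_wikilink
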
@@ -187,7 +192,7 @@ def all_nodes(include_journals=True):
     # remove journals if so desired.
     if not include_journals:
-        nodes = [node for node in nodes if not re.match('[0-9]+?-[0-9]+?-[0-9]+?', node.wikilink)]
+        nodes = [node for node in nodes if not util.is_journal(node.wikilink)]
     # TODO: experiment with other ranking.
     # return sorted(nodes, key=lambda x: -x.size())
@@ -201,7 +206,7 @@ def all_users():
 def all_journals():
     # hack hack.
     nodes = all_nodes()
-    nodes = [node for node in nodes if re.match('[0-9]+?-[0-9]+?-[0-9]+?', node.wikilink)]
+    nodes = [node for node in nodes if util.is_journal(node.wikilink)]
     return sorted(nodes, key=attrgetter('wikilink'), reverse=True)
 
 def nodes_by_wikilink(wikilink):
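
A side effect worth noting: once journal wikilinks are canonical iso dates, sorting by attrgetter('wikilink') in reverse is also a newest-first chronological sort, because iso date strings order lexicographically. A quick illustration with made-up entries:

    sorted(['2020-01-05', '2019-12-31', '2020-11-29'], reverse=True)
    # -> ['2020-11-29', '2020-01-05', '2019-12-31']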

View File

@@ -11,8 +11,61 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import re
+from dateparser import DateDataParser
+from functools import lru_cache
+
+parser = DateDataParser(languages=['en'])
+
+
 def canonical_wikilink(wikilink):
+    if is_journal(wikilink):
+        try:
+            wikilink = canonical_date(wikilink)
+        except:
+            # TODO: if we add logging, maybe log that we couldn't parse a date here
+            pass
+
     # hack hack
-    wikilink = wikilink.lower().replace(' ', '-').replace('\'', '').replace(',', '')
+    wikilink = (
+        wikilink.lower()
+        .replace(' ', '-')
+        .replace('\'', '')
+        .replace(',', '')
+        .replace('/', '-')
+    )
     return wikilink
+
+
+@lru_cache(maxsize=None)
+def canonical_date(wikilink):
+    date = parser.get_date_data(wikilink).date_obj
+    try:
+        wikilink = date.isoformat().split("T")[0]
+    except:
+        pass
+    return wikilink
+
+
+@lru_cache(maxsize=1)  # memoize this
+def get_combined_date_regex():
+    date_regexes = [
+        # iso format
+        '[0-9]{4}-[0-9]{2}-[0-9]{2}',
+        # roam format (what a monstrosity!)
+        '(January|February|March|April|May|June|July|August|September|October|November|December) [0-9]{1,2}(st|nd|rd|th), [0-9]{4}',
+        # roam format (after filename sanitization)
+        '(january|february|march|april|may|june|july|august|september|october|november|december)-[0-9]{1,2}(st|nd|rd|th)-[0-9]{4}',
+    ]
+    # combine all the date regexes into one super regex
+    # TODO: it'd really be better to compile this regex once rather than on
+    # each request, but as Knuth would say, premature optimization is the
+    # root of all evil, etc. etc.
+    return re.compile(f'^({"|".join(date_regexes)})$')
+
+
+@lru_cache(maxsize=None)
+def is_journal(wikilink):
+    return get_combined_date_regex().match(wikilink)
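
Taken together, the helpers above funnel all three accepted date spellings into a single canonical node name. A sketch of the expected behavior, assuming this file is importable as a top-level util module (as the Node code above suggests); the sample dates are arbitrary:

    import util  # assumption: module name inferred from the util.is_journal calls above
    util.is_journal('2020-11-29')            # truthy match: iso format
    util.is_journal('November 29th, 2020')   # truthy match: roam format
    util.is_journal('november-29th-2020')    # truthy match: sanitized roam format
    util.is_journal('foo')                   # None: a regular node
    util.canonical_wikilink('November 29th, 2020')  # -> '2020-11-29'
    util.canonical_wikilink('Foo Bar')              # -> 'foo-bar' (non-dates are only slugified)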

View File

@@ -20,3 +20,5 @@ webencodings==0.5.1
 Werkzeug==1.0.1
 WTForms==2.3.3
 zipp==3.4.0
+
+dateparser==1.0.0
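
The pin above is the library canonical_date leans on. A quick sanity check that mirrors the exact call pattern from the util diff, runnable after installing the requirements:

    from dateparser import DateDataParser
    parser = DateDataParser(languages=['en'])
    parser.get_date_data('November 29th, 2020').date_obj.isoformat().split('T')[0]
    # expected -> '2020-11-29'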