2020-11-07 14:34:48 +00:00
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import re
import os
2020-11-07 18:38:01 +00:00
from . import config
2020-11-16 14:26:03 +00:00
from . import util
2020-11-14 19:08:50 +00:00
from collections import defaultdict
2020-11-16 12:43:15 +00:00
from fuzzywuzzy import fuzz
2020-11-07 14:34:48 +00:00
from operator import attrgetter
# Matches [[wikilink]] markup; group 1 is the text between the brackets.
# The lazy quantifier keeps two links on the same line from being merged.
RE_WIKILINKS = re.compile(r'\[\[(.*?)\]\]')
# Exclusive lower bound on fuzz.ratio() (a 0-100 score) for two wikilinks to be
# considered a match.
FUZZ_FACTOR = 95
2020-11-07 14:34:48 +00:00
2020-11-10 21:48:20 +00:00
# URIs are ids.
# - In the case of nodes, their [[wikilink]].
# - Example: 'foo', meaning the node that is rendered when you click on [[foo]] somewhere.
# - In the case of subnodes, a relative path within the Agora.
# - Example: 'garden/flancian/README.md', meaning an actual file called README.md.
# - Note the example subnode above gets rendered in node [[README]], so fetching node with uri README would yield it (and others).
# TODO: implement.
class Graph:
    """In-memory index of nodes keyed by wikilink, plus an edge table."""

    def __init__(self):
        # [[wikilink]] -> Node
        self.nodes = {}
        # node -> [n0, ..., nn] such that node has outlinks to the target list.
        self.edges = {}

    def addsubnode(self, subnode):
        """Attach subnode to the node matching its wikilink.

        The node is created on first use. The subnode is always appended,
        including the very first one for a given wikilink.

        Args:
            subnode: object exposing a 'wikilink' attribute.
        """
        # Operate on this instance rather than the module-level G so that the
        # method behaves correctly for any Graph.
        node = self.nodes.get(subnode.wikilink)
        if node is None:
            node = Node(subnode.wikilink)
            self.nodes[subnode.wikilink] = node
        node.subnodes.append(subnode)


# Module-level graph; Subnode.__init__ registers every new subnode here.
# NOTE(review): nothing in this module ever removes entries, so repeated
# Subnode construction for the same file accumulates duplicates -- confirm
# whether that is intended.
G = Graph()
2020-11-07 14:34:48 +00:00
class Node:
    """Nodes map 1:1 to wikilinks.

    A node does not correspond to a single file. It resolves to a series of
    subnodes when being rendered: the subnodes sharing its wikilink, held in
    the attribute 'subnodes'.
    """

    def __init__(self, wikilink):
        """Create an empty node for wikilink.

        Args:
            wikilink: the wikilink text identifying this node.
        """
        # Use a node's URI as its identifier.
        # Subnodes are attached to the node matching their wikilink.
        # i.e. if two users contribute subnodes titled [[foo]], they both show up when querying node [[foo]].
        self.wikilink = wikilink
        self.uri = wikilink
        # ensure wikilinks to journal entries are all shown in iso format
        # (important to do it after self.uri = wikilink to avoid breaking
        # links)
        if util.is_journal(wikilink):
            self.wikilink = util.canonical_wikilink(wikilink)
        self.url = '/node/' + self.uri
        self.subnodes = []

    def size(self):
        """Return the number of subnodes attached to this node."""
        return len(self.subnodes)

    def go(self):
        """Return the go links of every attached subnode, in subnode order."""
        links = []
        for subnode in self.subnodes:
            links.extend(subnode.go())
        return links
2020-11-10 21:48:20 +00:00
class Subnode:
    """A subnode is a note or media resource volunteered by a user of the Agora.

    It maps to a particular file in the Agora repository, stored (relative to
    the Agora root) in the attribute 'uri'.
    """

    def __init__(self, path):
        """Load the file at path and register the subnode in the graph G.

        Args:
            path: filesystem path of the file backing this subnode.

        Raises:
            OSError: if the file cannot be opened or stat'ed.
            UnicodeDecodeError: if the file is not valid UTF-8.
        """
        # Use a subnode's URI as its identifier.
        self.uri = path_to_uri(path)
        self.url = '/subnode/' + self.uri
        # Subnodes are attached to the node matching their wikilink.
        # i.e. if two users contribute subnodes titled [[foo]], they both show up when querying node [[foo]].
        self.wikilink = util.canonical_wikilink(path_to_wikilink(path))
        self.user = path_to_user(path)
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of the machine running the server.
        with open(path, encoding='utf-8') as f:
            self.content = f.read()
        self.mtime = os.path.getmtime(path)
        self.outlinks = content_to_outlinks(self.content)
        self.node = self.wikilink
        # Initiate node for wikilink if this is the first subnode, append otherwise.
        G.addsubnode(self)

    def __eq__(self, other):
        """Fuzzy equality: True when the wikilinks score above FUZZ_FACTOR.

        Objects without a 'wikilink' attribute (None, False, strings, ...) are
        never equal; NotImplemented lets Python fall back to its default
        comparison instead of raising AttributeError.
        """
        # hack hack
        # NOTE: defining __eq__ without __hash__ leaves instances unhashable.
        if not hasattr(other, 'wikilink'):
            return NotImplemented
        return fuzz.ratio(self.wikilink, other.wikilink) > FUZZ_FACTOR

    def __sub__(self, other):
        """Return the wikilink distance to other; same as distance()."""
        # hack hack
        return self.distance(other)

    def distance(self, other):
        """Return 100 minus the fuzz.ratio() of the two wikilinks."""
        # hack hack
        return 100 - fuzz.ratio(self.wikilink, other.wikilink)

    def go(self):
        """Return the list of go links contained in this subnode.

        Targets already starting with 'http://' or 'https://' are kept as is;
        anything else gets 'https://' prepended. Blank targets are skipped so
        that a bare 'https://' is never produced.
        """
        sanitized_golinks = []
        for golink in subnode_to_actions(self, 'go'):
            golink = golink.strip()
            if not golink:
                continue
            if not golink.startswith(('http://', 'https://')):
                # hack hack.
                golink = 'https://' + golink
            sanitized_golinks.append(golink)
        return sanitized_golinks
2020-11-22 17:54:08 +00:00
def subnode_to_actions(subnode, action):
    """Return the targets of every '[[action]] target' line in a subnode.

    Each line of subnode.content is searched for the literal marker
    '[[action]]' followed by a single space; the rest of that line is the
    target. At most one target is taken per line.

    Args:
        subnode: object exposing a string 'content' attribute.
        action: action name as it appears between the brackets, e.g. 'go'.

    Returns:
        A list of target strings in document order (possibly empty).
    """
    # hack hack.
    # Escape the action so names containing regex metacharacters are matched
    # literally, and compile once instead of once per line.
    action_regex = re.compile(r'\[\[' + re.escape(action) + r'\]\] (.*?)$')
    actions = []
    for line in subnode.content.splitlines():
        m = action_regex.search(line)
        if m:
            actions.append(m.group(1))
    return actions
2020-11-16 13:49:18 +00:00
2020-11-10 21:48:20 +00:00
class User:
    """A user of the Agora together with the subnodes attributed to them."""

    def __init__(self, user):
        """Build the user and eagerly load their subnodes.

        Args:
            user: user name; doubles as the user's URI.
        """
        self.uri = user
        self.url = '/user/' + self.uri
        # NOTE: subnodes_by_user() is evaluated at construction time.
        self.subnodes = subnodes_by_user(user)

    def size(self):
        """Return the number of subnodes attributed to this user."""
        return len(self.subnodes)
2020-11-07 14:34:48 +00:00
2020-11-10 21:48:20 +00:00
def path_to_uri(path):
    """Return path relative to the Agora root (config.AGORA_PATH).

    Only a leading '<AGORA_PATH>/' is removed; later occurrences of the same
    text inside the path are left alone. A trailing slash on AGORA_PATH is
    tolerated. Paths outside the root are returned unchanged.

    Args:
        path: filesystem path of a file, normally inside the Agora.

    Returns:
        The path relative to the Agora root, e.g. 'garden/flancian/README.md'.
    """
    prefix = config.AGORA_PATH.rstrip('/') + '/'
    if path.startswith(prefix):
        return path[len(prefix):]
    return path
2020-11-07 14:34:48 +00:00
2020-11-10 21:48:20 +00:00
def path_to_user(path):
    """Return the user owning the file at path.

    The user is the path component right after the first 'garden/'. Files
    that are not inside any 'garden/<user>/' directory belong to 'agora'.

    Args:
        path: filesystem path (or URI) of a subnode.

    Returns:
        The user name, or 'agora' when no garden directory is found.
    """
    m = re.search(r'garden/(.+?)/', path)
    return m.group(1) if m else 'agora'
2020-11-07 14:34:48 +00:00
def path_to_wikilink(path):
    """Return the file name of path with directories and final extension removed."""
    filename = os.path.basename(path)
    stem, _extension = os.path.splitext(filename)
    return stem
def content_to_outlinks(content):
    """Return the canonical form of every [[wikilink]] found in content.

    Links are returned in order of appearance; duplicates are kept. Content
    without wikilinks yields an empty list.
    """
    # hack hack.
    return [util.canonical_wikilink(target) for target in RE_WIKILINKS.findall(content)]
2020-11-17 09:49:21 +00:00
def all_subnodes(sort=True):
    """Build a Subnode for every Markdown file under the Agora root.

    Every call walks config.AGORA_PATH recursively and constructs fresh
    Subnode objects, which reads each file from disk.

    Args:
        sort: when true, order the result by case-insensitive URI; otherwise
            keep the order returned by glob.

    Returns:
        A list of Subnode objects.
    """
    pattern = os.path.join(config.AGORA_PATH, '**/*.md')
    subnodes = [Subnode(f) for f in glob.glob(pattern, recursive=True)]
    if sort:
        return sorted(subnodes, key=lambda x: x.uri.lower())
    return subnodes
def latest():
    """Return every subnode, most recently modified first."""
    unsorted_subnodes = all_subnodes(sort=False)
    # A reversed sort is still stable, so ties keep their incoming order just
    # as they would when sorting on the negated mtime.
    return sorted(unsorted_subnodes, key=attrgetter('mtime'), reverse=True)
2020-11-14 19:08:50 +00:00
def all_nodes(include_journals=True):
    """Return one Node per distinct wikilink, sorted case-insensitively.

    Args:
        include_journals: when false, nodes whose wikilink is a journal entry
            (per util.is_journal) are left out.

    Returns:
        A list of Node objects, each carrying the subnodes sharing its wikilink.
    """
    # Group all subnodes by wikilink: {wikilink -> [subnode]}.
    # hack hack -- there's something in itertools better than this.
    grouped = defaultdict(list)
    for subnode in all_subnodes():
        grouped[subnode.wikilink].append(subnode)

    # Build one node per group.
    nodes = []
    for wikilink, members in grouped.items():
        node = Node(wikilink)
        node.subnodes = members
        nodes.append(node)

    # remove journals if so desired.
    if not include_journals:
        nodes = [candidate for candidate in nodes if not util.is_journal(candidate.wikilink)]

    # TODO: experiment with other ranking.
    # return sorted(nodes, key=lambda x: -x.size())
    return sorted(nodes, key=lambda candidate: candidate.wikilink.lower())
2020-11-10 21:48:20 +00:00
def all_users():
    """Return a User for every directory in the Agora's 'garden' folder.

    Plain files sitting directly in 'garden' are ignored: path_to_user() only
    recognises 'garden/<user>/' directories, so such entries could never own
    a subnode.

    Returns:
        A list of User objects sorted by case-insensitive URI.

    Raises:
        OSError: if the garden directory cannot be listed.
    """
    # hack hack.
    # NOTE: each User() loads its subnodes eagerly via subnodes_by_user().
    garden = os.path.join(config.AGORA_PATH, 'garden')
    users = [
        User(name)
        for name in os.listdir(garden)
        if os.path.isdir(os.path.join(garden, name))
    ]
    return sorted(users, key=lambda x: x.uri.lower())
2020-11-07 14:34:48 +00:00
def all_journals():
    """Return all journal nodes, sorted by wikilink in descending order.

    A node is a journal when util.is_journal() accepts its wikilink.
    """
    # hack hack.
    journals = [node for node in all_nodes() if util.is_journal(node.wikilink)]
    return sorted(journals, key=attrgetter('wikilink'), reverse=True)
2020-11-07 14:34:48 +00:00
def nodes_by_wikilink(wikilink):
    """Return the nodes whose wikilink is exactly wikilink (possibly none)."""
    matching = []
    for candidate in all_nodes():
        if candidate.wikilink == wikilink:
            matching.append(candidate)
    return matching
2020-11-16 12:43:15 +00:00
def subnodes_by_wikilink(wikilink, fuzzy_matching=True):
    """Return the subnodes whose wikilink matches wikilink.

    Args:
        wikilink: the wikilink to look for.
        fuzzy_matching: when true, accept any subnode whose wikilink scores
            above FUZZ_FACTOR against wikilink; otherwise require equality.

    Returns:
        A list of matching subnodes.
    """
    if fuzzy_matching:
        # TODO
        def is_match(candidate):
            return fuzz.ratio(candidate, wikilink) > FUZZ_FACTOR
    else:
        def is_match(candidate):
            return candidate == wikilink
    return [subnode for subnode in all_subnodes() if is_match(subnode.wikilink)]
2020-11-16 14:51:34 +00:00
def search_subnodes(query):
    """Return the subnodes whose content matches query, ignoring case.

    Args:
        query: a regular expression. If it is not a valid pattern (e.g. an
            unbalanced bracket typed into a search box) it is searched for as
            literal text instead of raising.

    Returns:
        A list of matching subnodes.
    """
    # Compile once rather than handing the raw string to re.search() for
    # every subnode.
    # NOTE(review): if query comes from end users, a pathological pattern can
    # still be slow to evaluate -- confirm against the caller.
    try:
        pattern = re.compile(query, re.IGNORECASE)
    except re.error:
        pattern = re.compile(re.escape(query), re.IGNORECASE)
    return [subnode for subnode in all_subnodes() if pattern.search(subnode.content)]
2020-11-10 21:48:20 +00:00
def subnodes_by_user(user):
    """Return every subnode whose 'user' attribute equals user."""
    owned = []
    for candidate in all_subnodes():
        if candidate.user == user:
            owned.append(candidate)
    return owned
2020-11-16 20:29:37 +00:00
def user_readmes(user):
    """Return the subnodes of user whose wikilink contains 'readme'.

    The match is case-insensitive and may occur anywhere in the wikilink.
    """
    # hack hack
    # Reuses subnodes_by_user() instead of repeating its filter.
    return [
        subnode
        for subnode in subnodes_by_user(user)
        if re.search('readme', subnode.wikilink, re.IGNORECASE)
    ]
2020-11-10 21:48:20 +00:00
def subnode_by_uri(uri):
    """Return the first subnode whose URI equals uri, or False if none does."""
    for candidate in all_subnodes():
        if candidate.uri == uri:
            return candidate
    # TODO: handle.
    return False
2020-11-10 21:48:20 +00:00
2020-11-07 14:34:48 +00:00
def nodes_by_outlink(wikilink):
    """Return the nodes having at least one subnode that links to wikilink.

    Node objects carry no 'outlinks' of their own, so the outlinks of their
    subnodes are inspected. The wikilink is canonicalized first because
    subnode outlinks are stored in canonical form.
    """
    target = util.canonical_wikilink(wikilink)
    return [
        node
        for node in all_nodes()
        if any(target in subnode.outlinks for subnode in node.subnodes)
    ]
2020-11-10 21:48:20 +00:00
def subnodes_by_outlink(wikilink):
    """Return the subnodes whose outlinks include wikilink.

    The wikilink is canonicalized before comparison, since subnode outlinks
    are stored in canonical form. Matching is exact: an earlier fuzzy-matching
    attempt matched too much/too little and was abandoned.
    """
    # Canonicalize once, not once per subnode.
    target = util.canonical_wikilink(wikilink)
    return [subnode for subnode in all_subnodes() if target in subnode.outlinks]