2020-11-07 14:34:48 +00:00
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
2020-12-27 19:08:01 +00:00
import cachetools . func
2020-11-07 14:34:48 +00:00
import glob
import re
import os
2020-11-07 18:38:01 +00:00
from . import config
2020-11-16 14:26:03 +00:00
from . import util
2020-11-14 19:08:50 +00:00
from collections import defaultdict
2020-11-16 12:43:15 +00:00
from fuzzywuzzy import fuzz
2020-11-07 14:34:48 +00:00
from operator import attrgetter
2020-12-27 19:08:01 +00:00
# TODO: move action extractor regex here as well.
2020-11-07 14:34:48 +00:00
# Captures the text inside every [[wikilink]] (non-greedy, so adjacent links
# on the same line are matched separately). Raw string: '\[' in a plain string
# literal is an invalid escape sequence.
RE_WIKILINKS = re.compile(r'\[\[(.*?)\]\]')
# Two wikilinks are treated as the same when their fuzz.ratio() score is
# strictly greater than this threshold.
FUZZ_FACTOR = 95
2020-11-07 14:34:48 +00:00
2020-12-27 19:08:01 +00:00
2020-11-10 21:48:20 +00:00
# URIs are ids.
# - In the case of nodes, their [[wikilink]].
# - Example: 'foo', meaning the node that is rendered when you click on [[foo]] somewhere.
# - In the case of subnodes, a relative path within the Agora.
# - Example: 'garden/flancian/README.md', meaning an actual file called README.md.
# - Note the example subnode above gets rendered in node [[README]], so fetching node with uri README would yield it (and others).
# TODO: implement.
class Graph:
    """View over the Agora: subnodes are read from disk and grouped into nodes."""

    def __init__(self):
        # Revisit.
        pass

    def edge(self, n0, n1):
        # Placeholder, not implemented.
        pass

    def edges(self):
        # Placeholder, not implemented.
        pass

    def node(self, uri):
        """Look up a node by uri (essentially its [[wikilink]])."""
        # horrible
        return wikilink_to_node(uri)

    def nodes(self, include_journals=True):
        """Return all nodes, sorted case-insensitively by wikilink.

        Subnodes are grouped by wikilink and one Node is built per group.
        When include_journals is false, nodes whose wikilink is a journal
        entry (per util.is_journal) are dropped.
        """
        # hack hack -- there's something in itertools better than this.
        grouped = defaultdict(list)
        for sub in self.subnodes():
            grouped[sub.wikilink].append(sub)

        result = []
        for link, members in grouped.items():
            current = Node(link)
            current.subnodes = members
            result.append(current)

        if not include_journals:
            result = [n for n in result if not util.is_journal(n.wikilink)]

        # TODO: experiment with other ranking.
        # return sorted(nodes, key=lambda x: -x.size())
        result.sort(key=lambda n: n.wikilink.lower())
        return result

    # does this belong here?
    @cachetools.func.ttl_cache(maxsize=1, ttl=20)
    def subnodes(self, sort=lambda x: x.uri.lower()):
        """Return a Subnode for every '*.md' file found recursively under config.AGORA_PATH.

        The result is memoized by the ttl_cache decorator above. `sort` is the
        key function used to order the list; pass a falsy value to get the
        subnodes in glob order.
        """
        pattern = os.path.join(config.AGORA_PATH, '**/*.md')
        found = [Subnode(path) for path in glob.glob(pattern, recursive=True)]
        return sorted(found, key=sort) if sort else found
2020-11-10 21:48:20 +00:00
# Module-level Graph instance; the lookup helpers in this module
# (latest, nodes_by_wikilink, subnodes_by_user, ...) all query it.
G = Graph ( )
2020-11-07 14:34:48 +00:00
class Node:
    """Nodes map 1:1 to wikilinks.

    A node is not backed by a single file: it resolves to a series of subnodes
    (see Subnode) when being rendered. Its 'uri' is the wikilink it was created
    with; its 'wikilink' is the same value, canonicalized for journal entries.
    """

    def __init__(self, wikilink):
        # Use a node's URI as its identifier.
        # Subnodes are attached to the node matching their wikilink.
        # i.e. if two users contribute subnodes titled [[foo]], they both show up when querying node [[foo]].
        self.wikilink = wikilink
        self.uri = wikilink
        # ensure wikilinks to journal entries are all shown in iso format
        # (important to do it after self.uri = wikilink to avoid breaking
        # links)
        if util.is_journal(wikilink):
            self.wikilink = util.canonical_wikilink(wikilink)
        self.url = '/node/' + self.uri
        self.subnodes = []

    def size(self):
        """Return the number of subnodes attached to this node."""
        return len(self.subnodes)

    def go(self):
        """Return the go links of all subnodes, concatenated in subnode order."""
        # There's surely a much better way to do this. Alas :)
        links = []
        for subnode in self.subnodes:
            links.extend(subnode.go())
        return links

    # The following section is particularly confusing.
    # Some functions return wikilinks, some return full blown nodes.
    # We probably want to converge on the latter.
    # TODO: fix.
    def forward_links(self):
        """Return the sorted, de-duplicated wikilinks found in this node's subnodes."""
        links = []
        for subnode in self.subnodes:
            links.extend(subnode.forward_links)
        return sorted(set(links))

    # Pattern: (subject).action_object.
    # Could be modeled with RDF?
    def pull_nodes(self):
        """Return the nodes *being pulled* by this node, unique by uri and sorted by uri."""
        # Node defines no __eq__/__hash__, so a set() of nodes only removes
        # repeats of the very same object. Pulled nodes are built fresh on each
        # lookup, so de-duplicate on uri instead (first occurrence wins).
        unique = {}
        for subnode in self.subnodes:
            for pulled in subnode.pull_nodes():
                unique.setdefault(pulled.uri, pulled)
        return sorted(unique.values(), key=lambda x: x.uri)

    def pulling_nodes(self):
        """Return the nodes pulling *this* node (compare with pull_nodes)."""
        nodes = []
        for wikilink in self.back_links():
            candidate = G.node(wikilink)
            # Only nodes linking here can be pulling us; keep those that really do.
            if any(pulled.wikilink == self.wikilink for pulled in candidate.pull_nodes()):
                nodes.append(candidate)
        return nodes

    def push_links(self):
        """Return the sorted, de-duplicated wikilinks pushed to by this node's subnodes."""
        links = []
        for subnode in self.subnodes:
            links.extend(subnode.push_links())
        return sorted(set(links))

    def back_links(self):
        """Return the sorted wikilinks of the nodes that link to this node."""
        return sorted(x.wikilink for x in nodes_by_outlink(self.wikilink))
2020-11-22 17:54:08 +00:00
2020-11-10 21:48:20 +00:00
class Subnode:
    """A subnode is a note or media resource volunteered by a user of the Agora.

    It maps to a particular file in the Agora repository, stored (relative to
    the Agora root) in the attribute 'uri'.
    """

    # Equality below is fuzzy, so no hash can be consistent with it. Defining
    # __eq__ alone already makes instances unhashable; this spells it out.
    __hash__ = None

    def __init__(self, path):
        """Read the file at `path` and derive the subnode's identifiers from it."""
        # Use a subnode's URI as its identifier.
        self.uri = path_to_uri(path)
        self.url = '/subnode/' + self.uri
        # Subnodes are attached to the node matching their wikilink.
        # i.e. if two users contribute subnodes titled [[foo]], they both show up when querying node [[foo]].
        self.wikilink = util.canonical_wikilink(path_to_wikilink(path))
        self.user = path_to_user(path)
        with open(path) as f:
            self.content = f.read()
        self.mtime = os.path.getmtime(path)
        self.forward_links = content_to_forward_links(self.content)
        self.node = self.wikilink
        # Initiate node for wikilink if this is the first subnode, append otherwise.
        # G.addsubnode(self)

    def __eq__(self, other):
        """Fuzzy equality: true when both wikilinks score above FUZZ_FACTOR."""
        # hack hack
        # Objects without a wikilink (None, False, ...) cannot be compared
        # fuzzily; defer to Python's default instead of raising AttributeError.
        if not hasattr(other, 'wikilink'):
            return NotImplemented
        return fuzz.ratio(self.wikilink, other.wikilink) > FUZZ_FACTOR

    def __sub__(self, other):
        """Same as distance(other)."""
        # hack hack
        return self.distance(other)

    def distance(self, other):
        """Return 100 minus the fuzz.ratio() of the two wikilinks."""
        # hack hack
        return 100 - fuzz.ratio(self.wikilink, other.wikilink)

    def go(self):
        """
        returns a list of go links contained in this subnode
        go links are blocks of the form:
        - [[go]] protocol://example.org/url
        protocol defaults to https.
        """
        sanitized_golinks = []
        for golink in subnode_to_actions(self, 'go'):
            # should probably instead check for contains: //
            if golink.startswith('http'):
                sanitized_golinks.append(golink)
            else:
                # hack hack.
                sanitized_golinks.append('https://' + golink)
        return sanitized_golinks

    def pull_nodes(self):
        """
        returns a list of nodes pulled (anagora.org/node/pull) in this subnode
        pulls are blocks of the form:
        - [[pull]] [[node]]
        """
        # TODO: test.
        pull_blocks = subnode_to_actions(self, 'pull')
        pull_nodes = content_to_forward_links("\n".join(pull_blocks))
        return [G.node(node) for node in pull_nodes]

    def push_links(self):
        """
        returns a list of push links (wikilinks) contained in this subnode
        push links are blocks of the form:
        - [[push]] [[node]]
        TODO: refactor with the above.
        """
        # TODO: test.
        push_blocks = subnode_to_actions(self, 'push')
        return content_to_forward_links("\n".join(push_blocks))
2020-11-22 17:54:08 +00:00
2020-12-22 00:08:47 +00:00
2020-11-22 17:54:08 +00:00
def subnode_to_actions(subnode, action):
    """Return the argument of every `[[action]] ...` block found in a subnode.

    Each line of subnode.content is searched for '[[<action>]] ' and whatever
    follows it up to the end of the line is collected, in order of appearance.
    `action` is matched literally (regex metacharacters are escaped).
    """
    # hack hack.
    # Raw strings keep '\[' from being an invalid string escape; compiling
    # once avoids rebuilding the pattern for every line.
    action_regex = re.compile(r'\[\[' + re.escape(action) + r'\]\] (.*?)$')
    actions = []
    for line in subnode.content.splitlines():
        m = action_regex.search(line)
        if m:
            actions.append(m.group(1))
    return actions
2020-11-16 13:49:18 +00:00
2020-11-10 21:48:20 +00:00
class User:
    """A user of the Agora together with the subnodes attributed to them."""

    def __init__(self, user):
        # Collect everything this user has contributed.
        self.subnodes = subnodes_by_user(user)
        # The user name doubles as the identifier.
        self.uri = user
        self.url = '/user/' + user

    def size(self):
        """Return how many subnodes this user has."""
        contributed = self.subnodes
        return len(contributed)
2020-11-07 14:34:48 +00:00
2020-11-10 21:48:20 +00:00
def path_to_uri(path):
    """Turn a filesystem path into a uri by removing the Agora root from it.

    Every occurrence of config.AGORA_PATH followed by '/' is removed.
    """
    agora_prefix = config.AGORA_PATH + '/'
    return path.replace(agora_prefix, '')
2020-11-07 14:34:48 +00:00
2020-11-10 21:48:20 +00:00
def path_to_user(path):
    """Return the path component right after 'garden/', or 'agora' when there is none."""
    found = re.search('garden/(.+?)/', path)
    return found.group(1) if found else 'agora'
2020-11-07 14:34:48 +00:00
def path_to_wikilink(path):
    """Return the file name of `path` without its directory and extension."""
    filename = os.path.basename(path)
    stem, _extension = os.path.splitext(filename)
    return stem
2020-12-22 13:52:11 +00:00
def content_to_forward_links(content):
    """Return the canonicalized target of every [[wikilink]] in `content`, in order.

    Yields an empty list when the content has no wikilinks.
    """
    # hack hack.
    return [util.canonical_wikilink(target) for target in RE_WIKILINKS.findall(content)]
2020-11-17 09:49:21 +00:00
def latest():
    """Return all subnodes ordered by modification time, most recent first."""
    everything = G.subnodes()
    return sorted(everything, key=attrgetter('mtime'), reverse=True)
2020-11-10 21:48:20 +00:00
def all_users():
    """Return a User for every entry of the 'garden' directory, sorted case-insensitively by uri."""
    # hack hack.
    garden = os.path.join(config.AGORA_PATH, 'garden')
    users = [User(name) for name in os.listdir(garden)]
    users.sort(key=lambda u: u.uri.lower())
    return users
2020-11-07 14:34:48 +00:00
def all_journals():
    """Return the journal nodes (per util.is_journal), sorted by wikilink in descending order."""
    # hack hack.
    journals = [entry for entry in G.nodes() if util.is_journal(entry.wikilink)]
    journals.sort(key=attrgetter('wikilink'), reverse=True)
    return journals
2020-11-07 14:34:48 +00:00
def nodes_by_wikilink(wikilink):
    """Return the nodes whose wikilink equals `wikilink` exactly."""
    matches = []
    for candidate in G.nodes():
        if candidate.wikilink == wikilink:
            matches.append(candidate)
    return matches
2020-12-23 21:36:31 +00:00
def wikilink_to_node(node):
    """Return the first node matching wikilink `node`, or a new empty Node if none exists."""
    try:
        found = nodes_by_wikilink(node)[0]
    except (KeyError, IndexError):
        # We'll handle 404 in the template, as we want to show backlinks to non-existent nodes.
        # Return an empty.
        return Node(node)
    return found
2020-12-23 21:36:31 +00:00
2020-11-16 12:43:15 +00:00
def subnodes_by_wikilink(wikilink, fuzzy_matching=True):
    """Return the subnodes whose wikilink matches `wikilink`.

    With fuzzy_matching the fuzz.ratio() of both wikilinks must exceed
    FUZZ_FACTOR; otherwise the wikilinks must be equal.
    """
    if fuzzy_matching:
        # TODO
        def is_match(candidate):
            return fuzz.ratio(candidate, wikilink) > FUZZ_FACTOR
    else:
        def is_match(candidate):
            return candidate == wikilink

    return [subnode for subnode in G.subnodes() if is_match(subnode.wikilink)]
2020-11-16 14:51:34 +00:00
def search_subnodes(query):
    """Return the subnodes whose content matches `query`, case-insensitively.

    `query` is interpreted as a regular expression. If it is not a valid
    pattern (e.g. an unbalanced '[[foo'), it is searched for as literal text
    instead of raising re.error.
    """
    try:
        pattern = re.compile(query, re.IGNORECASE)
    except re.error:
        # The query is typed by users; don't fail the search on a stray bracket.
        pattern = re.compile(re.escape(query), re.IGNORECASE)
    return [subnode for subnode in G.subnodes() if pattern.search(subnode.content)]
2020-11-10 21:48:20 +00:00
def subnodes_by_user(user):
    """Return the subnodes whose 'user' attribute equals `user`."""
    owned = []
    for candidate in G.subnodes():
        if candidate.user == user:
            owned.append(candidate)
    return owned
2020-11-16 20:29:37 +00:00
def user_readmes(user):
    """Return the subnodes of `user` whose wikilink contains 'readme' (any case)."""
    # hack hack
    # fix duplication.
    readmes = []
    for candidate in G.subnodes():
        if candidate.user == user:
            if re.search('readme', candidate.wikilink, re.IGNORECASE):
                readmes.append(candidate)
    return readmes
2020-11-10 21:48:20 +00:00
def subnode_by_uri(uri):
    """Return the first subnode whose uri equals `uri`, or False when there is none."""
    for candidate in G.subnodes():
        if candidate.uri == uri:
            return candidate
    # TODO: handle.
    return False
2020-11-10 21:48:20 +00:00
2020-11-07 14:34:48 +00:00
def nodes_by_outlink(wikilink):
    """Return the nodes having `wikilink` among their forward links, sorted by wikilink."""
    linking = []
    for candidate in G.nodes():
        if wikilink in candidate.forward_links():
            linking.append(candidate)
    linking.sort(key=attrgetter('wikilink'))
    return linking
2020-11-10 21:48:20 +00:00
def subnodes_by_outlink(wikilink):
    """Return the subnodes whose forward links include the canonical form of `wikilink`."""
    # NOTE: a fuzzy-matching variant (fuzz.ratio against each forward link) was
    # tried here and matched too much/too little; exact matching on the
    # canonical wikilink is used instead. Debug someday?
    # Canonicalize once rather than once per subnode.
    target = util.canonical_wikilink(wikilink)
    return [subnode for subnode in G.subnodes() if target in subnode.forward_links]