Start moving to Graph(). Add caching. Add pulled links.
This commit is contained in:
parent
e9628a2b3a
commit
98ceec37ea
5 changed files with 108 additions and 70 deletions
|
@ -112,10 +112,10 @@ def wikilink(node):
|
||||||
'node_rendered.html',
|
'node_rendered.html',
|
||||||
wikilink=node,
|
wikilink=node,
|
||||||
subnodes=db.subnodes_by_wikilink(node),
|
subnodes=db.subnodes_by_wikilink(node),
|
||||||
|
pull_nodes=n.pull_nodes() if n else [],
|
||||||
# backlinks=db.nodes_by_outlink(node),
|
# backlinks=db.nodes_by_outlink(node),
|
||||||
backlinks=[x.wikilink for x in db.nodes_by_outlink(node)],
|
backlinks=[x.wikilink for x in db.nodes_by_outlink(node)],
|
||||||
pushlinks=n.push_links() if n else [],
|
pushlinks=n.push_links() if n else [],
|
||||||
pulllinks=n.pull_links() if n else [],
|
|
||||||
forwardlinks=n.forward_links() if n else [],
|
forwardlinks=n.forward_links() if n else [],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
136
app/db.py
136
app/db.py
|
@ -12,6 +12,7 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
import cachetools.func
|
||||||
import glob
|
import glob
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
@ -21,9 +22,12 @@ from collections import defaultdict
|
||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
from operator import attrgetter
|
from operator import attrgetter
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: move action extractor regex here as well.
|
||||||
RE_WIKILINKS = re.compile('\[\[(.*?)\]\]')
|
RE_WIKILINKS = re.compile('\[\[(.*?)\]\]')
|
||||||
FUZZ_FACTOR = 95
|
FUZZ_FACTOR = 95
|
||||||
|
|
||||||
|
|
||||||
# URIs are ids.
|
# URIs are ids.
|
||||||
# - In the case of nodes, their [[wikilink]].
|
# - In the case of nodes, their [[wikilink]].
|
||||||
# - Example: 'foo', meaning the node that is rendered when you click on [[foo]] somewhere.
|
# - Example: 'foo', meaning the node that is rendered when you click on [[foo]] somewhere.
|
||||||
|
@ -34,15 +38,55 @@ FUZZ_FACTOR = 95
|
||||||
# TODO: implement.
|
# TODO: implement.
|
||||||
class Graph:
|
class Graph:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# [[wikilink]] -> Node
|
# Revisit.
|
||||||
self.nodes = {}
|
pass
|
||||||
# node -> [n0, ..., nn] such that node has forward_links to the target list.
|
|
||||||
self.edges = {}
|
def edge(self, n0, n1):
|
||||||
def addsubnode(self, subnode):
|
pass
|
||||||
if subnode.wikilink in self.nodes:
|
|
||||||
G.nodes[subnode.wikilink].subnodes.append(subnode)
|
def edges(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def node(self, uri):
|
||||||
|
# looks up a node by uri (essentially [[wikilink]]).
|
||||||
|
# horrible
|
||||||
|
nodes = self.nodes()
|
||||||
|
return [node for node in nodes if node.uri == uri][0]
|
||||||
|
|
||||||
|
def nodes(self, include_journals=True):
|
||||||
|
# returns a list of all nodes
|
||||||
|
|
||||||
|
# first we fetch all subnodes, put them in a dict {wikilink -> [subnode]}.
|
||||||
|
# hack hack -- there's something in itertools better than this.
|
||||||
|
wikilink_to_subnodes = defaultdict(list)
|
||||||
|
|
||||||
|
for subnode in self.subnodes():
|
||||||
|
wikilink_to_subnodes[subnode.wikilink].append(subnode)
|
||||||
|
|
||||||
|
# then we iterate over its values and construct nodes for each list of subnodes.
|
||||||
|
nodes = []
|
||||||
|
for wikilink in wikilink_to_subnodes:
|
||||||
|
node = Node(wikilink)
|
||||||
|
node.subnodes = wikilink_to_subnodes[wikilink]
|
||||||
|
nodes.append(node)
|
||||||
|
|
||||||
|
# remove journals if so desired.
|
||||||
|
if not include_journals:
|
||||||
|
nodes = [node for node in nodes if not util.is_journal(node.wikilink)]
|
||||||
|
|
||||||
|
# TODO: experiment with other ranking.
|
||||||
|
# return sorted(nodes, key=lambda x: -x.size())
|
||||||
|
return sorted(nodes, key=lambda x: x.wikilink.lower())
|
||||||
|
|
||||||
|
# does this belong here?
|
||||||
|
@cachetools.func.ttl_cache(maxsize=1, ttl=20)
|
||||||
|
def subnodes(self, sort=True):
|
||||||
|
subnodes = [Subnode(f) for f in glob.glob(os.path.join(config.AGORA_PATH, '**/*.md'), recursive=True)]
|
||||||
|
if sort:
|
||||||
|
return sorted(subnodes, key=lambda x: x.uri.lower())
|
||||||
else:
|
else:
|
||||||
G.nodes[subnode.wikilink] = Node(subnode.wikilink)
|
return subnodes
|
||||||
|
|
||||||
|
|
||||||
G = Graph()
|
G = Graph()
|
||||||
|
|
||||||
|
@ -81,11 +125,13 @@ class Node:
|
||||||
links.extend(subnode.forward_links)
|
links.extend(subnode.forward_links)
|
||||||
return sorted(set(links))
|
return sorted(set(links))
|
||||||
|
|
||||||
def pull_links(self):
|
# Pattern: (subject).action_object.
|
||||||
links = []
|
# Could be modeled with RDF?
|
||||||
|
def pull_nodes(self):
|
||||||
|
nodes = []
|
||||||
for subnode in self.subnodes:
|
for subnode in self.subnodes:
|
||||||
links.extend(subnode.pull_links())
|
nodes.extend(subnode.pull_nodes())
|
||||||
return sorted(set(links))
|
return sorted(set(nodes))
|
||||||
|
|
||||||
def push_links(self):
|
def push_links(self):
|
||||||
links = []
|
links = []
|
||||||
|
@ -112,7 +158,7 @@ class Subnode:
|
||||||
self.forward_links = content_to_forward_links(self.content)
|
self.forward_links = content_to_forward_links(self.content)
|
||||||
self.node = self.wikilink
|
self.node = self.wikilink
|
||||||
# Initiate node for wikilink if this is the first subnode, append otherwise.
|
# Initiate node for wikilink if this is the first subnode, append otherwise.
|
||||||
G.addsubnode(self)
|
# G.addsubnode(self)
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
# hack hack
|
# hack hack
|
||||||
|
@ -147,17 +193,17 @@ class Subnode:
|
||||||
sanitized_golinks.append('https://' + golink)
|
sanitized_golinks.append('https://' + golink)
|
||||||
return sanitized_golinks
|
return sanitized_golinks
|
||||||
|
|
||||||
def pull_links(self):
|
def pull_nodes(self):
|
||||||
"""
|
"""
|
||||||
returns a set of pull links contained in this subnode
|
returns a set of nodes pulled (anagora.org/node/pull) in this subnode
|
||||||
pull links are blocks of the form:
|
pulls are blocks of the form:
|
||||||
- [[pull]] [[node]]
|
- [[pull]] [[node]]
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# TODO: test.
|
# TODO: test.
|
||||||
pull_links = subnode_to_actions(self, 'pull')
|
pull_nodes = subnode_to_actions(self, 'pull')
|
||||||
entities = content_to_forward_links("\n".join(pull_links))
|
entities = content_to_forward_links("\n".join(pull_nodes))
|
||||||
return entities
|
return [Node(entity) for entity in entities]
|
||||||
|
|
||||||
def push_links(self):
|
def push_links(self):
|
||||||
"""
|
"""
|
||||||
|
@ -216,38 +262,8 @@ def content_to_forward_links(content):
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def all_subnodes(sort=True):
|
|
||||||
subnodes = [Subnode(f) for f in glob.glob(os.path.join(config.AGORA_PATH, '**/*.md'), recursive=True)]
|
|
||||||
if sort:
|
|
||||||
return sorted(subnodes, key=lambda x: x.uri.lower())
|
|
||||||
else:
|
|
||||||
return subnodes
|
|
||||||
|
|
||||||
def latest():
|
def latest():
|
||||||
subnodes = all_subnodes(sort=False)
|
return sorted(G.subnodes(), key=lambda x: -x.mtime)
|
||||||
return sorted(subnodes, key=lambda x: -x.mtime)
|
|
||||||
|
|
||||||
def all_nodes(include_journals=True):
|
|
||||||
# first we fetch all subnodes, put them in a dict {wikilink -> [subnode]}.
|
|
||||||
# hack hack -- there's something in itertools better than this.
|
|
||||||
wikilink_to_subnodes = defaultdict(list)
|
|
||||||
for subnode in all_subnodes():
|
|
||||||
wikilink_to_subnodes[subnode.wikilink].append(subnode)
|
|
||||||
|
|
||||||
# then we iterate over its values and construct nodes for each list of subnodes.
|
|
||||||
nodes = []
|
|
||||||
for wikilink in wikilink_to_subnodes:
|
|
||||||
node = Node(wikilink)
|
|
||||||
node.subnodes = wikilink_to_subnodes[wikilink]
|
|
||||||
nodes.append(node)
|
|
||||||
|
|
||||||
# remove journals if so desired.
|
|
||||||
if not include_journals:
|
|
||||||
nodes = [node for node in nodes if not util.is_journal(node.wikilink)]
|
|
||||||
|
|
||||||
# TODO: experiment with other ranking.
|
|
||||||
# return sorted(nodes, key=lambda x: -x.size())
|
|
||||||
return sorted(nodes, key=lambda x: x.wikilink.lower())
|
|
||||||
|
|
||||||
def all_users():
|
def all_users():
|
||||||
# hack hack.
|
# hack hack.
|
||||||
|
@ -256,12 +272,12 @@ def all_users():
|
||||||
|
|
||||||
def all_journals():
|
def all_journals():
|
||||||
# hack hack.
|
# hack hack.
|
||||||
nodes = all_nodes()
|
nodes = G.nodes()
|
||||||
nodes = [node for node in nodes if util.is_journal(node.wikilink)]
|
nodes = [node for node in nodes if util.is_journal(node.wikilink)]
|
||||||
return sorted(nodes, key=attrgetter('wikilink'), reverse=True)
|
return sorted(nodes, key=attrgetter('wikilink'), reverse=True)
|
||||||
|
|
||||||
def nodes_by_wikilink(wikilink):
|
def nodes_by_wikilink(wikilink):
|
||||||
nodes = [node for node in all_nodes() if node.wikilink == wikilink]
|
nodes = [node for node in G.nodes() if node.wikilink == wikilink]
|
||||||
return nodes
|
return nodes
|
||||||
|
|
||||||
def wikilink_to_node(node):
|
def wikilink_to_node(node):
|
||||||
|
@ -275,27 +291,27 @@ def wikilink_to_node(node):
|
||||||
def subnodes_by_wikilink(wikilink, fuzzy_matching=True):
|
def subnodes_by_wikilink(wikilink, fuzzy_matching=True):
|
||||||
if fuzzy_matching:
|
if fuzzy_matching:
|
||||||
# TODO
|
# TODO
|
||||||
subnodes = [subnode for subnode in all_subnodes() if fuzz.ratio(subnode.wikilink, wikilink) > FUZZ_FACTOR]
|
subnodes = [subnode for subnode in G.subnodes() if fuzz.ratio(subnode.wikilink, wikilink) > FUZZ_FACTOR]
|
||||||
else:
|
else:
|
||||||
subnodes = [subnode for subnode in all_subnodes() if subnode.wikilink == wikilink]
|
subnodes = [subnode for subnode in G.subnodes() if subnode.wikilink == wikilink]
|
||||||
return subnodes
|
return subnodes
|
||||||
|
|
||||||
def search_subnodes(query):
|
def search_subnodes(query):
|
||||||
subnodes = [subnode for subnode in all_subnodes() if re.search(query, subnode.content, re.IGNORECASE)]
|
subnodes = [subnode for subnode in G.subnodes() if re.search(query, subnode.content, re.IGNORECASE)]
|
||||||
return subnodes
|
return subnodes
|
||||||
|
|
||||||
def subnodes_by_user(user):
|
def subnodes_by_user(user):
|
||||||
subnodes = [subnode for subnode in all_subnodes() if subnode.user == user]
|
subnodes = [subnode for subnode in G.subnodes() if subnode.user == user]
|
||||||
return subnodes
|
return subnodes
|
||||||
|
|
||||||
def user_readmes(user):
|
def user_readmes(user):
|
||||||
# hack hack
|
# hack hack
|
||||||
# fix duplication.
|
# fix duplication.
|
||||||
subnodes = [subnode for subnode in all_subnodes() if subnode.user == user and re.search('readme', subnode.wikilink, re.IGNORECASE)]
|
subnodes = [subnode for subnode in G.subnodes() if subnode.user == user and re.search('readme', subnode.wikilink, re.IGNORECASE)]
|
||||||
return subnodes
|
return subnodes
|
||||||
|
|
||||||
def subnode_by_uri(uri):
|
def subnode_by_uri(uri):
|
||||||
subnode = [subnode for subnode in all_subnodes() if subnode.uri == uri]
|
subnode = [subnode for subnode in G.subnodes() if subnode.uri == uri]
|
||||||
if subnode:
|
if subnode:
|
||||||
return subnode[0]
|
return subnode[0]
|
||||||
else:
|
else:
|
||||||
|
@ -303,11 +319,11 @@ def subnode_by_uri(uri):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def nodes_by_outlink(wikilink):
|
def nodes_by_outlink(wikilink):
|
||||||
nodes = [node for node in all_nodes() if wikilink in node.forward_links()]
|
nodes = [node for node in G.nodes() if wikilink in node.forward_links()]
|
||||||
return sorted(nodes, key=attrgetter('wikilink'))
|
return sorted(nodes, key=attrgetter('wikilink'))
|
||||||
|
|
||||||
def subnodes_by_outlink(wikilink):
|
def subnodes_by_outlink(wikilink):
|
||||||
# This doesn't work. It matches too much/too little for some reason. Debug someday?
|
# This doesn't work. It matches too much/too little for some reason. Debug someday?
|
||||||
# subnodes = [subnode for subnode in all_subnodes() if [wikilink for wikilink in subnode.forward_links if fuzz.ratio(subnode.wikilink, wikilink) > FUZZ_FACTOR]]
|
# subnodes = [subnode for subnode in all_subnodes() if [wikilink for wikilink in subnode.forward_links if fuzz.ratio(subnode.wikilink, wikilink) > FUZZ_FACTOR]]
|
||||||
subnodes = [subnode for subnode in all_subnodes() if util.canonical_wikilink(wikilink) in subnode.forward_links]
|
subnodes = [subnode for subnode in G.subnodes() if util.canonical_wikilink(wikilink) in subnode.forward_links]
|
||||||
return subnodes
|
return subnodes
|
||||||
|
|
|
@ -31,16 +31,15 @@
|
||||||
<a href="/node/{{link}}">{{link}}</a><br />
|
<a href="/node/{{link}}">{{link}}</a><br />
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
<br />
|
<br />
|
||||||
</div>
|
-->
|
||||||
|
|
||||||
<div class="pulllinks">
|
<div class="pulllinks">
|
||||||
<span class="pulllinks-header">pull</span><br />
|
<span class="pulllinks-header">pulled</span><br />
|
||||||
{% for link in pulllinks %}
|
{% for node in pull_nodes %}
|
||||||
<a href="/node/{{link}}">{{link}}</a><br />
|
<a href="/node/{{node.uri}}">{{node.uri}}</a><br />
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
<br />
|
<br />
|
||||||
</div>
|
</div>
|
||||||
-->
|
|
||||||
|
|
||||||
<div class="forwardlinks">
|
<div class="forwardlinks">
|
||||||
<span class="forwardlinks-header">→ forward</span><br />
|
<span class="forwardlinks-header">→ forward</span><br />
|
||||||
|
|
|
@ -39,6 +39,22 @@ Try listing <a href="/nodes">nodes</a> or perhaps <a href="/search">search</a>.
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
<!--
|
||||||
|
{% for node in pull_nodes %}
|
||||||
|
<div class="node">
|
||||||
|
<span class="node-header"><strong>Pulled node</strong> <a href="/node/{{node.uri}}">[[{{node.uri}}]]</a></span>
|
||||||
|
{{node.subnodes}}
|
||||||
|
|
||||||
|
{% for subnode in node.subnodes %}
|
||||||
|
<div class="subnode">
|
||||||
|
<span class="subnode-header"><strong>Subnode</strong> <a href="{{subnode.url}}">{{subnode.uri}}</a> by <a href="/@{{subnode.user}}">@{{subnode.user}}</a></span>
|
||||||
|
{{ subnode.content|markdown|linkify|safe }}
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
-->
|
||||||
|
|
||||||
{% include "links.html" %}
|
{% include "links.html" %}
|
||||||
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
bleach==3.2.1
|
bleach==3.2.1
|
||||||
|
cachetools==4.2.0
|
||||||
click==7.1.2
|
click==7.1.2
|
||||||
|
dateparser==1.0.0
|
||||||
Flask==1.1.2
|
Flask==1.1.2
|
||||||
Flask-Markdown==0.3
|
Flask-Markdown==0.3
|
||||||
Flask-WTF==0.14.3
|
Flask-WTF==0.14.3
|
||||||
|
@ -9,16 +11,21 @@ itsdangerous==1.1.0
|
||||||
jedi==0.17.2
|
jedi==0.17.2
|
||||||
Jinja2==2.11.2
|
Jinja2==2.11.2
|
||||||
Markdown==3.3.3
|
Markdown==3.3.3
|
||||||
|
marko==0.9.1
|
||||||
MarkupSafe==1.1.1
|
MarkupSafe==1.1.1
|
||||||
|
mdx-truly-sane-lists==1.2
|
||||||
packaging==20.4
|
packaging==20.4
|
||||||
parso==0.7.1
|
parso==0.7.1
|
||||||
|
pkg-resources==0.0.0
|
||||||
pyparsing==2.4.7
|
pyparsing==2.4.7
|
||||||
|
python-dateutil==2.8.1
|
||||||
python-Levenshtein==0.12.0
|
python-Levenshtein==0.12.0
|
||||||
|
pytz==2020.4
|
||||||
|
regex==2020.11.13
|
||||||
six==1.15.0
|
six==1.15.0
|
||||||
|
tzlocal==2.1
|
||||||
uWSGI==2.0.19.1
|
uWSGI==2.0.19.1
|
||||||
webencodings==0.5.1
|
webencodings==0.5.1
|
||||||
Werkzeug==1.0.1
|
Werkzeug==1.0.1
|
||||||
WTForms==2.3.3
|
WTForms==2.3.3
|
||||||
zipp==3.4.0
|
zipp==3.4.0
|
||||||
dateparser==1.0.0
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue