2020-08-12 19:29:32 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
# This script is based on <https://github.com/m-pilia/vim-mediawiki/blob/266d7ab9c7d4d7924e16f527c109c65851da0164/scripts/preview.py>
|
|
|
|
|
|
|
|
# Useful resources:
|
|
|
|
# <https://www.mediawiki.org/wiki/API:Parsing_wikitext#parse>
|
|
|
|
# <https://www.mediawiki.org/wiki/API:Styling_content>
|
|
|
|
# <https://www.mediawiki.org/wiki/ResourceLoader/Developing_with_ResourceLoader>
|
|
|
|
# <https://github.com/wikimedia/mediawiki>
|
|
|
|
# <https://github.com/wikimedia/Vector>
|
|
|
|
# and, most importantly: `view-source:` for any MediaWiki page
|
|
|
|
|
2020-08-13 16:43:40 +00:00
|
|
|
# Skin-specific notes:
|
|
|
|
|
|
|
|
# citizen
|
|
|
|
#
|
|
|
|
# - Fonts are not loaded due to CORS restrictions. One possible mitigation is running a
|
|
|
|
# local proxy and setting `Access-Control-Allow-Origin` to `*`. An example
|
|
|
|
# nginx configuration:
|
|
|
|
#
|
|
|
|
# server {
|
|
|
|
# listen 80;
|
|
|
|
# server_name 127.155.44.48; # This address was chosen randomly
|
|
|
|
# location / {
|
|
|
|
# proxy_pass https://wiki.c2dl.info; # Replace with your wiki
|
|
|
|
# add_header Access-Control-Allow-Origin *;
|
|
|
|
# }
|
|
|
|
# }
|
|
|
|
#
|
|
|
|
# By modifying `/etc/hosts` this can be made into its own "domain":
|
|
|
|
#
|
|
|
|
# 127.155.44.48 local.wiki.c2dl.info
|
|
|
|
#
|
|
|
|
# - The module `skins.citizen.scripts.toc` is broken because to find and
|
|
|
|
# highlight the current section header in the table of contents it uses a
|
|
|
|
# CSS selector incompatible with the URL rewriting applied to anchor links.
|
|
|
|
#
|
|
|
|
# - The module `skins.citizen.scripts` references search inputs which aren't
|
|
|
|
# created by this script.
|
|
|
|
|
|
|
|
|
2020-08-12 19:29:32 +00:00
|
|
|
import argparse
|
|
|
|
import mwclient
|
|
|
|
import json
|
|
|
|
from urllib.parse import urlencode
|
|
|
|
import html
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
# Language code sent to the wiki (`uselang` for the parse request, `lang` for
# the load-script URL) and written into the `lang` attributes of the page.
LANG = "en"
# Text direction written into the `dir` attributes and the direction-related
# CSS classes of the generated page.
LANG_TEXT_DIRECTION = "ltr"
|
|
|
|
|
2020-08-13 16:05:05 +00:00
|
|
|
# Per-skin modules appended to the module list reported by the parse API; the
# combined list is emitted as `RLPAGEMODULES` in the generated page.
# Commented-out entries are kept as a record of modules that were considered
# but are intentionally not loaded.
MODULES_POST_LOAD = {
    "vector": [
        "site",
        "mediawiki.page.startup",
        "mediawiki.page.ready",
        "mediawiki.toc",
        # "mediawiki.searchSuggest",
        # "mediawiki.page.watch.ajax",
        "skins.vector.js",
    ],
    "citizen": [
        # "site",
        # "mediawiki.page.startup",
        # "mediawiki.page.ready",
        # "mediawiki.toc",
        # "skins.citizen.scripts.toc",
        # "skins.citizen.scripts.search",
        # "skins.citizen.styles.search",
        # "skins.citizen.icons.search",
        # "skins.citizen.scripts",
    ],
}
|
|
|
|
|
|
|
|
# Per-skin modules that must never end up in `RLPAGEMODULES`, even when the
# parse API reports them for the page. Skins without an entry block nothing.
MODULES_POST_LOAD_BLOCKED = {
    "citizen": [
        "skins.citizen.scripts.toc",
        "skins.citizen.scripts.search",
        "skins.citizen.styles.search",
        "skins.citizen.icons.search",
    ],
}
|
|
|
|
|
|
|
|
# Per-skin style modules requested through the `<link rel="stylesheet">` URL in
# addition to the style modules reported by the parse API. The same names are
# marked as "ready" in `RLSTATE` of the generated page.
MODULES_PRELOAD_STYLES = {
    "vector": [
        "mediawiki.legacy.commonPrint",
        "mediawiki.legacy.shared",
        "mediawiki.skinning.interface",
        "mediawiki.toc.styles",
        "skins.vector.styles",
        "site.styles",
    ],
    "citizen": [
        # "mediawiki.legacy.commonPrint",
        # "mediawiki.legacy.shared",
        "mediawiki.skinning.content.externallinks",
        # "mediawiki.toc.styles",
        "skins.citizen.icons",
        "skins.citizen.styles",
        "skins.citizen.icons.ca",
        "skins.citizen.icons.es",
        "skins.citizen.icons.footer",
        "skins.citizen.icons.n",
        "skins.citizen.icons.pt",
        "skins.citizen.icons.t",
        "skins.citizen.styles.fonts",
        "skins.citizen.styles.toc",
        "site.styles",
    ],
}
|
|
|
|
|
|
|
|
# Per-skin script modules requested through the `<script src=...>` URL in
# addition to the script modules reported by the parse API.
MODULES_PRELOAD_SCRIPTS = {
    "vector": ["startup"],
    "citizen": ["startup"],
}
|
2020-08-12 19:29:32 +00:00
|
|
|
|
|
|
|
|
|
|
|
# ported from <https://github.com/wikimedia/mediawiki/blob/c15ded31a6ca79fa65c00d151a7220632ad90b6d/includes/parser/Sanitizer.php#L1205-L1222>
def escape_css_class(class_str):
    """Turn an arbitrary string into something usable as a CSS class name.

    A leading digit or hyphen, ASCII control characters and spaces, ASCII
    punctuation (everything except ``-`` and ``_``) and the non-breaking
    space U+00A0 are each replaced by an underscore. Runs of underscores are
    then collapsed into one and trailing underscores are stripped.

    Args:
        class_str: the raw text (the script passes the page's display title).

    Returns:
        The sanitized string; may be empty.
    """
    class_str = re.sub(
        r"""(^[0-9\-])|[\x00-\x20!"#$%&'()*+,.\/:;<=>?@[\]^`{|}~]|\xA0""",
        "_",
        class_str,
    )
    # Collapse the underscore runs produced above (or already present).
    class_str = re.sub(r"_+", "_", class_str)
    return class_str.rstrip("_")
|
|
|
|
|
|
|
|
|
|
|
|
def json_dumps_compact(data):
    """Serialize *data* to JSON on one line with no whitespace around separators.

    Args:
        data: any value accepted by ``json.dumps``.

    Returns:
        The JSON text, using ``,`` and ``:`` as separators.
    """
    return json.dumps(data, indent=None, separators=(",", ":"))
|
|
|
|
|
|
|
|
|
|
|
|
# Command-line interface of the script.
parser = argparse.ArgumentParser()
# Host of the wiki, handed to `mwclient.Site`.
parser.add_argument("--site", type=str, required=True)
# URL scheme used to reach the wiki.
parser.add_argument("--scheme", type=str, default="https")
# Skin used for rendering; also selects the entries of the MODULES_* tables.
parser.add_argument("--skin", type=str, default="vector")
# Path of the file containing the wikitext to preview.
parser.add_argument(
    "--input", type=str, required=True,
)
# Page title passed to the parse API (optional).
parser.add_argument("--title", type=str)
# Path of the HTML file to write.
parser.add_argument("--output", type=str, required=True)
|
2020-08-13 16:05:05 +00:00
|
|
|
# Parsed command-line options; read by the helper functions below as a global.
cli_args = parser.parse_args()

# Client for the target wiki; used for the parse request and to build URLs.
site = mwclient.Site(cli_args.site, scheme=cli_args.scheme)
|
2020-08-12 19:29:32 +00:00
|
|
|
|
|
|
|
|
2020-08-13 16:05:05 +00:00
|
|
|
def get_load_script_url(**args):
    """Build the URL of the wiki's ``load`` script with the given query arguments.

    ``lang`` and ``skin`` are always included (taken from ``LANG`` and the
    ``--skin`` option); a keyword argument of the same name overrides them.

    Args:
        **args: extra query parameters, e.g. ``only`` and ``modules``.

    Returns:
        A host-less URL of the form ``{site.path}load{site.ext}?<query>``. The
        generated page resolves it against its ``<base href>``.
    """
    query = {"lang": LANG, "skin": cli_args.skin}
    query.update(args)
    return "{path}load{ext}?{args}".format(
        path=site.path,
        ext=site.ext,
        args=urlencode(query),
    )
|
|
|
|
|
|
|
|
|
|
|
|
# Read the wikitext to preview. The encoding is pinned so the result does not
# depend on the platform's locale default.
# NOTE(review): assumes the input file is UTF-8 -- confirm for your editor setup.
with open(cli_args.input, "r", encoding="utf-8") as f:
    wikitext_str = f.read()
|
|
|
|
|
|
|
|
# Ask the wiki to render the wikitext with the `parse` API action and keep the
# "parse" part of the response. `prop` lists the pieces used further down:
# the HTML, page indicators, display title, module lists, JS config variables
# and the rendered category box.
result = site.post(
    "parse",
    title=cli_args.title,
    text=wikitext_str,
    contentmodel="wikitext",
    prop="text|indicators|displaytitle|modules|jsconfigvars|categorieshtml",
    preview=True,
    pst=True,  # pre-save transforms
    # `sectionpreview` is intentionally not sent: MediaWiki treats a boolean
    # parameter as true whenever it is present, whatever its value, so passing
    # `sectionpreview=False` would actually switch section preview mode on.
    disableeditsection=True,  # disables "[edit]" links next to headers
    useskin=cli_args.skin,
    uselang=LANG,
)["parse"]
|
|
|
|
|
|
|
|
|
2020-08-13 16:43:40 +00:00
|
|
|
def get_modules(page_modules, added_modules_dict, blocked_modules_dict=None):
    """Combine a page's modules with the skin's extra modules, minus blocked ones.

    The skin is taken from the ``--skin`` command-line option.

    Args:
        page_modules: list of module names reported for the page.
        added_modules_dict: mapping of skin name to the module names to append
            after ``page_modules``. A skin without an entry adds nothing.
        blocked_modules_dict: optional mapping of skin name to module names
            that must not appear in the result. A skin without an entry
            blocks nothing.

    Returns:
        A new list in the order ``page_modules`` + added modules, with every
        occurrence of a blocked module removed. The inputs are not modified.
    """
    skin = cli_args.skin
    blocked = []
    if blocked_modules_dict is not None:
        blocked = blocked_modules_dict.get(skin, [])
    candidates = list(page_modules) + list(added_modules_dict.get(skin, []))
    # Filter instead of `list.remove` so that a blocked module listed more than
    # once (e.g. both by the page and by the skin table) is dropped entirely.
    return [name for name in candidates if name not in blocked]
|
|
|
|
|
|
|
|
|
2020-08-12 19:29:32 +00:00
|
|
|
# Assemble the standalone preview page: a minimal page skeleton around the
# HTML returned by the parse API. Literal braces of the embedded JavaScript
# are doubled because the template goes through `str.format`.
rendered_html = """\
<!DOCTYPE html>
<html class="client-nojs" lang="{lang}" dir="{text_dir}">

<head>
<meta charset="UTF-8"/>
<base href="{base_url}"/>
<script>document.documentElement.className="client-js";RLSTATE={page_modules_state_json};RLCONF={page_config_json};RLPAGEMODULES={page_modules_json};</script>
<script>(RLQ=window.RLQ||[]).push(function(){{mw.loader.implement("user.tokens",function(){{mw.user.tokens.set({{"editToken":"+\\\\","patrolToken":"+\\\\","watchToken":"+\\\\","csrfToken":"+\\\\"}});}});}});</script>
<link rel="stylesheet" href="{style_url}"/>
<script async="" src="{script_url}"></script>
</head>

<body class="mediawiki {text_dir} sitedir-{text_dir} mw-hide-empty-elt page-{page_class} skin-{skin} action-view">
<div id="mw-page-base" class="noprint"></div>
<div id="mw-head-base" class="noprint"></div>

<div id="content" class="mw-body" role="main">
<a id="top"></a>

<div class="mw-indicators mw-body-content">
{indicators_html}
</div>

<h1 id="firstHeading" class="firstHeading" lang="{lang}">{title}</h1>

<div id="bodyContent" class="mw-body-content">
<div id="contentSub"></div>
<div id="jump-to-nav"></div>

<div id="mw-content-text" lang="{lang}" dir="{text_dir}" class="mw-content-{text_dir}">
{content_html}
</div>

{categories_html}
<div class="visualClear"></div>
</div>
</div>

<div id="footer" role="contentinfo"></div>

<script>(function(){{
  var anchorHrefPrefix = window.location.href.replace(/#.*$/, "")

  var links = document.getElementsByTagName("a")
  for (var i = 0, len = links.length; i < len; i++) {{
    var link = links[i]
    var href = link.getAttribute("href")
    if (typeof href === "string" && href[0] === "#") {{
      link.setAttribute("href", anchorHrefPrefix + href)
    }}
  }}
}}())</script>
</body>

</html>
""".format(
    lang=html.escape(LANG),
    text_dir=html.escape(LANG_TEXT_DIRECTION),
    # Relative URLs in the page (load script, links) resolve against the wiki.
    base_url=html.escape("{}://{}".format(site.scheme, site.host)),
    page_modules_state_json=json_dumps_compact(
        {
            "noscript": "ready",
            "user.options": "ready",
            "user.tokens": "loading",
            # The preloaded style modules are delivered by the stylesheet link.
            # A skin without an entry in the table simply preloads nothing.
            **{
                name: "ready"
                for name in MODULES_PRELOAD_STYLES.get(cli_args.skin, [])
            },
        }
    ),
    page_config_json=json_dumps_compact(result["jsconfigvars"]),
    page_modules_json=json_dumps_compact(
        get_modules(result["modules"], MODULES_POST_LOAD, MODULES_POST_LOAD_BLOCKED)
    ),
    style_url=html.escape(
        get_load_script_url(
            only="styles",
            modules="|".join(
                get_modules(result["modulestyles"], MODULES_PRELOAD_STYLES)
            ),
        )
    ),
    script_url=html.escape(
        get_load_script_url(
            only="scripts",
            modules="|".join(
                get_modules(result["modulescripts"], MODULES_PRELOAD_SCRIPTS)
            ),
            raw="1",
        )
    ),
    skin=html.escape(cli_args.skin),
    page_class=html.escape(escape_css_class(result["displaytitle"])),
    title=html.escape(result["displaytitle"]),
    indicators_html="\n".join(
        # The indicator name lands inside an attribute, so it is escaped; the
        # indicator body is HTML produced by the wiki and inserted as is.
        '<div id="mw-indicator-{}" class="mw-indicator">{}</div>'.format(
            html.escape(indicator["name"]), indicator["*"]
        )
        for indicator in result["indicators"]
    ),
    content_html=result["text"]["*"],
    categories_html=result["categorieshtml"]["*"],
)
|
|
|
|
|
|
|
|
|
2020-08-13 16:05:05 +00:00
|
|
|
# Write the page as UTF-8 to match the `<meta charset="UTF-8"/>` declaration in
# the generated document, regardless of the platform's default encoding.
with open(cli_args.output, "w", encoding="utf-8") as f:
    f.write(rendered_html)
|