[scripts] add a MediaWiki page previewer script

This commit is contained in:
Dmytro Meleshko 2020-08-12 22:29:32 +03:00
parent 23b00f41a4
commit 3a02a30ef3

191
scripts/mediawiki-preview Executable file
View file

@ -0,0 +1,191 @@
#!/usr/bin/env python3
# This script is based on <https://github.com/m-pilia/vim-mediawiki/blob/266d7ab9c7d4d7924e16f527c109c65851da0164/scripts/preview.py>
# Useful resources:
# <https://www.mediawiki.org/wiki/API:Parsing_wikitext#parse>
# <https://www.mediawiki.org/wiki/API:Styling_content>
# <https://www.mediawiki.org/wiki/ResourceLoader/Developing_with_ResourceLoader>
# <https://github.com/wikimedia/mediawiki>
# <https://github.com/wikimedia/Vector>
# and, most importantly: `view-source:` for any MediaWiki page
import argparse
import mwclient
import json
from urllib.parse import urlencode
import html
import re
# Presentation settings of the generated preview page. SKIN and LANG are sent
# to the wiki (parse request and load-script URLs) and, together with
# LANG_TEXT_DIRECTION, are written into the attributes of the HTML skeleton.
SKIN = "vector"
LANG = "en"
LANG_TEXT_DIRECTION = "ltr"

# ResourceLoader modules appended to the page's own module list in
# RLPAGEMODULES.
MODULES_POST_LOAD = [
    "site",
    "mediawiki.page.startup",
    "mediawiki.page.ready",
    "mediawiki.toc",
    # Left disabled on purpose:
    # "mediawiki.searchSuggest",
    # "mediawiki.page.watch.ajax",
    "skins.{}.js".format(SKIN),
]

# Style modules requested through the stylesheet <link>; each of them is also
# reported as "ready" in RLSTATE.
MODULES_PRELOAD_STYLES = [
    "mediawiki.legacy.commonPrint",
    "mediawiki.legacy.shared",
    "mediawiki.skinning.interface",
    "mediawiki.toc.styles",
    "skins.{}.styles".format(SKIN),
    "site.styles",
]

# Script modules requested through the async <script> tag in <head>.
MODULES_PRELOAD_SCRIPTS = [
    "startup",
]
# ported from <https://github.com/wikimedia/mediawiki/blob/c15ded31a6ca79fa65c00d151a7220632ad90b6d/includes/parser/Sanitizer.php#L1205-L1222>
def escape_css_class(class_str):
    """Turn *class_str* into a string usable as a CSS class name.

    A leading digit or hyphen, ASCII control characters and spaces, the
    listed ASCII punctuation and U+00A0 are each replaced by an underscore.
    Runs of underscores are then collapsed into one and trailing underscores
    are removed (leading ones are kept).
    """
    unsafe_pattern = r"""(^[0-9\-])|[\x00-\x20!"#$%&'()*+,.\/:;<=>?@[\]^`{|}~]|\xA0"""
    underscored = re.sub(unsafe_pattern, "_", class_str)
    collapsed = re.sub(r"_+", "_", underscored)
    return collapsed.rstrip("_")
def json_dumps_compact(data):
    """Serialize *data* to JSON on a single line without optional whitespace."""
    tight_separators = (",", ":")
    return json.dumps(data, separators=tight_separators, indent=None)
# Command-line interface. Every option now carries a help text so that
# `--help` explains what the script expects; names, defaults and
# required-ness of the options are unchanged.
parser = argparse.ArgumentParser(
    description=(
        "Render a local wikitext file through the parse API of a MediaWiki "
        "site and save the result as a standalone HTML preview page."
    ),
)
parser.add_argument(
    "--site",
    type=str,
    required=True,
    help="host of the MediaWiki site used for rendering",
)
parser.add_argument(
    "--scheme",
    type=str,
    default="https",
    help="URL scheme used to reach the site (default: %(default)s)",
)
parser.add_argument(
    "--input",
    type=str,
    required=True,
    help="path of the wikitext file to preview",
)
parser.add_argument(
    "--title",
    type=str,
    help="page title passed to the parse API",
)
parser.add_argument(
    "--output",
    type=str,
    required=True,
    help="path the rendered HTML page is written to",
)
args = parser.parse_args()
# Client for the wiki that renders the preview.
site = mwclient.Site(args.site, scheme=args.scheme)

# Read the wikitext to preview. The encoding is given explicitly so the file
# is decoded as UTF-8 instead of whatever the platform's locale default is.
with open(args.input, "r", encoding="utf-8") as f:
    wikitext_str = f.read()
# Ask the wiki to render the wikitext and keep the "parse" member of the
# response: the HTML body, indicators, display title, ResourceLoader modules,
# JS config variables and the categories HTML.
result = site.post(
    "parse",
    title=args.title,
    text=wikitext_str,
    contentmodel="wikitext",
    prop="text|indicators|displaytitle|modules|jsconfigvars|categorieshtml",
    preview=True,
    # `sectionpreview` is intentionally NOT sent. MediaWiki API boolean
    # parameters count as true whenever they are present, whatever their
    # value, so passing `sectionpreview=False` would risk switching section
    # preview mode on. Leaving the parameter out is how "false" is expressed.
    disableeditsection=True,
    useskin=SKIN,
    uselang=LANG,
)["parse"]
def get_load_script_url(**params):
    """Build the URL of the wiki's `load` script with the given query parameters.

    `lang` and `skin` default to LANG and SKIN; keyword arguments of the same
    name override them. The result is `site.path`, the word "load",
    `site.ext`, a "?" and the URL-encoded query string.
    """
    query = {"lang": LANG, "skin": SKIN}
    query.update(params)
    return "{path}load{ext}?{args}".format(
        path=site.path,
        ext=site.ext,
        args=urlencode(query),
    )
def _json_for_inline_script(data):
    """Serialize *data* as compact JSON that is safe inside a <script> element."""
    # "<" can only occur inside JSON string literals, where "\u003c" is an
    # equivalent spelling. Escaping it keeps sequences such as "</script>" or
    # "<!--" in wiki-provided data from ending or derailing the inline script.
    return json_dumps_compact(data).replace("<", "\\u003c")


# Assemble the standalone preview page: a minimal skin skeleton around the
# HTML returned by the parse API, plus the ResourceLoader bootstrap (RLSTATE,
# RLCONF, RLPAGEMODULES) and the stylesheet/script URLs of the wiki.
#
# `content_html`, `categories_html` and the indicator bodies are HTML produced
# by the wiki and are inserted verbatim; everything else is escaped for the
# context it is placed in.
#
# NOTE(review): `displaytitle` is escaped before use in <h1> and is also the
# source of the `page-…` body class. If the API returns it as HTML (entities
# or markup), the heading would be double-escaped — confirm against the wiki.
rendered_html = """\
<!DOCTYPE html>
<html class="client-nojs" lang="{lang}" dir="{text_dir}">
<head>
<meta charset="UTF-8"/>
<base href="{base_url}"/>
<script>document.documentElement.className="client-js";RLSTATE={page_modules_state_json};RLCONF={page_config_json};RLPAGEMODULES={page_modules_json};</script>
<script>(RLQ=window.RLQ||[]).push(function(){{mw.loader.implement("user.tokens",function(){{mw.user.tokens.set({{"editToken":"+\\\\","patrolToken":"+\\\\","watchToken":"+\\\\","csrfToken":"+\\\\"}});}});}});</script>
<link rel="stylesheet" href="{style_url}"/>
<script async="" src="{script_url}"></script>
</head>
<body class="mediawiki {text_dir} sitedir-{text_dir} mw-hide-empty-elt page-{page_class} skin-{skin} action-view">
<div id="mw-page-base" class="noprint"></div>
<div id="mw-head-base" class="noprint"></div>
<div id="content" class="mw-body" role="main">
<a id="top"></a>
<div class="mw-indicators mw-body-content">
{indicators_html}
</div>
<h1 id="firstHeading" class="firstHeading" lang="{lang}">{title}</h1>
<div id="bodyContent" class="mw-body-content">
<div id="contentSub"></div>
<div id="jump-to-nav"></div>
<div id="mw-content-text" lang="{lang}" dir="{text_dir}" class="mw-content-{text_dir}">
{content_html}
</div>
{categories_html}
<div class="visualClear"></div>
</div>
</div>
<div id="footer" role="contentinfo"></div>
</body>
</html>
""".format(
    lang=html.escape(LANG),
    text_dir=html.escape(LANG_TEXT_DIRECTION),
    base_url=html.escape("{}://{}".format(site.scheme, site.host)),
    page_modules_state_json=_json_for_inline_script(
        {
            "noscript": "ready",
            "user.options": "ready",
            "user.tokens": "loading",
            **{name: "ready" for name in MODULES_PRELOAD_STYLES},
        }
    ),
    page_config_json=_json_for_inline_script(result["jsconfigvars"]),
    page_modules_json=_json_for_inline_script(
        result["modules"] + MODULES_POST_LOAD
    ),
    style_url=html.escape(
        get_load_script_url(
            only="styles",
            modules="|".join(result["modulestyles"] + MODULES_PRELOAD_STYLES),
        )
    ),
    script_url=html.escape(
        get_load_script_url(
            only="scripts",
            modules="|".join(result["modulescripts"] + MODULES_PRELOAD_SCRIPTS),
            raw="1",
        )
    ),
    skin=html.escape(SKIN),
    page_class=html.escape(escape_css_class(result["displaytitle"])),
    title=html.escape(result["displaytitle"]),
    indicators_html="\n".join(
        # The indicator name ends up inside an `id="…"` attribute, so it is
        # escaped; the indicator body is wiki-rendered HTML and stays as-is.
        '<div id="mw-indicator-{}" class="mw-indicator">{}</div>'.format(
            html.escape(indicator["name"]), indicator["*"]
        )
        for indicator in result["indicators"]
    ),
    content_html=result["text"]["*"],
    categories_html=result["categorieshtml"]["*"],
)
# Write the page as UTF-8 so the bytes on disk agree with the
# `<meta charset="UTF-8"/>` declared in the generated HTML; the locale default
# encoding could otherwise garble or reject non-ASCII page content.
with open(args.output, "w", encoding="utf-8") as f:
    f.write(rendered_html)