Optimise pre-processing regular expression

This commit is contained in:
Cadence Ember 2023-04-02 00:04:35 +13:00
parent f5529ed12a
commit ba6c5be990
Signed by: cadence
GPG key ID: BC1C2C61CF521B17

View file

@@ -12,17 +12,12 @@
update-tree-wiki) update-tree-wiki)
(define (preprocess-html-wiki html) (define (preprocess-html-wiki html)
(define ((rr* find replace) contents) (regexp-replace* #rx"(<(?:td|figcaption)[^>]*?>\n?)(?:<li>|[ \t]*?<p class=\"caption\">(.*?)</p>)"
(regexp-replace* find contents replace)) html (λ (whole first-tag [contents #f])
((compose1 (if (eq? (string-ref whole 1) #\f) ;; figcaption
; fix navbox list nesting (string-append first-tag "<span class=\"caption\">" contents "</span>")
; navbox on right of page has incorrect html "<td ...><li>" and the xexpr parser puts the <li> much further up the tree (string-append first-tag "<ul><li>")))))
; add a <ul> to make the parser happy
; usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies
(rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2")
; change <figcaption><p> to <figcaption><span> to make the parser happy
(rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>"))
html))
(module+ test (module+ test
(check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>") (check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>")
"<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>") "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>")