Optimise pre-processing regular expression
This commit is contained in:
parent
f5529ed12a
commit
ba6c5be990
1 changed file with 6 additions and 11 deletions
|
@@ -12,17 +12,12 @@
|
||||||
update-tree-wiki)
|
update-tree-wiki)
|
||||||
|
|
||||||
(define (preprocess-html-wiki html)
|
(define (preprocess-html-wiki html)
|
||||||
(define ((rr* find replace) contents)
|
(regexp-replace* #rx"(<(?:td|figcaption)[^>]*?>\n?)(?:<li>|[ \t]*?<p class=\"caption\">(.*?)</p>)"
|
||||||
(regexp-replace* find contents replace))
|
html (λ (whole first-tag [contents #f])
|
||||||
((compose1
|
(if (eq? (string-ref whole 1) #\f) ;; figcaption
|
||||||
; fix navbox list nesting
|
(string-append first-tag "<span class=\"caption\">" contents "</span>")
|
||||||
; navbox on right of page has incorrect html "<td ...><li>" and the xexpr parser puts the <li> much further up the tree
|
(string-append first-tag "<ul><li>")))))
|
||||||
; add a <ul> to make the parser happy
|
|
||||||
; usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies
|
|
||||||
(rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2")
|
|
||||||
; change <figcaption><p> to <figcaption><span> to make the parser happy
|
|
||||||
(rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>"))
|
|
||||||
html))
|
|
||||||
(module+ test
|
(module+ test
|
||||||
(check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>")
|
(check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>")
|
||||||
"<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>")
|
"<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>")
|
||||||
|
|
Loading…
Reference in a new issue