2022-08-23 09:57:42 +00:00
|
|
|
#lang racket/base
|
|
|
|
(require racket/dict
|
|
|
|
racket/function
|
|
|
|
racket/list
|
2022-09-16 12:56:05 +00:00
|
|
|
racket/match
|
2022-08-23 09:57:42 +00:00
|
|
|
racket/string
|
|
|
|
; libs
|
|
|
|
(prefix-in easy: net/http-easy)
|
|
|
|
; html libs
|
|
|
|
html-parsing
|
|
|
|
html-writing
|
|
|
|
; web server libs
|
|
|
|
net/url
|
|
|
|
web-server/http
|
|
|
|
web-server/dispatchers/dispatch
|
|
|
|
; my libs
|
2022-09-11 11:21:37 +00:00
|
|
|
"application-globals.rkt"
|
2022-09-05 01:38:16 +00:00
|
|
|
"config.rkt"
|
2022-09-16 13:56:03 +00:00
|
|
|
"data.rkt"
|
2022-08-23 09:57:42 +00:00
|
|
|
"pure-utils.rkt"
|
2022-09-16 13:56:03 +00:00
|
|
|
"syntax.rkt"
|
2022-12-04 10:46:24 +00:00
|
|
|
"tree-updater.rkt"
|
2022-08-23 09:57:42 +00:00
|
|
|
"xexpr-utils.rkt"
|
2022-09-11 11:21:37 +00:00
|
|
|
"url-utils.rkt")
|
2022-08-23 09:57:42 +00:00
|
|
|
|
|
|
|
(provide
|
2022-09-11 11:21:37 +00:00
|
|
|
; used by the web server
|
|
|
|
page-wiki
|
|
|
|
; used by page-category, and similar pages that are partially wiki pages
|
|
|
|
update-tree-wiki
|
|
|
|
preprocess-html-wiki)
|
2022-08-23 09:57:42 +00:00
|
|
|
|
|
|
|
(module+ test
|
2022-12-04 10:46:24 +00:00
|
|
|
(require rackunit))
|
2022-08-23 09:57:42 +00:00
|
|
|
|
|
|
|
(define (preprocess-html-wiki html)
|
2022-12-04 10:46:24 +00:00
|
|
|
(define ((rr* find replace) contents)
|
2022-08-23 09:57:42 +00:00
|
|
|
(regexp-replace* find contents replace))
|
|
|
|
((compose1
|
|
|
|
; fix navbox list nesting
|
|
|
|
; navbox on right of page has incorrect html "<td ...><li>" and the xexpr parser puts the <li> much further up the tree
|
|
|
|
; add a <ul> to make the parser happy
|
|
|
|
; usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies
|
2022-12-04 10:46:24 +00:00
|
|
|
(rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2")
|
2022-08-23 09:57:42 +00:00
|
|
|
; change <figcaption><p> to <figcaption><span> to make the parser happy
|
2022-12-04 10:46:24 +00:00
|
|
|
(rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>"))
|
2022-08-23 09:57:42 +00:00
|
|
|
html))
|
|
|
|
(module+ test
|
|
|
|
(check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>")
|
|
|
|
"<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>")
|
|
|
|
(check-equal? (preprocess-html-wiki "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"> <p class=\"caption\">Caption text.</p></figcaption></figure>")
|
|
|
|
"<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"><span class=\"caption\">Caption text.</span></figcaption></figure>"))
|
|
|
|
|
|
|
|
(define (page-wiki req)
|
|
|
|
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
2022-11-29 11:03:54 +00:00
|
|
|
(define user-cookies (user-cookies-getter req))
|
2022-08-23 09:57:42 +00:00
|
|
|
(define origin (format "https://~a.fandom.com" wikiname))
|
|
|
|
(define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/"))
|
|
|
|
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path))
|
|
|
|
|
2022-09-16 13:56:03 +00:00
|
|
|
(thread-let
|
|
|
|
([dest-res (define dest-url
|
|
|
|
(format "~a/api.php?~a"
|
|
|
|
origin
|
|
|
|
(params->query `(("action" . "parse")
|
|
|
|
("page" . ,path)
|
|
|
|
("prop" . "text|headhtml|langlinks")
|
|
|
|
("formatversion" . "2")
|
|
|
|
("format" . "json")))))
|
2022-10-04 08:13:07 +00:00
|
|
|
(log-outgoing dest-url)
|
2022-11-29 11:03:54 +00:00
|
|
|
(easy:get dest-url
|
|
|
|
#:timeouts timeouts
|
|
|
|
#:headers `#hasheq((cookie . ,(format "theme=~a" (user-cookies^-theme user-cookies)))))]
|
2022-10-09 09:50:50 +00:00
|
|
|
[siteinfo (siteinfo-fetch wikiname)])
|
2022-08-23 09:57:42 +00:00
|
|
|
|
2022-09-16 13:56:03 +00:00
|
|
|
(cond
|
|
|
|
[(eq? 200 (easy:response-status-code dest-res))
|
|
|
|
(let* ([data (easy:response-json dest-res)]
|
|
|
|
[title (jp "/parse/title" data "")]
|
|
|
|
[page-html (jp "/parse/text" data "")]
|
|
|
|
[page-html (preprocess-html-wiki page-html)]
|
|
|
|
[page (html->xexp page-html)]
|
2022-10-31 06:39:19 +00:00
|
|
|
[head-data ((head-data-getter wikiname) data)])
|
2022-09-16 13:56:03 +00:00
|
|
|
(if (equal? "missingtitle" (jp "/error/code" data #f))
|
|
|
|
(next-dispatcher)
|
|
|
|
(response-handler
|
|
|
|
(define body
|
|
|
|
(generate-wiki-page
|
|
|
|
(update-tree-wiki page wikiname)
|
2022-11-29 11:03:54 +00:00
|
|
|
#:req req
|
2022-09-16 13:56:03 +00:00
|
|
|
#:source-url source-url
|
|
|
|
#:wikiname wikiname
|
|
|
|
#:title title
|
2022-10-31 06:39:19 +00:00
|
|
|
#:head-data head-data
|
2022-10-09 09:50:50 +00:00
|
|
|
#:siteinfo siteinfo))
|
2022-09-16 13:56:03 +00:00
|
|
|
(define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body)))
|
2022-10-09 10:43:21 +00:00
|
|
|
(define headers
|
|
|
|
(build-headers
|
|
|
|
always-headers
|
|
|
|
(when redirect-msg
|
2022-11-29 11:03:54 +00:00
|
|
|
(let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))]
|
|
|
|
[value (bytes-append #"0;url=" (string->bytes/utf-8 dest))])
|
|
|
|
(header #"Refresh" value)))))
|
2022-09-16 13:56:03 +00:00
|
|
|
(when (config-true? 'debug)
|
|
|
|
; used for its side effects
|
|
|
|
; convert to string with error checking, error will be raised if xexp is invalid
|
|
|
|
(xexp->html body))
|
|
|
|
(response/output
|
|
|
|
#:code 200
|
|
|
|
#:headers headers
|
|
|
|
(λ (out)
|
|
|
|
(write-html body out))))))])))
|