breezewiki/src/extwiki-generic.rkt

130 lines
6 KiB
Racket

#lang racket/base
(require racket/list
racket/match
racket/string
memo
net/http-easy
html-parsing
"../lib/pure-utils.rkt"
"../lib/syntax.rkt"
"../lib/url-utils.rkt"
"../lib/xexpr-utils.rkt")
(provide
get-redirect-content)
(module+ test
(require rackunit))
;; fandom wikinames * Title * Main Page * Search page override * API endpoint override
(define wikis
'(((gallowmere) "MediEvil Wiki" "https://medievil.wiki/w/Main_Page" #f #f)
((fallout) "Fallout Wiki" "https://fallout.wiki/wiki/Fallout_Wiki" #f "https://fallout.wiki/api.php")
((drawntolife) "Wapopedia" "https://drawntolife.wiki/en/Main_Page" #f "https://drawntolife.wiki/w/api.php")
))
(define wikis-hash (make-hash))
(for ([w wikis])
(for ([wikiname (car w)])
(hash-set! wikis-hash (symbol->string wikiname) w)))
(module+ test
(check-equal? (cadr (hash-ref wikis-hash "gallowmere"))
"MediEvil Wiki"))
(define (parse-table table)
(define rows (query-selector (λ (t a c) (eq? t 'tr)) table))
(define header-row (rows))
(define column-names
(for/list ([th (in-producer (query-selector (λ (t a c) (eq? t 'th)) header-row) #f)])
(string->symbol (string-downcase (string-trim (findf string? th))))))
(define data-row (rows))
(for/hasheq ([col-name column-names]
[col-value (in-producer (query-selector (λ (t a c) (eq? t 'td)) data-row) #f)])
(values col-name (filter element-is-content? (cdr col-value)))))
(module+ test
(check-equal? (parse-table (html->xexp "<table> <tbody><tr> <th>Links</th></tr> <tr> <td><a target=\"_blank\" rel=\"nofollow noreferrer noopener\" class=\"external text\" href=\"https://sirdanielfortesque.proboards.com/\">Forum</a></td></tr></tbody></table>"))
'#hasheq((links . ((a (@ (target "_blank") (rel "nofollow noreferrer noopener") (class "external text") (href "https://sirdanielfortesque.proboards.com/")) "Forum"))))))
(define (table->links table)
(define v (hash-ref table 'links #f))
(cond/var
[(not v) (values null '("Data table must have a \"Links\" column"))]
(var links (filter (λ (a) (and (pair? a) (eq? (car a) 'a))) v)) ; <a> elements
[(null? links) (values null '("Links column must have at least one link"))]
[#t (values links null)]))
(define (table->logo table)
(define logo (hash-ref table 'logo #f))
(cond/var
[(not logo) (values #f '("Data table must have a \"Logo\" column"))]
[(null? logo) (values #f '("Logo table column must have a link"))]
(var href (get-attribute 'href (bits->attributes (car (hash-ref table 'logo)))))
(var src (get-attribute 'src (bits->attributes (car (hash-ref table 'logo)))))
(var true-src (or href src))
[(not true-src) (values #f '("Logo table column must have a link"))]
[#t (values true-src null)]))
(define (get-api-endpoint wiki)
(define main-page (third wiki))
(define override (fifth wiki))
(or override
(match main-page
[(regexp #rx"/$") (string-append main-page "api.php")]
[(regexp #rx"^(.*)/wiki/" (list _ domain)) (string-append domain "/w/api.php")]
[(regexp #rx"^(.*)/w/" (list _ domain)) (string-append domain "/api.php")]
[_ (error 'get-api-endpoint "unknown url format: ~a" main-page)])))
(define (get-search-page wiki)
(define main-page (third wiki))
(define override (fourth wiki))
(or override
(match main-page
[(regexp #rx"/$") (string-append main-page "Special:Search")]
[(regexp #rx"^(.*/(?:en|w[^./]*)/)" (list _ wiki-prefix)) (string-append wiki-prefix "Special:Search")]
[_ (error 'get-search-page "unknown url format: ~a" main-page)])))
(define/memoize (get-redirect-content wikiname) #:hash hash
(define wiki (hash-ref wikis-hash wikiname #f))
(cond
[wiki
(define display-name (cadr wiki))
(define endpoint (string-append (get-api-endpoint wiki) "?action=parse&page=MediaWiki:BreezeWikiRedirect&prop=text&formatversion=2&format=json"))
(define res (get endpoint))
(define html (jp "/parse/text" (response-json res)))
(define content ((query-selector (λ (t a c) (has-class? "mw-parser-output" a))
(html->xexp html))))
(define body (for/list ([p (in-producer (query-selector (λ (t a c) (eq? t 'p)) content) #f)]) p))
(define table (parse-table ((query-selector (λ (t a c) (eq? t 'table)) content))))
(define-values (links links-errors) (table->links table))
(define-values (logo logo-errors) (table->logo table))
(define construct-errors (append links-errors logo-errors))
(λ (title)
(define go
(string-append (get-search-page wiki)
"?"
(params->query `(("search" . ,title)
("go" . "Go")))))
`(aside (@ (class "niwa__notice"))
(h1 (@ (class "niwa__header")) ,display-name " has its own website separate from Fandom.")
(div (@ (class "niwa__cols"))
(div (@ (class "niwa__left"))
(a (@ (class "niwa__go") (href ,go)) "Read " ,title " on " ,display-name "")
,@body
(p "This wiki's core community has wholly migrated away from Fandom. You should "
(a (@ (href ,go)) "go to " ,display-name " now!")))
,(if logo
`(div (@ (class "niwa__right"))
(img (@ (class "niwa__logo") (src ,logo))))
""))
,(if (pair? links)
`(p (@ (class "niwa__feedback"))
,@(add-between links " / "))
"")
,(if (pair? construct-errors)
`(ul
,@(for/list ([error construct-errors])
`(li ,error)))
"")))]
[#t #f]))
(module+ test
(check-not-false ((get-redirect-content "gallowmere") "MediEvil Wiki")))