Create archiver and offline code handlers

Somewhat messy. Will clean up gradually in future commits.
Cadence Ember 2023-02-05 17:56:15 +13:00
commit c7cce5479d
46 changed files with 4274 additions and 407 deletions

#lang info
;; Packages declared as build-time dependencies of this collection.
;; NOTE(review): several of these (e.g. "charterm", "cli", "gui-easy-lib") are
;; required by modules at run time, so they may belong in `deps` instead — confirm.
(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "ini-lib" "memo" "net-cookies-lib" "gui-easy-lib" "sql" "charterm" "cli"))

#lang cli
(require charterm
(help (usage "Downloads a single Fandom wiki in BreezeWiki offline format."
"Downloaded pages go into `archive/` next to the executable."
"Database goes into `archiver.db*` next to the executable."
"The database is necessary to store your download progress and resume where you left off if the process is interrupted.")
(ps ""
"Default output style is `progress` in a tty and `lines` otherwise."))
(flag (output-quiet?)
("-q" "--output-quiet" "disable progress output")
(output-quiet? #t))
(flag (output-lines?)
("-l" "--output-lines" "output the name of each file downloaded")
(output-lines? #t))
(flag (output-progress?)
("-p" "--output-progress" "progress output for terminals")
(output-progress? #t))
(constraint (one-of output-quiet? output-lines? output-progress?))
(start [wikiname "wikiname to download"])
;; set up arguments
(define width 80)
;; No output style was requested explicitly: pick one automatically.
;; Progress output redraws a single line using "\r", which only makes sense
;; when standard output is an interactive terminal; otherwise print one line
;; per downloaded file.
(when (not (or (output-quiet?) (output-lines?) (output-progress?)))
  ;; `terminal-port?` needs a port value, so the port parameter has to be
  ;; applied. The output port is the relevant one because that is where the
  ;; progress text is written.
  (cond [(terminal-port? (current-output-port))
         (output-progress? #t)]
        [else
         (output-lines? #t)]))
(define (update-width)
(when (output-progress?)
(call-with-values (λ () (charterm-screen-size))
(λ (cols rows) (set! width cols))))))
;; check
(when (or (not wikiname) (equal? wikiname ""))
(raise-user-error "Please specify the wikiname to download on the command line."))
;; stage 1
(cond [(output-lines?) (displayln "Downloading list of pages...")]
[(output-progress?) (printf "Downloading list of pages... \r")])
(λ (a b c)
(cond [(output-progress?) (printf "Downloading list of pages... [~a/~b]\r" a b)])))
;; stage 2
(λ (a b c)
(define basename (basename->name-for-query c))
(displayln basename)]
(when (eq? (modulo a 20) 0)
(thread (λ () (update-width))))
(define prefix (format "[~a/~a] " a b))
(define rest (- width (string-length prefix)))
(define real-width (min (string-length basename) rest))
(define spare-width (- rest real-width))
(define name-display (substring basename 0 real-width))
(define whitespace (make-string spare-width #\ ))
(printf "~a~a~a\r" prefix name-display whitespace)]))))
(run start)

#lang racket/base
(require racket/list
(define-runtime-path database-file "../storage/archiver.db")
(define migrations
((query-exec slc "create table page (wikiname TEXT NOT NULL, basename TEXT NOT NULL, progress INTEGER NOT NULL, PRIMARY KEY (wikiname, basename))")
(query-exec slc "create table wiki (wikiname TEXT NOT NULL, progress INTEGER, PRIMARY KEY (wikiname))"))
((query-exec slc "create table special_page (wikiname TEXT NOT NULL, key TEXT NOT NULL, basename TEXT NOT NULL, PRIMARY KEY (wikiname, key))"))
((query-exec slc "update wiki set progress = 2 where wikiname in (select wikiname from wiki inner join page using (wikiname) group by wikiname having min(page.progress) = 1)"))
;; `hash` is part of the primary key and is always supplied on insert, so it is
;; declared NOT NULL. (The previous spelling "NTO NULL" was accepted by SQLite
;; as part of the column's type name, which left the column nullable. Databases
;; that already ran this step keep the old definition.)
((query-exec slc "create table image (wikiname TEXT NOT NULL, hash TEXT NOT NULL, url TEXT NOT NULL, ext TEXT, source INTEGER NOT NULL, progress INTEGER NOT NULL, PRIMARY KEY (wikiname, hash))"))
((query-exec slc "alter table wiki add column sitename TEXT")
(query-exec slc "alter table wiki add column basepage TEXT")
(query-exec slc "alter table wiki add column license_text TEXT")
(query-exec slc "alter table wiki add column license_url TEXT"))))
(define slc (sqlite3-connect #:database database-file #:mode 'create))
(query-exec slc "PRAGMA journal_mode=WAL")
(define database-version
(with-handlers ([exn:fail:sql?
(λ (exn)
; need to set up the database
(query-exec slc "create table database_version (version integer, primary key (version))")
(query-exec slc "insert into database_version values (0)")
(query-value slc "select version from database_version")))
(let do-migrate-step ()
(when (database-version . < . (length migrations))
(list-ref migrations database-version))
(set! database-version (add1 database-version))
(query-exec slc "update database_version set version = $1" database-version)

#lang racket/base
(require racket/class
;; Threads started through the `t` macro. They are remembered so that they can
;; all be killed after an error or when the window is closed.
(define active-threads (mutable-seteq))
;; --- observable application state driving the GUI ---
;; when true, a failed wiki is re-queued instead of showing the error dialog
(define/obs @auto-retry #f)
;; current contents of the wikiname input box
(define/obs @wikiname "")
;; one of: waiting, stage-0, stage-1, stage-2, err, done
(define/obs @state 'waiting)
;; total number of pages for the current stage, as reported by progress callbacks
(define/obs @num-pages 1)
;; number of pages completed so far, as reported by progress callbacks
(define/obs @done-pages 0)
;; name of the most recently completed item, shown under the progress display
(define/obs @just-done "")
;; wikinames waiting to be downloaded, in order
(define/obs @queue '())
(define @title
(λ (state queue num-pages done-pages)
(define suffix (if (pair? queue)
(format " +~a" (length queue))
(define progress (if (eq? num-pages 0)
" 0%"
(format " ~a%" (round (inexact->exact (* (/ done-pages num-pages) 100))))))
(case state
[(waiting stage-0) (format "Fandom Archiver~a" suffix)]
[(stage-1) (format "Fandom Archiver 0%~a" suffix)]
[(stage-2) (format "Fandom Archiver~a~a" progress suffix)]
[(err) "ERROR Fandom Archiver"]
[(done) "Fandom Archiver 100%"]))
@state @queue @num-pages (obs-throttle @done-pages #:duration 5000)))
;; (t form ...) evaluates the forms in a new thread and records that thread in
;; `active-threads` so it can be killed later.
(define-syntax t
  (syntax-rules ()
    [(_ form ...)
     (set-add! active-threads (thread (λ () form ...)))]))
(define (do-start-or-queue)
(define wikiname (obs-peek @wikiname))
(:= @wikiname "")
(when (not (equal? (string-trim wikiname) ""))
(@queue . <~ . (λ (q) (append q (list wikiname))))
(define (shift-queue-maybe)
(when (memq (obs-peek @state) '(waiting done))
(define q (obs-peek @queue))
[(pair? q)
(define wikiname (car q))
(:= @queue (cdr q))
(do-start-stage1 wikiname)]
[#t (:= @state 'done)])))
;; Stage 1 for `wikiname`: reset the progress observables, then, in a tracked
;; thread, download the wiki's list of pages if that is still necessary and
;; hand over to stage 2. Failures inside the thread go to the graphical
;; exception handler for this wiki.
(define (do-start-stage1 wikiname)
  ;; progress callback: publish the counters and mark stage 1 as running
  (define (report-progress now-done num-pages just-done-name)
    (:= @num-pages num-pages)
    (:= @done-pages now-done)
    (:= @just-done just-done-name)
    (:= @state 'stage-1))
  (:= @just-done "")
  (:= @done-pages 0)
  (:= @num-pages 1)
  (t (with-handlers ([exn:fail? (handle-graphical-exn wikiname)])
       ;; stays in stage-0 until the first progress report switches to stage-1
       (:= @state 'stage-0)
       (if-necessary-download-list-of-pages wikiname report-progress)
       (do-start-stage2 wikiname))))
(define (do-start-stage2 wikiname)
(:= @just-done "")
(:= @num-pages 1)
(:= @done-pages 0)
(t (with-handlers ([exn:fail? (handle-graphical-exn wikiname)])
(save-each-page wikiname (λ (now-done num-pages just-done-path)
(:= @num-pages num-pages)
(:= @done-pages now-done)
(:= @just-done just-done-path)))
(:= @state 'waiting)
(:= @state 'stage-2))
(define (exn->string e)
(λ ()
(displayln (exn-message e))
(displayln "context:")
(for ([item (continuation-mark-set->context (exn-continuation-marks e))])
(printf " ~a" (srcloc->string (cdr item)))
(when (car item)
(printf ": ~a" (car item)))
(displayln "")))))
(define ((handle-graphical-exn wikiname) e)
(displayln (exn->string e) (current-error-port))
[(obs-peek @auto-retry)
(do-retry-end wikiname)]
(:= @state 'err)
(λ ()
(define/obs @visible? #t)
(dialog #:title "Download Error"
#:style '(resize-border)
#:mixin (λ (%) (class % (super-new)
(obs-observe! @visible? (λ (visible?) (send this show visible?)))))
(vpanel #:margin '(15 15)
(text "Encountered this error while downloading:")
(input #:style '(multiple hscroll)
#:min-size '(#f 200)
(exn->string e))
(button "Retry Now" (λ () (:= @visible? #f) (do-retry-now wikiname)))
(button "Retry Round-Robin" (λ () (:= @visible? #f) (do-retry-end wikiname)))
(button "Skip Wiki" (λ () (:= @visible? #f) (do-continue)))
(button "Use Auto-Retry" (λ ()
(:= @auto-retry #t)
(:= @visible? #f)
(do-retry-end wikiname)))
(text "Be careful not to auto-retry an infinite loop!")))
; make sure the old broken threads are all gone
(for ([th active-threads]) (kill-thread th))
(set-clear! active-threads)]))
(define (do-retry-now wikiname)
(@queue . <~ . (λ (q) (append (list wikiname) q)))
(:= @state 'waiting)
(define (do-retry-end wikiname)
(@queue . <~ . (λ (q) (append q (list wikiname))))
(:= @state 'waiting)
(define (do-continue)
(:= @state 'waiting)
(define (display-basename basename)
(define limit 40)
(cond [(string? basename)
(define query (basename->name-for-query basename))
(define segments (string-split query "/"))
(when (and ((string-length query) . > . limit) ((length segments) . >= . 2))
(set! query (string-append ".../" (last segments))))
(when ((string-length query) . > . limit)
(set! query (string-append (substring query 0 (- limit 3)) "...")))
[#t "?"]))
(define main-window
(window #:title @title
#:size '(360 200)
#:mixin (λ (%) (class %
(define/augment (on-close)
(for ([th active-threads]) (kill-thread th))
(disconnect slc))))
;; input box at the top
(hpanel (text "https://")
(input @wikiname
(λ (event data) (cond
[(eq? event 'input) (:= @wikiname data)]
[(eq? event 'return) (do-start-or-queue)])))
(text ""))
(button (@queue . ~> . (λ (q) (if (null? q) "Start" "Queue"))) (λ () (do-start-or-queue)))
(text (@queue . ~> . (λ (q) (if (null? q) "" (string-join #:before-first "Queue: " q ", ")))))
;; show status based on overall application state
;; waiting for wikiname entry
((waiting) (vpanel
(text "Fill in the wikiname and click start.")))
((stage-0) (vpanel
(text "Checking data...")))
((stage-1) (vpanel
(text "Gathering list of pages...")
(text (@just-done . ~> . display-basename))
(text (@done-pages . ~> . (λ (x) (if (eq? x 0)
(format "~a/~a" x (obs-peek @num-pages))))))))
;; downloading contents
((stage-2) (vpanel
(text "Downloading page text...")
(progress @done-pages #:range @num-pages)
(text (@done-pages . ~> . (λ (x) (format "~a/~a" x (obs-peek @num-pages)))))
(text (@just-done . ~> . display-basename))))
((done) (vpanel
(text "All wikis downloaded!")))
((err) (vpanel
(text "Error. Check the popup window.")))
(else (text (@state . ~> . (λ (state) (format "invalid state: ~a" state))))))
(checkbox #:label "Auto-retry on error? (Dangerous)"
#:checked? @auto-retry
(λ:= @auto-retry)))))

#lang racket/base
(require racket/file
(define archive-slc slc)
(module+ test
(require rackunit))
(define-runtime-path archive-root "../storage/archive")
#;(define archive-root "archive")
(define sources '#hasheq((style . 1) (page . 2)))
;; Returns the https origin of the Fandom wiki called `wikiname`,
;; e.g. "minecraft" -> "https://minecraft.fandom.com" (no trailing slash).
;; The format string needs a "~a" slot: with an empty template, `format`
;; raises because it is given an argument it has no place for.
(define (get-origin wikiname)
  (format "https://~a.fandom.com" wikiname))
(define (insert-wiki-entry wikiname)
(define dest-url
(format ""
(params->query '(("action" . "query")
("meta" . "siteinfo")
("siprop" . "general|rightsinfo")
("format" . "json")
("formatversion" . "2")))))
(define data (response-json (get dest-url)))
(define exists? (query-maybe-value slc "select progress from wiki where wikiname = ?" wikiname))
(if exists?
(query-exec slc "update wiki set sitename = ?, basepage = ?, license_text = ?, license_url = ? where wikiname = ?"
(jp "/query/general/sitename" data)
(second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data)))
(jp "/query/rightsinfo/text" data)
(jp "/query/rightsinfo/url" data)
(query-exec slc "insert into wiki (wikiname, progress, sitename, basepage, license_text, license_url) values (?, 1, ?, ?, ?, ?)"
(jp "/query/general/sitename" data)
(second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data)))
(jp "/query/rightsinfo/text" data)
(jp "/query/rightsinfo/url" data))))
;; call 1 if not yet done for that wiki
;; Entry point for stage 1: downloads the list of pages for `wikiname` unless
;; the `wiki` table already records a progress of at least 1 for it.
;; `callback` is passed through unchanged to download-list-of-pages.
(define (if-necessary-download-list-of-pages wikiname callback)
(define wiki-progress (query-maybe-value slc "select progress from wiki where wikiname = ?" wikiname))
;; done yet? (a missing row or a non-numeric progress counts as "not done")
(unless (and (real? wiki-progress) (wiki-progress . >= . 1))
;; count total pages
;; NOTE(review): the format string below is empty in this view although an
;; argument is passed to it — confirm the API URL template.
(define dest-url
(format ""
(params->query `(("action" . "query") ("meta" . "siteinfo") ("siprop" . "statistics") ("format" . "json")))))
;; the article count from the site statistics is used as the grand total
(define num-pages (jp "/query/statistics/articles" (response-json (get dest-url))))
;; start from the first sitemap page: 0 pages so far, no continuation path
(download-list-of-pages wikiname callback 0 num-pages #f)))
;; 1. Download list of wiki pages and store in database
(define (download-list-of-pages wikiname callback total-so-far grand-total path-with-namefrom)
(define url (if path-with-namefrom
(format "" wikiname path-with-namefrom)
(format "" wikiname)))
(define r (get url))
(define page (html->xexp (bytes->string/utf-8 (response-body r))))
(define link-namefrom
((query-selector (λ (t a c x) (and (eq? t 'a)
(pair? x)
(string-contains? (car x) "Next page")
(let ([href (get-attribute 'href a)] )
(and href (string-contains? href "/wiki/Local_Sitemap")))))
page #:include-text? #t)))
(define row-values
(for/list ([link (in-producer
(λ (t a c) (eq? t 'a))
((query-selector (λ (t a c) (has-class? "mw-allpages-chunk" a)) page)))
(list wikiname (local-encoded-url->basename (get-attribute 'href (bits->attributes link))) 0)))
(define query-template (string-join (make-list (length row-values) "(?, ?, ?)") ", " #:before-first "insert or ignore into page (wikiname, basename, progress) values "))
(apply query-exec slc query-template (flatten row-values))
(define new-total (+ (length row-values) total-so-far))
(callback new-total grand-total (second (last row-values)))
[link-namefrom ; repeat on the next page
(download-list-of-pages wikiname callback new-total grand-total (get-attribute 'href (bits->attributes link-namefrom)))]
[#t ; all done downloading sitemap
(insert-wiki-entry wikiname)]))
;; 2. Download each page via API and:
;; * Save API response to file
(define max-page-progress 1)
(define (save-each-page wikiname callback)
;; prepare destination folder
(define save-dir (build-path archive-root wikiname))
(make-directory* save-dir)
;; gather list of basenames to download (that aren't yet complete)
(define basenames (query-list slc "select basename from page where wikiname = ? and progress < ?"
wikiname max-page-progress))
;; counter of complete/incomplete basenames
(define already-done-count
(query-value slc "select count(*) from page where wikiname = ? and progress = ?"
wikiname max-page-progress))
(define not-done-count
(query-value slc "select count(*) from page where wikiname = ? and progress < ?"
wikiname max-page-progress))
;; set initial progress
(callback already-done-count (+ already-done-count not-done-count) "")
;; loop through basenames and download
(for ([basename basenames]
[i (in-naturals 1)])
(define name-for-query (basename->name-for-query basename))
(define dest-url
(format ""
(params->query `(("action" . "parse")
("page" . ,name-for-query)
("prop" . "text|headhtml|langlinks")
("formatversion" . "2")
("format" . "json")))))
(define r (get dest-url))
(define body (response-body r))
(define filename (string-append basename ".json"))
(define save-path
(cond [((string-length basename) . > . 240)
(define key (sha1 (string->bytes/latin-1 basename)))
(query-exec slc "insert into special_page (wikiname, key, basename) values (?, ?, ?)"
wikiname key basename)
(build-path save-dir (string-append key ".json"))]
(build-path save-dir (string-append basename ".json"))]))
(display-to-file body save-path #:exists 'replace)
(query-exec slc "update page set progress = 1 where wikiname = ? and basename = ?"
wikiname basename)
(callback (+ already-done-count i) (+ already-done-count not-done-count) basename))
;; saved all pages, register that fact in the database
(query-exec slc "update wiki set progress = 2 where wikiname = ?" wikiname))
;; 3. Download CSS and:
;; * Save CSS to file
;; * Record style images to database
(define (check-style-for-images wikiname path)
(define content (file->string path))
(define urls (regexp-match* #rx"url\\(\"?'?([^)]*)'?\"?\\)" content #:match-select cadr))
(for/list ([url urls]
#:when (not (or (equal? url "")
(equal? url "'")
(string-contains? url "/resources-ucp/")
(string-contains? url "/fonts/")
(string-contains? url "/drm_fonts/")
(string-contains? url "//")
(string-contains? url "//")
(string-contains? url "dropbox")
(string-contains? url "only=styles")
(string-contains? url "https://https://")
(regexp-match? #rx"^%20" url)
(regexp-match? #rx"^data:" url))))
[(string-prefix? url "https://") url]
[(string-prefix? url "http://") (regexp-replace #rx"http:" url "https:")]
[(string-prefix? url "//") (string-append "https:" url)]
[(string-prefix? url "/") (format "" wikiname url)]
[else (raise-user-error "While calling check-style-for-images, this URL had an unknown format and couldn't be saved:" url path)])))
(define (download-styles-for-wiki wikiname)
(define save-dir (build-path archive-root wikiname "styles"))
(make-directory* save-dir)
(define theme (λ (theme-name)
(cons (format "" wikiname theme-name)
(build-path save-dir (format "themeVariables-~a.css" theme-name)))))
;; (Listof (Pair url save-path))
(define styles
(theme "default")
(theme "light")
(theme "dark")
(cons (format "" wikiname)
(build-path save-dir "site.css"))))
(for ([style styles])
(define r (get (car style)))
(define body (response-body r))
(display-to-file body (cdr style) #:exists 'replace)
;; XXX: how the HELL do I deal with @import?? would need some kind of recursion here. how will the page server know where to look up the style file to be able to serve them again? do I add another link-stylesheet tag to the main page? what about the remaining stuck @import url?
(define (do-step-3 wikiname)
(define wiki-progress (query-maybe-value slc "select progress from wiki where wikiname = ?" wikiname))
(unless (and (number? wiki-progress) (wiki-progress . >= . 3))
(define styles (download-styles-for-wiki wikiname))
(define unique-image-urls
(map image-url->values
(for/list ([style styles])
(check-style-for-images wikiname (cdr style)))))
#:key cdr))
(println unique-image-urls)
(for ([pair unique-image-urls])
(query-exec slc "insert or ignore into image (wikiname, url, hash, ext, source, progress) values (?, ?, ?, NULL, 1, 0)" wikiname (car pair) (cdr pair)))
(query-exec slc "update wiki set progress = 3 where wikiname = ?" wikiname)))
;; 4: From downloaded pages, record URLs of image sources and inline style images to database
;; Directory in which the image with the given hash is stored:
;;   <archive-root>/<wikiname>/images/<1st hash char>/<first 2 hash chars>
(define (hash->save-dir wikiname hash)
  (let ([shard-1 (substring hash 0 1)]
        [shard-2 (substring hash 0 2)])
    (build-path archive-root wikiname "images" shard-1 shard-2)))
;; Normalises an image URL and derives the hash it is stored under.
;; Returns (cons key hash), where `key` is the URL with the first
;; `cb=<digits>` parameter and any dangling trailing `&`/`?` removed, and
;; `hash` is what `sha1` returns for the UTF-8 bytes of `key`.
(define (image-url->values i)
  ;; TODO: handle case where there is multiple cb parameter on minecraft wiki
  ;; TODO: ensure it still "works" with broken &amp; on minecraft wiki
  (let* ([without-cb (regexp-replace #rx"\\cb=[0-9]+&?" i "")] ; the cb parameter does nothing, drop it
         [key (regexp-replace #rx"[&?]$" without-cb "")]       ; drop a separator left at the very end
         [digest (sha1 (string->bytes/utf-8 key))])
    (cons key digest)))
(define (check-json-for-images wikiname path)
(define data (with-input-from-file path (λ () (read-json))))
(define page (html->xexp (preprocess-html-wiki (jp "/parse/text" data))))
(define tree (update-tree-wiki page wikiname))
(for/list ([element (in-producer
(λ (t a c)
(and (eq? t 'img)
(get-attribute 'src a)))
(image-url->values (get-attribute 'src (bits->attributes element))))))
;; 5. Download image sources and style images according to database
(define (save-each-image wikiname source callback)
;; gather list of basenames to download (that aren't yet complete)
(define rows (query-rows slc "select url, hash from image where wikiname = ? and source <= ? and progress < 1"
wikiname source))
;; counter of complete/incomplete basenames
(define already-done-count
(query-value slc "select count(*) from image where wikiname = ? and source <= ? and progress = 1"
wikiname source))
(define not-done-count
(query-value slc "select count(*) from image where wikiname = ? and source <= ? and progress < 1"
wikiname source))
;; set initial progress
(callback already-done-count (+ already-done-count not-done-count) "")
;; loop through urls and download
(for ([row rows]
[i (in-naturals 1)])
;; row fragments
(define url (vector-ref row 0))
(define hash (vector-ref row 1))
;; check
(printf "~a -> ~a~n" url hash)
(define r (get url))
(define declared-type (response-headers-ref r 'content-type))
(define final-type (if (equal? declared-type #"application/octet-stream")
(let ([sniff-entity (message-entity (mime-analyze (response-body r)))])
(string->bytes/latin-1 (format "~a/~a" (entity-type sniff-entity) (entity-subtype sniff-entity))))
(define ext (bytes->string/latin-1 (mime-type->ext final-type)))
;; save
(define save-dir (hash->save-dir wikiname hash))
(make-directory* save-dir)
(define save-path (build-path save-dir (string-append hash "." ext)))
(define body (response-body r))
(display-to-file body save-path #:exists 'replace)
(query-exec slc "update image set progress = 1, ext = ? where wikiname = ? and hash = ?"
ext wikiname hash)
(callback (+ already-done-count i) (+ already-done-count not-done-count) (string-append hash "." ext)))
;; TODO: saved all images, register that fact in the database
(module+ test
(check-equal? (html->xexp "<img src=\";width=150\">")
'(*TOP* (img (@ (src "")))))
#;(download-list-of-pages "minecraft" values)
#;(save-each-page "minecraft" values)
#;(check-json-for-images "chiki" (build-path archive-root "chiki" "Fiona.json"))
#;(do-step-3 "gallowmere")
#;(save-each-image "gallowmere" (hash-ref sources 'style) (λ (a b c) (printf "~a/~a ~a~n" a b c)))
#;(for ([wikiname (query-list slc "select wikiname from wiki")])
(println wikiname)
(insert-wiki-entry wikiname))
#;(for ([wikiname (query-list slc "select wikiname from wiki")])
(println wikiname)
(do-step-3 wikiname)
(save-each-image wikiname (hash-ref sources 'style) (λ (a b c) (printf "~a/~a ~a~n" a b c)))))

(require-reloadable "src/page-search.rkt" page-search) (require-reloadable "src/page-search.rkt" page-search)
(require-reloadable "src/page-set-user-settings.rkt" page-set-user-settings) (require-reloadable "src/page-set-user-settings.rkt" page-set-user-settings)
(require-reloadable "src/page-static.rkt" static-dispatcher) (require-reloadable "src/page-static.rkt" static-dispatcher)
(require-reloadable "src/page-static-archive.rkt" page-static-archive)
(require-reloadable "src/page-subdomain.rkt" subdomain-dispatcher) (require-reloadable "src/page-subdomain.rkt" subdomain-dispatcher)
(require-reloadable "src/page-wiki.rkt" page-wiki) (require-reloadable "src/page-wiki.rkt" page-wiki)
(require-reloadable "src/page-wiki-offline.rkt" page-wiki-offline)
(require-reloadable "src/page-file.rkt" page-file) (require-reloadable "src/page-file.rkt" page-file)
(reload!) (reload!)
@ -42,7 +44,9 @@
page-proxy page-proxy
page-search page-search
page-set-user-settings page-set-user-settings
page-wiki page-wiki
page-file page-file
redirect-wiki-home redirect-wiki-home
(require (only-in "src/page-search.rkt" page-search)) (require (only-in "src/page-search.rkt" page-search))
(require (only-in "src/page-set-user-settings.rkt" page-set-user-settings)) (require (only-in "src/page-set-user-settings.rkt" page-set-user-settings))
(require (only-in "src/page-static.rkt" static-dispatcher)) (require (only-in "src/page-static.rkt" static-dispatcher))
(require (only-in "src/page-static-archive.rkt" page-static-archive))
(require (only-in "src/page-subdomain.rkt" subdomain-dispatcher)) (require (only-in "src/page-subdomain.rkt" subdomain-dispatcher))
(require (only-in "src/page-wiki.rkt" page-wiki)) (require (only-in "src/page-wiki.rkt" page-wiki))
(require (only-in "src/page-wiki-offline.rkt" page-wiki-offline))
(require (only-in "src/page-file.rkt" page-file)) (require (only-in "src/page-file.rkt" page-file))
(serve/launch/wait (serve/launch/wait
@ -31,7 +33,9 @@
page-proxy page-proxy
page-search page-search
page-set-user-settings page-set-user-settings
page-wiki page-wiki
page-file page-file
redirect-wiki-home redirect-wiki-home
#lang racket/base
(require racket/string
(only-in net/uri-codec uri-decode)
;; Parses a site-local URL string and returns the list of its path segments,
;; e.g. "/wiki/Page_title" -> '("wiki" "Page_title").
(define (local-encoded-url->segments str) ; '("wiki" "Page_title")
  (for/list ([segment (in-list (url-path (string->url str)))])
    (path/param-path segment)))
(define (url-segments->basename segments) ; "Page_title" filename encoded, no extension or dir prefix
(define extra-encoded (map (λ (s) (bytes->string/latin-1 (percent-encode s filename-set #f))) (cdr segments)))
(define basic-filename (string-join extra-encoded "#"))
;; Convenience composition: split a site-local URL string into its path
;; segments, then turn those segments into a basename with
;; url-segments->basename.
(define (local-encoded-url->basename str) ; "/wiki/Page_title" -> basename, no extension or dir prefix
(url-segments->basename (local-encoded-url->segments str)))
;; Converts a stored basename back into a page name: every "#" becomes "/",
;; then the result is percent-decoded.
(define (basename->name-for-query str)
  (let ([with-slashes (string-replace str "#" "/")])
    (uri-decode with-slashes)))
;; Guesses a display title from URL path segments: takes the second segment
;; and shows each underscore as a space.
(define (url-segments->guess-title segments)
  (define page-segment (cadr segments))
  (regexp-replace* #rx"_" page-segment " "))

File diff suppressed because it is too large Load diff

#lang racket/base
(require racket/contract
[ext->mime-type (-> bytes? bytes?)]
[mime-type->ext (-> bytes? bytes?)]))
(define-runtime-path mime.types-path "mime.types")
;; Association list of (extension . mime-type) byte-string pairs read from the
;; mime.types file. Each data line has the shape
;;   <mime-type> <ext> [<ext> ...]
;; Blank lines and lines starting with "#" are ignored. A line that lists
;; several extensions contributes one pair per extension, in file order.
;; A line that cannot be parsed is logged and skipped, so every element of the
;; list really is a pair and can safely be turned into a hash below.
(define ls
  (call-with-input-file mime.types-path
    (λ (in)
      (for*/list ([line (in-lines in)]
                  #:unless (regexp-match? #rx"^ *($|#)" line)
                  [parsed (in-value (regexp-match #rx"^([^ ]+) +(.+)$" line))]
                  #:when (or parsed
                             (begin
                               (log-warning "mime-types: failed to parse line ~s" line)
                               #f))
                  ;; "mid midi kar" -> "mid", "midi", "kar"
                  [ext (in-list (regexp-split #rx" +" (caddr parsed)))]
                  ;; trailing spaces on a line produce an empty last piece
                  #:unless (equal? ext ""))
        (cons (string->bytes/utf-8 ext)
              (string->bytes/utf-8 (cadr parsed)))))))

;; extension -> mime type. If an extension is listed under several types, the
;; entry that appears last in the file wins.
(define forward-hash (make-immutable-hash ls))

;; mime type -> extension. The first extension seen for a type is kept, so a
;; line such as "audio/midi mid midi kar" maps audio/midi to "mid".
(define reverse-hash
  (for/fold ([acc (hash)])
            ([entry (in-list ls)])
    (define mime (cdr entry))
    (if (hash-has-key? acc mime)
        acc
        (hash-set acc mime (car entry)))))
;; Returns the mime type registered for an extension (both as bytes).
;; One leading "." is ignored, so #".png" and #"png" are looked up the same way.
;; An extension that is not in the table makes `hash-ref` raise.
(define (ext->mime-type ext-in)
  (hash-ref forward-hash (regexp-replace #rx"^\\." ext-in #"")))
;; Returns the file extension registered for a mime type (both as bytes).
;; Everything from the first ";" onwards, such as a charset parameter, is
;; discarded before the lookup.
;; A type that is not in the table makes `hash-ref` raise.
(define (mime-type->ext m-in)
  (let ([bare-type (regexp-replace #rx";.*" m-in #"")])
    (hash-ref reverse-hash bare-type)))

text/html html
text/css css
text/xml xml
image/gif gif
image/jpeg jpeg
application/javascript js
text/javascript js
application/atom+xml atom
application/rss+xml rss
text/mathml mml
text/plain txt
text/x-component htc
image/png png
image/tiff tiff
image/vnd.wap.wbmp wbmp
image/x-icon ico
image/x-jng jng
image/x-ms-bmp bmp
image/svg+xml svg
image/webp webp
application/font-woff2 woff2
application/acad woff2
font/woff2 woff2
application/font-woff woff
application/x-font-ttf ttf
application/x-font-truetype ttf
application/x-truetype-font ttf
application/font-sfnt ttf
font/sfnt ttf
application/vnd.oasis.opendocument.formula-template otf
application/x-font-opentype otf
application/vnd.ms-opentype otf
font/otf otf
application/java-archive jar
application/json json
application/mac-binhex40 hqx
application/msword doc
application/pdf pdf
application/postscript ps
application/rtf rtf
application/vnd.apple.mpegurl m3u8
application/vnd.ms-excel xls
application/vnd.ms-fontobject eot
application/vnd.ms-powerpoint ppt
application/vnd.wap.wmlc wmlc
application/vnd.google-earth.kml+xml kml
application/vnd.google-earth.kmz kmz
application/x-7z-compressed 7z
application/x-cocoa cco
application/x-java-archive-diff jardiff
application/x-java-jnlp-file jnlp
application/x-makeself run
application/x-perl pl
application/x-rar-compressed rar
application/x-redhat-package-manager rpm
application/x-sea sea
application/x-shockwave-flash swf
application/x-stuffit sit
application/x-tcl tcl
application/x-x509-ca-cert pem
application/x-xpinstall xpi
application/xhtml+xml xhtml
application/xspf+xml xspf
application/zip zip
application/gzip gz
audio/midi mid midi kar
audio/mpeg mp3
audio/ogg ogg
audio/x-m4a m4a
audio/x-realaudio ra
video/mp2t ts
video/mp4 mp4
video/mpeg mpeg
video/quicktime mov
video/webm webm
video/x-flv flv
video/x-m4v m4v
video/x-mng mng
video/x-ms-wmv wmv
video/x-msvideo avi

; call the updater on the dictionary key only if it has that key ; call the updater on the dictionary key only if it has that key
alist-maybe-update alist-maybe-update
; update a value only if a condition succeeds on it ; update a value only if a condition succeeds on it
u) u
; like string-join, but for lists
(module+ test (module+ test
(require "typed-rackunit.rkt")) (require "typed-rackunit.rkt"))
(define u-counter (box 0))
(: alist-maybe-update ( (A B) ((Listof (Pairof A B)) A (B -> B) -> (Listof (Pairof A B))))) (: alist-maybe-update ( (A B) ((Listof (Pairof A B)) A (B -> B) -> (Listof (Pairof A B)))))
(define (alist-maybe-update alist key updater) (define (alist-maybe-update alist key updater)
(set-box! u-counter (add1 (unbox u-counter)))
(map (λ ([p : (Pairof A B)]) (map (λ ([p : (Pairof A B)])
(if (eq? (car p) key) (if (eq? (car p) key)
(cons (car p) (updater (cdr p))) (cons (car p) (updater (cdr p)))
@ -24,7 +30,16 @@
(: u ( (A) ((A -> Any) (A -> A) A -> A))) (: u ( (A) ((A -> Any) (A -> A) A -> A)))
(define (u condition updater value) (define (u condition updater value)
(set-box! u-counter (add1 (unbox u-counter)))
(if (condition value) (updater value) value)) (if (condition value) (updater value) value))
(module+ test (module+ test
(check-equal? (u (λ ([x : Integer]) (< x 5)) (λ ([x : Integer]) (* x -1)) 4) -4) (check-equal? (u (λ ([x : Integer]) (< x 5)) (λ ([x : Integer]) (* x -1)) 4) -4)
(check-equal? (u (λ ([x : Integer]) (< x 5)) (λ ([x : Integer]) (* x -1)) 8) 8)) (check-equal? (u (λ ([x : Integer]) (< x 5)) (λ ([x : Integer]) (* x -1)) 8) 8))
(: list-join ( (A B) (A (Listof B) -> (Listof (U A B)))))
; Returns a list in which `element` is inserted between every two adjacent
; items of `ls`, e.g. (list-join "h" '(2 3 4)) => '(2 "h" 3 "h" 4).
; A single-item list is returned as a fresh one-item list, and an empty list
; yields '() instead of failing inside `cdr`.
(define (list-join element ls)
  (cond
    [(null? ls) '()]
    [(null? (cdr ls)) (list (car ls))]
    [else (list* (car ls) element (list-join element (cdr ls)))]))
(module+ test
(check-equal? (list-join "h" '(2 3 4 5)) '(2 "h" 3 "h" 4 "h" 5)))

; help make a nested if. if/in will gain the same false form of its containing if/out. ; help make a nested if. if/in will gain the same false form of its containing if/out.
if/out if/out
; let, but the value for each variable is evaluated within a thread ; let, but the value for each variable is evaluated within a thread
thread-let) thread-let
; cond, but values can be defined between conditions
; wrap sql statements into lambdas so they can be executed during migration
(module+ test (module+ test
(require rackunit) (require rackunit)
@ -17,9 +21,12 @@
;; it's in a submodule so that it can be required in both levels, for testing ;; it's in a submodule so that it can be required in both levels, for testing
(module transform racket/base (module transform racket/base
(require racket/list)
(provide (provide
transform-if/out transform-if/out
transform-thread-let) transform-thread-let
(define (transform-if/out stx) (define (transform-if/out stx)
(define tree (cdr (syntax->datum stx))) ; condition true false (define tree (cdr (syntax->datum stx))) ; condition true false
@ -62,12 +69,46 @@
(define def (list-ref defs n)) (define def (list-ref defs n))
`(,(car def) (channel-get (vector-ref chv ,n)))) `(,(car def) (channel-get (vector-ref chv ,n))))
counter) counter)
,@forms))))) ,@forms))))
(define (transform/out-cond/var stx)
(define tree (transform-cond/var (cdr (syntax->datum stx))))
(define (transform-cond/var tree)
(define-values (els temp) (splitf-at tree (λ (el) (and (pair? el) (not (eq? (car el) 'var))))))
(define-values (vars rest) (splitf-at temp (λ (el) (and (pair? el) (eq? (car el) 'var)))))
(if (null? rest)
`(cond ,@els)
(let ,(for/list ([var vars])
(cdr var))
,(transform-cond/var rest))]))))
;; the syntax definitions and their tests go below here ;; the syntax definitions and their tests go below here
(require 'transform (for-syntax 'transform)) (require 'transform (for-syntax 'transform))
(define-syntax (wrap-sql stx)
; the arguments
(define xs (cdr (syntax->list stx)))
; wrap each argument
(define wrapped (map (λ (xe) ; xe is the syntax of an argument
(if (list? (car (syntax->datum xe)))
; it's a list of lists (a list of sql migration steps)
; return instead syntax of a lambda that will call everything in xe
(datum->syntax stx `(λ () ,@xe))
; it's just a single sql migration step
; return instead syntax of a lambda that will call xe
(datum->syntax stx `(λ () ,xe))))
; since I'm returning *code*, I need to return the form (list ...) so that runtime makes a list
(datum->syntax stx `(list ,@wrapped)))
(define-syntax (if/out stx) (define-syntax (if/out stx)
(transform-if/out stx)) (transform-if/out stx))
(module+ test (module+ test
@ -106,3 +147,15 @@
; check that it assigns the correct value to the correct variable ; check that it assigns the correct value to the correct variable
(check-equal? (thread-let ([a (sleep 0) 'a] [b 'b]) (list a b)) (check-equal? (thread-let ([a (sleep 0) 'a] [b 'b]) (list a b))
'(a b))) '(a b)))
; cond/var: a cond form whose clause list may also contain (var name expr) forms
; the expansion is produced entirely by transform/out-cond/var from the 'transform submodule
; NOTE(review): judging by transform-cond/var, each run of var forms becomes a let wrapped around
; the clauses that follow it, so a var is only visible to later clauses - confirm against the tests
(define-syntax (cond/var stx)
(transform/out-cond/var stx))
(module+ test
(check-syntax-equal? (transform/out-cond/var #'(cond/def [#f 0] (var d (* a 2)) [(eq? d 8) d] [#t "not 4"]))
[#f 0]
(let ([d (* a 2)])
[(eq? d 8) d]
[#t "not 4"]))])))

racket/function racket/function
racket/match racket/match
racket/string racket/string
"pure-utils.rkt" "pure-utils.rkt"
"url-utils.rkt" "url-utils.rkt"
"xexpr-utils.rkt") "xexpr-utils.rkt")
(provide (provide
update-tree-wiki) update-tree-wiki)
(define (preprocess-html-wiki html)
(define ((rr* find replace) contents)
(regexp-replace* find contents replace))
; fix navbox list nesting
; navbox on right of page has incorrect html "<td ...><li>" and the xexpr parser puts the <li> much further up the tree
; add a <ul> to make the parser happy
; usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies
(rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2")
; change <figcaption><p> to <figcaption><span> to make the parser happy
(rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>"))
(module+ test
(check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>")
"<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>")
(check-equal? (preprocess-html-wiki "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"> <p class=\"caption\">Caption text.</p></figcaption></figure>")
"<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"><span class=\"caption\">Caption text.</span></figcaption></figure>"))
(module+ test (module+ test
(require rackunit (require rackunit
html-parsing) "html-parsing/main.rkt")
(define wiki-document (define wiki-document
'(*TOP* '(*TOP*
(div (@ (class "mw-parser-output")) (div (@ (class "mw-parser-output"))
@ -47,7 +65,7 @@
(figcaption "Test figure!")) (figcaption "Test figure!"))
(iframe (@ (src ""))))))) (iframe (@ (src "")))))))
(define (updater wikiname) (define (updater wikiname #:strict-proxy? [strict-proxy? #f])
(define classlist-updater (define classlist-updater
(compose1 (compose1
; uncollapse all navbox items (bottom of page mass navigation) ; uncollapse all navbox items (bottom of page mass navigation)
@ -101,7 +119,7 @@
'("")))) '(""))))
; proxy images from inline styles, if strict_proxy is set ; proxy images from inline styles, if strict_proxy is set
(curry u (curry u
(λ (v) (config-true? 'strict_proxy)) (λ (v) strict-proxy?)
(λ (v) (attribute-maybe-update (λ (v) (attribute-maybe-update
'style 'style
(λ (style) (λ (style)
@ -114,14 +132,14 @@
; and also their links, if strict_proxy is set ; and also their links, if strict_proxy is set
(curry u (curry u
(λ (v) (λ (v)
(and (config-true? 'strict_proxy) (and strict-proxy?
#;(eq? element-type 'a) #;(eq? element-type 'a)
(or (has-class? "image-thumbnail" v) (or (has-class? "image-thumbnail" v)
(has-class? "image" v)))) (has-class? "image" v))))
(λ (v) (attribute-maybe-update 'href u-proxy-url v))) (λ (v) (attribute-maybe-update 'href u-proxy-url v)))
; proxy images from src attributes, if strict_proxy is set ; proxy images from src attributes, if strict_proxy is set
(curry u (curry u
(λ (v) (config-true? 'strict_proxy)) (λ (v) strict-proxy?)
(λ (v) (attribute-maybe-update 'src u-proxy-url v))) (λ (v) (attribute-maybe-update 'src u-proxy-url v)))
; don't lazyload images ; don't lazyload images
(curry u (curry u
@ -208,13 +226,12 @@
updater) updater)
(define (update-tree-wiki tree wikiname) (define (update-tree-wiki tree wikiname #:strict-proxy? [strict-proxy? #f])
(update-tree (updater wikiname) tree)) (update-tree (updater wikiname #:strict-proxy? strict-proxy?) tree))
(module+ test (module+ test
(define transformed (define transformed
(parameterize ([(config-parameter 'strict_proxy) "true"]) (update-tree-wiki wiki-document "test" #:strict-proxy? #t))
(update-tree-wiki wiki-document "test")))
; check that wikilinks are changed to be local ; check that wikilinks are changed to be local
(check-equal? (get-attribute 'href (bits->attributes (check-equal? (get-attribute 'href (bits->attributes
((query-selector ((query-selector
@ -260,8 +277,8 @@
; check that noscript images are removed ; check that noscript images are removed
(check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f) (check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f)
; benchmark ; benchmark
(when (file-exists? "Frog.html2") (when (file-exists? "../misc/Frog.html")
(with-input-from-file "Frog.html2" (with-input-from-file "../misc/Frog.html"
(λ () (λ ()
(define tree (html->xexp (current-input-port))) (define tree (html->xexp (current-input-port)))
(time (length (update-tree-wiki tree "minecraft"))))))) (time (length (update-tree-wiki tree "minecraft")))))))

#lang typed/racket/base #lang typed/racket/base
(require racket/string (require racket/string
"pure-utils.rkt") "pure-utils.rkt")
(require/typed web-server/http/request-structs (require/typed web-server/http/request-structs
[#:opaque Header header?]) [#:opaque Header header?])
@ -10,12 +9,14 @@
px-wikiname px-wikiname
; make a query string from an association list of strings ; make a query string from an association list of strings
params->query params->query
; custom percent encoding (you probably want params->query instead)
; sets for custom percent encoding
path-set urlencoded-set filename-set
; make a proxied version of a fandom url ; make a proxied version of a fandom url
u-proxy-url u-proxy-url
; check whether a url is on a domain controlled by fandom ; check whether a url is on a domain controlled by fandom
is-fandom-url? is-fandom-url?
; prints "out: <url>"
; pass in a header, headers, or something useless. they'll all combine into a list ; pass in a header, headers, or something useless. they'll all combine into a list
build-headers build-headers
; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url ; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url
@ -41,6 +42,8 @@
) )
path-set)) path-set))
; one of the exported character sets for custom percent encoding (alongside path-set and urlencoded-set)
; it is a plain list of characters, the same shape percent-encode accepts as its `set` argument
; NOTE(review): presumably these are the characters that are unsafe in file names - confirm against callers
(define filename-set '(#\< #\> #\: #\" #\/ #\\ #\| #\? #\* #\# #\~ #\&))
(: percent-encode (String (Listof Char) Boolean -> Bytes)) (: percent-encode (String (Listof Char) Boolean -> Bytes))
(define (percent-encode value set space-as-plus) (define (percent-encode value set space-as-plus)
(define b (string->bytes/utf-8 value)) (define b (string->bytes/utf-8 value))
@ -87,11 +90,6 @@
(λ ([v : String]) (string-append "/proxy?" (params->query `(("dest" . ,url))))) (λ ([v : String]) (string-append "/proxy?" (params->query `(("dest" . ,url)))))
url)) url))
(: log-outgoing (String -> Void))
(define (log-outgoing url-string)
(when (config-true? 'log_outgoing)
(printf "out: ~a~n" url-string)))
(: build-headers ((U Header (Listof Header) False Void) * -> (Listof Header))) (: build-headers ((U Header (Listof Header) False Void) * -> (Listof Header)))
(define (build-headers . fs) (define (build-headers . fs)
(apply (apply

View file

@ -129,7 +129,7 @@
(λ (element-type attributes children) (λ (element-type attributes children)
(equal? (get-attribute name attributes) value))) (equal? (get-attribute name attributes) value)))
(define (query-selector selector element) (define (query-selector selector element #:include-text? [include-text? #f])
(generator (generator
() ()
(let loop ([element element]) (let loop ([element element])
@ -140,7 +140,9 @@
[(equal? element-type '*DECL*) #f] [(equal? element-type '*DECL*) #f]
[(equal? element-type '@) #f] [(equal? element-type '@) #f]
[#t [#t
(when (selector element-type attributes children) (when (if include-text?
(selector element-type attributes children (filter string? (cdr element)))
(selector element-type attributes children))
(yield element)) (yield element))
(for ([child children]) (loop child))])) (for ([child children]) (loop child))]))
#f)) #f))

racket/string racket/string
json json
net/http-easy net/http-easy
html-parsing "../lib/html-parsing/main.rkt"
"../src/xexpr-utils.rkt" "../src/xexpr-utils.rkt"
"../src/url-utils.rkt") "../src/url-utils.rkt")

html-parsing html-parsing
html-writing html-writing
web-server/http web-server/http
"config.rkt" "config.rkt"
"data.rkt" "data.rkt"
"niwa-data.rkt" "extwiki-data.rkt"
"static-data.rkt" "static-data.rkt"
"pure-utils.rkt" "../lib/syntax.rkt"
"xexpr-utils.rkt" "../lib/pure-utils.rkt"
"url-utils.rkt") "../lib/xexpr-utils.rkt"
(provide (provide
; headers to always send on all http responses ; headers to always send on all http responses
@ -79,32 +82,69 @@
;; generate a notice with a link if a fandom wiki has a replacement as part of NIWA or similar ;; generate a notice with a link if a fandom wiki has a replacement as part of NIWA or similar
;; if the wiki has no replacement, display nothing ;; if the wiki has no replacement, display nothing
(define (niwa-notice wikiname title) (define (extwiki-notice wikiname title)
(define ind (findf (λ (item) (member wikiname (first item))) niwa-data)) (define xt (findf (λ (item) (member wikiname (extwiki^-wikinames item))) extwikis))
(if ind (cond/var
(let* ([search-page (format "/Special:Search?~a" [xt
(params->query `(("search" . ,title) (let* ([group (hash-ref extwiki-groups (extwiki^-group xt))]
("go" . "Go"))))] [search-page (format "/Special:Search?~a"
[go (if (string-suffix? (third ind) "/") (params->query `(("search" . ,title)
(regexp-replace #rx"/$" (third ind) (λ (_) search-page)) ("go" . "Go"))))]
(let* ([joiner (second (regexp-match #rx"/(w[^./]*)/" (third ind)))]) [go (if (string-suffix? (extwiki^-home xt) "/")
(regexp-replace #rx"/w[^./]*/.*$" (third ind) (λ (_) (format "/~a~a" joiner search-page)))))]) (regexp-replace #rx"/$" (extwiki^-home xt) (λ (_) search-page))
`(aside (@ (class "niwa__notice")) (let* ([joiner (second (regexp-match #rx"/(w[^./]*)/" (extwiki^-home xt)))])
(h1 (@ (class "niwa__header")) ,(second ind) " has its own website separate from Fandom.") (regexp-replace #rx"/w[^./]*/.*$" (extwiki^-home xt) (λ (_) (format "/~a~a" joiner search-page)))))]
(a (@ (class "niwa__go") (href ,go)) "Read " ,title " on " ,(second ind) "") [props (extwiki-props^ go)])
(div (@ (class "niwa__cols")) (cond
(div (@ (class "niwa__left")) [(eq? (extwiki^-banner xt) 'default)
(p "Most major Nintendo wikis are part of the " `(aside (@ (class "niwa__notice"))
(a (@ (href "")) "Nintendo Independent Wiki Alliance") (h1 (@ (class "niwa__header")) ,(extwiki^-name xt) " has its own website separate from Fandom.")
" and have their own wikis off Fandom. You can help this wiki by " (a (@ (class "niwa__go") (href ,go)) "Read " ,title " on " ,(extwiki^-name xt) "")
(a (@ (href ,go)) "visiting it directly.")) (div (@ (class "niwa__cols"))
(p ,(fifth ind)) (div (@ (class "niwa__left"))
(div (@ (class "niwa__divider"))) (p ,((extwiki-group^-description group) props))
(p "Why are you seeing this message? Fandom refuses to delete or archive their copy of this wiki, so that means their pages will appear high up in search results. Fandom hopes to get clicks from readers who don't know any better.") (p ,((extwiki^-description xt) props))
(p (@ (class "niwa__feedback")) "This notice brought to you by BreezeWiki / " (a (@ (href "")) "Info & Context") " / " (a (@ (href "")) "Feedback?"))) (p "This wiki's core community has wholly migrated away from Fandom. You should "
(div (@ (class "niwa__right")) (a (@ (href ,go)) "go to " ,(extwiki^-name xt) " now!"))
(img (@ (class "niwa__logo") (src ,(format "" (fourth ind))))))))) (p (@ (class "niwa__feedback"))
"")) ,@(add-between
`(,@(for/list ([link (extwiki-group^-links group)])
`(a (@ (href ,(cdr link))) ,(car link)))
"This notice is from BreezeWiki"
(a (@ (href "")) "Feedback?"))
" / ")))
(div (@ (class "niwa__right"))
(img (@ (class "niwa__logo") (src ,(extwiki^-logo xt)))))))]
[(eq? (extwiki^-banner xt) 'parallel)
`(aside (@ (class "niwa__parallel"))
(h1 (@ (class "niwa__header-mini"))
"See also "
(a (@ (href ,go)) ,(extwiki^-name xt)))
(p "This topic has multiple communities of editors, some active on the Fandom wiki, others active on " ,(extwiki^-name xt) ".")
(p "For thorough research, be sure to check both communities since they may have different information!")
(p (@ (class "niwa__feedback"))
`(,@(for/list ([link (extwiki-group^-links group)])
`(a (@ (href ,(cdr link))) ,(car link)))
"This notice is from BreezeWiki"
(a (@ (href "")) "Feedback?"))
" / ")))]
[(eq? (extwiki^-banner xt) 'empty)
`(aside (@ (class "niwa__notice niwa__notice--alt"))
(h1 (@ (class "niwa__header")) "You will be redirected to " ,(extwiki^-name xt) ".")
(p (@ (style "position: relative; top: -12px;")) "This independent wiki community has its own site separate from Fandom.")
(a (@ (class "niwa__go") (href ,go)) "Take me there! →")
(p (@ (class "niwa__feedback") (style "text-align: left"))
`(,@(for/list ([link (extwiki-group^-links group)])
`(a (@ (href ,(cdr link))) ,(car link)))
"This notice is from BreezeWiki")
" / ")))]))]
(var fetched-callback (get-redirect-content wikiname))
(fetched-callback title)]
[#t ""]))
(define (generate-wiki-page (define (generate-wiki-page
content content
@ -114,22 +154,26 @@
#:title title #:title title
#:head-data [head-data-in #f] #:head-data [head-data-in #f]
#:siteinfo [siteinfo-in #f] #:siteinfo [siteinfo-in #f]
#:user-cookies [user-cookies-in #f]) #:user-cookies [user-cookies-in #f]
#:online-styles [online-styles #t])
(define siteinfo (or siteinfo-in siteinfo-default)) (define siteinfo (or siteinfo-in siteinfo-default))
(define head-data (or head-data-in ((head-data-getter wikiname)))) (define head-data (or head-data-in ((head-data-getter wikiname))))
(define user-cookies (or user-cookies-in (user-cookies-getter req))) (define user-cookies (or user-cookies-in (user-cookies-getter req)))
(define (required-styles origin) (define origin (format "" wikiname))
(map (λ (dest-path) (define required-styles
(define url (format dest-path origin)) (cond
(if (config-true? 'strict_proxy) [online-styles
(u-proxy-url url) (define styles
url)) (list
`(#;"~a/load.php?lang=en&modules=skin.fandomdesktop.styles&only=styles&skin=fandomdesktop" (format "~a/wikia.php?controller=ThemeApi&method=themeVariables&variant=~a" origin (user-cookies^-theme user-cookies))
#;"~a/load.php?lang=en&modules=ext.gadget.dungeonsWiki%2CearthWiki%2Csite-styles%2Csound-styles&only=styles&skin=fandomdesktop" (format "~a/load.php?lang=en&" origin)))
#;"~a/load.php?lang=en&modules=site.styles&only=styles&skin=fandomdesktop" (if (config-true? 'strict_proxy)
; combine the above entries into a single request for potentially extra speed - doesn't even do this! (map u-proxy-url styles)
,(format "~~a/wikia.php?controller=ThemeApi&method=themeVariables&variant=~a" (user-cookies^-theme user-cookies)) styles)]
"~a/load.php?lang=en&"))) [#t
(format "/archive/~a/styles/themeVariables-~a.css" wikiname (user-cookies^-theme user-cookies))
(format "/archive/~a/styles/site.css" wikiname))]))
`(*TOP* `(*TOP*
(html (html
@ -141,7 +185,7 @@
(config-get 'application_name))) (config-get 'application_name)))
,@(map (λ (url) ,@(map (λ (url)
`(link (@ (rel "stylesheet") (type "text/css") (href ,url)))) `(link (@ (rel "stylesheet") (type "text/css") (href ,url))))
(required-styles (format "" wikiname))) required-styles)
(link (@ (rel "stylesheet") (type "text/css") (href ,(get-static-url "main.css")))) (link (@ (rel "stylesheet") (type "text/css") (href ,(get-static-url "main.css"))))
(script "const BWData = " (script "const BWData = "
,(jsexpr->string (hasheq 'wikiname wikiname ,(jsexpr->string (hasheq 'wikiname wikiname
@ -154,11 +198,25 @@
(λ (v) (u-proxy-url v)) (λ (v) (u-proxy-url v))
(head-data^-icon-url head-data)))))) (head-data^-icon-url head-data))))))
(body (@ (class ,(head-data^-body-class head-data))) (body (@ (class ,(head-data^-body-class head-data)))
,(if (config-true? 'instance_is_official)
(let ([balloon '(img (@ (src "/static/three-balloons.png") (class "bw-balloon") (title "Image Source: | License: CC BY-NC 4.0 | Modifications: Resized") (width "52") (height "56")))]
[extension-eligible? (and req (assq 'user-agent (request-headers req)) (string-contains? (string-downcase (cdr (assq 'user-agent (request-headers req)))) "firefox/"))])
`(div (@ (class "bw-top-banner"))
"BreezeWiki is back! Most major wikis are available.\n"
,(if extension-eligible?
'(div (@ (class "bw-top-banner-rainbow"))
"Try " (a (@ (href "")) "our affiliated browser extension") " - redirect to BreezeWiki automatically!\n")
"As always, " (a (@ (href "")) "please go here") " to report problems, suggest features, or talk about the project.")
(div (@ (class "main-container")) (div (@ (class "main-container"))
(div (@ (class "fandom-community-header__background tileHorizontally header"))) (div (@ (class "fandom-community-header__background tileHorizontally header")))
(div (@ (class "page")) (div (@ (class "page"))
(main (@ (class "page__main")) (main (@ (class "page__main"))
,(niwa-notice wikiname title) ,(extwiki-notice wikiname title)
(div (@ (class "custom-top")) (div (@ (class "custom-top"))
(h1 (@ (class "page-title")) ,title) (h1 (@ (class "page-title")) ,title)
(nav (@ (class "sitesearch")) (nav (@ (class "sitesearch"))
@ -172,18 +230,18 @@
(div (@ (class "bw-theme__select")) (div (@ (class "bw-theme__select"))
(span (@ (class "bw-theme__main-label")) "Page theme") (span (@ (class "bw-theme__main-label")) "Page theme")
(div (@ (class "bw-theme__items")) (div (@ (class "bw-theme__items"))
,@(for/list ([theme '(default light dark)]) ,@(for/list ([theme '(default light dark)])
(define class (define class
(if (equal? theme (user-cookies^-theme user-cookies)) (if (equal? theme (user-cookies^-theme user-cookies))
"bw-theme__item bw-theme__item--selected" "bw-theme__item bw-theme__item--selected"
"bw-theme__item")) "bw-theme__item"))
`(a (@ (href ,(user-cookies-setter-url `(a (@ (href ,(user-cookies-setter-url
req req
(struct-copy user-cookies^ user-cookies (struct-copy user-cookies^ user-cookies
[theme theme]))) (class ,class)) [theme theme]))) (class ,class))
(div (@ (class "bw-theme__icon-container")) (div (@ (class "bw-theme__icon-container"))
,(hash-ref theme-icons theme)) ,(hash-ref theme-icons theme))
,(format "~a" theme))))))) ,(format "~a" theme)))))))
(div (@ (id "content") #;(class "page-content")) (div (@ (id "content") #;(class "page-content"))
(div (@ (id "mw-content-text")) (div (@ (id "mw-content-text"))
,content)) ,content))

(require racket/function (require racket/function
racket/pretty racket/pretty
racket/runtime-path racket/runtime-path
racket/string) racket/string
(require/typed ini typed/ini)
[#:opaque Ini ini?]
[read-ini (Input-Port -> Ini)]
[ini->hash (Ini -> (Immutable-HashTable Symbol (Immutable-HashTable Symbol String)))])
(provide (provide
config-parameter config-parameter
@ -14,7 +11,7 @@
config-get) config-get)
(module+ test (module+ test
(require "typed-rackunit.rkt")) (require "../lib/typed-rackunit.rkt"))
(define-runtime-path path-config "../config.ini") (define-runtime-path path-config "../config.ini")
@ -38,54 +35,58 @@
(instance_is_official . "false") ; please don't turn this on, or you will make me very upset (instance_is_official . "false") ; please don't turn this on, or you will make me very upset
(log_outgoing . "true") (log_outgoing . "true")
(port . "10416") (port . "10416")
(strict_proxy . "true"))) (strict_proxy . "false")
(feature_offline::enabled . "false")
(feature_offline::format . "json.gz")
(feature_offline::only . "false")))
(define loaded-alist (define loaded-alist
(with-handlers (with-handlers
([exn:fail:filesystem:errno? ([exn:fail:filesystem:errno?
(λ (exn) (λ (exn)
(begin0 (displayln "note: config file not detected, using defaults")
'() '())]
(displayln "note: config file not detected, using defaults")))]
[exn:fail:contract? [exn:fail:contract?
(λ (exn) (λ (exn)
(begin0 (displayln "note: config file empty or missing [] section, using defaults")
'() '())])
(displayln "note: config file empty or missing [] section, using defaults")))]) (define h (in-hash
(call-with-input-file path-config
(λ (in)
(read-ini in))))))
(define l (define l
(hash->list (for*/list : (Listof (Pairof Symbol String))
(hash-ref ([(section-key section) h]
(ini->hash [(key value) (in-hash section)])
(call-with-input-file path-config (if (eq? section-key '||)
(λ (in) (cons key value)
(read-ini in)))) (cons (string->symbol (string-append (symbol->string section-key)
'||))) "::"
(begin0 (symbol->string key)))
l value))))
(printf "note: ~a items loaded from config file~n" (length l))))) (printf "note: ~a items loaded from config file~n" (length l))
(define env-alist (define env-alist
(let ([e-names (environment-variables-names (current-environment-variables))] (for/list : (Listof (Pairof Symbol String))
[e-ref (λ ([name : Bytes]) ([name (environment-variables-names (current-environment-variables))]
(bytes->string/latin-1 #:when (string-prefix? (string-downcase (bytes->string/latin-1 name)) "bw_"))
(cast (environment-variables-ref (current-environment-variables) name) (cons
Bytes)))]) ;; key: convert to string, remove bw_ prefix, convert to symbol
(map (λ ([name : Bytes]) (string->symbol (string-downcase (substring (bytes->string/latin-1 name) 3)))
(cons (string->symbol (string-downcase (substring (bytes->string/latin-1 name) 3))) ;; value: convert to string
(e-ref name))) (bytes->string/latin-1
(filter (λ ([name : Bytes]) (string-prefix? (string-downcase (bytes->string/latin-1 name)) (cast (environment-variables-ref (current-environment-variables) name) Bytes)))))
;; get the current dataset so it can be stored above
(module+ fetch
(require racket/generator
(define r (get ""))
(define x (html->xexp (bytes->string/utf-8 (response-body r))))
(define english ((query-selector (λ (e a c) (equal? (get-attribute 'id a) "content1")) x)))
(define gen (query-selector (λ (e a c) (has-class? "member" a)) english))
(for/list ([item (in-producer gen #f)])
(define links (query-selector (λ (e a c) (eq? e 'a)) item))
(define url (get-attribute 'href (bits->attributes (links))))
(define title (third (links)))
(define icon (get-attribute 'src (bits->attributes ((query-selector (λ (e a c) (eq? e 'img)) item)))))
(define description (second ((query-selector (λ (e a c) (eq? e 'p)) item))))
(list '() title url icon description)))

src/extwiki-generic.rkt Normal file
View file

@ -0,0 +1,125 @@
#lang racket/base
(require racket/list
(module+ test
(require rackunit))
;; fandom wikinames * Title * Main Page * Search page override * API endpoint override
(define wikis
'(((gallowmere) "Gallowmere Historia" "" #f #f)
((fallout) "Fallout Wiki" "" #f "")
;; mutable lookup table: fandom wikiname (as a string) -> the full entry from `wikis`
;; an entry that lists several wikinames is stored once under each of those names
(define wikis-hash (make-hash))
(for* ([entry wikis]
       [name (car entry)])
  (hash-set! wikis-hash (symbol->string name) entry))
(module+ test
(check-equal? (cadr (hash-ref wikis-hash "gallowmere"))
"Gallowmere Historia"))
;; read the first two <tr> rows of a table xexp and pair them up column by column
;; returns a hasheq: downcased, trimmed header text (as a symbol) -> content children of the matching <td>
(define (parse-table table)
  ; generator over the <tr> elements; every call hands back the next row
  (define next-row (query-selector (λ (tag attrs kids) (eq? tag 'tr)) table))
  ; sequence of the cells with the given tag inside one row
  (define (cells-of cell-tag row)
    (in-producer (query-selector (λ (tag attrs kids) (eq? tag cell-tag)) row) #f))
  ; first row: the column headings
  (define header-row (next-row))
  (define column-names
    (for/list ([heading (cells-of 'th header-row)])
      ; the heading's name is its first string child
      (string->symbol (string-downcase (string-trim (findf string? heading))))))
  ; second row: the values, matched to the headings by position
  (define data-row (next-row))
  (for/hasheq ([column column-names]
               [cell (cells-of 'td data-row)])
    (values column (filter element-is-content? (cdr cell)))))
(module+ test
(check-equal? (parse-table (html->xexp "<table> <tbody><tr> <th>Links</th></tr> <tr> <td><a target=\"_blank\" rel=\"nofollow noreferrer noopener\" class=\"external text\" href=\"\">Forum</a></td></tr></tbody></table>"))
'#hasheq((links . ((a (@ (target "_blank") (rel "nofollow noreferrer noopener") (class "external text") (href "")) "Forum"))))))
(define (table->links table)
(define v (hash-ref table 'links #f))
[(not v) (values null '("Data table must have a \"Links\" column"))]
(var links (filter (λ (a) (and (pair? a) (eq? (car a) 'a))) v)) ; <a> elements
[(null? links) (values null '("Links column must have at least one link"))]
[#t (values links null)]))
(define (table->logo table)
(define logo (hash-ref table 'logo #f))
[(not logo) (values #f '("Data table must have a \"Logo\" column"))]
[(null? logo) (values #f '("Logo table column must have a link"))]
(var href (get-attribute 'href (bits->attributes (car (hash-ref table 'logo)))))
[(not href) (values #f '("Logo table column must have a link"))]
[#t (values href null)]))
;; work out the api.php url for an entry of `wikis`
;; the entry's fifth field, when set, is used as-is; otherwise the url is derived from the main page (third field)
;; raises an error if the main page url has none of the recognised shapes
(define (get-api-endpoint wiki)
  (define home-url (third wiki))
  (define explicit-endpoint (fifth wiki))
  (cond
    [explicit-endpoint explicit-endpoint]
    [else
     (match home-url
       ; main page is a bare directory: api.php sits directly inside it
       [(regexp #rx"/$") (string-append home-url "api.php")]
       ; main page under /wiki/: api.php is under /w/ on the same base
       [(regexp #rx"^(.*)/wiki/" (list _ base)) (string-append base "/w/api.php")]
       ; main page under /w/: api.php is at the top of the same base
       [(regexp #rx"^(.*)/w/" (list _ base)) (string-append base "/api.php")]
       [_ (error 'get-api-endpoint "unknown url format: ~a" home-url)])]))
;; work out the Special:Search url for an entry of `wikis`
;; the entry's fourth field, when set, is used as-is; otherwise the url is derived from the main page (third field)
;; raises an error if the main page url has none of the recognised shapes
(define (get-search-page wiki)
  (define home-url (third wiki))
  (define explicit-search-page (fourth wiki))
  (cond
    [explicit-search-page explicit-search-page]
    [else
     (match home-url
       ; main page is a bare directory: pages live directly inside it
       [(regexp #rx"/$") (string-append home-url "Special:Search")]
       ; otherwise keep everything up to and including the /w.../ path segment
       [(regexp #rx"^(.*/w[^./]*/)" (list _ page-prefix)) (string-append page-prefix "Special:Search")]
       [_ (error 'get-search-page "unknown url format: ~a" home-url)])]))
(define/memoize (get-redirect-content wikiname) #:hash hash
(define wiki (hash-ref wikis-hash wikiname #f))
(define display-name (cadr wiki))
(define endpoint (string-append (get-api-endpoint wiki) "?action=parse&page=MediaWiki:BreezeWikiRedirect&prop=text&formatversion=2&format=json"))
(define res (get endpoint))
(define html (jp "/parse/text" (response-json res)))
(define content ((query-selector (λ (t a c) (has-class? "mw-parser-output" a))
(html->xexp html))))
(define body (for/list ([p (in-producer (query-selector (λ (t a c) (eq? t 'p)) content) #f)]) p))
(define table (parse-table ((query-selector (λ (t a c) (eq? t 'table)) content))))
(define-values (links links-errors) (table->links table))
(define-values (logo logo-errors) (table->logo table))
(define construct-errors (append links-errors logo-errors))
(λ (title)
(define go
(string-append (get-search-page wiki)
(params->query `(("search" . ,title)
("go" . "Go")))))
`(aside (@ (class "niwa__notice"))
(h1 (@ (class "niwa__header")) ,display-name " has its own website separate from Fandom.")
(div (@ (class "niwa__cols"))
(div (@ (class "niwa__left"))
(a (@ (class "niwa__go") (href ,go)) "Read " ,title " on " ,display-name "")
(p "This wiki's core community has wholly migrated away from Fandom. You should "
(a (@ (href ,go)) "go to " ,display-name " now!")))
(div (@ (class "niwa__right"))
(img (@ (class "niwa__logo") (src ,logo)))))
,(if (pair? links)
`(p (@ (class "niwa__feedback"))
,@(add-between links " / "))
,(if (pair? construct-errors)
,@(for/list ([error construct-errors])
`(li ,error)))
[#t #f]))
(module+ test
((get-redirect-content "gallowmere") "Gallowmere Historia"))

View file

;; get the current dataset so it can be stored above
(module+ fetch
(require racket/generator
(define r (get ""))
(define x (html->xexp (bytes->string/utf-8 (response-body r))))
(define english ((query-selector (λ (e a c) (equal? (get-attribute 'id a) "content1")) x)))
(define gen (query-selector (λ (e a c) (has-class? "member" a)) english))
(for/list ([item (in-producer gen #f)])
(define links (query-selector (λ (e a c) (eq? e 'a)) item))
(define url (get-attribute 'href (bits->attributes (links))))
(define title (third (links)))
(define icon (get-attribute 'src (bits->attributes ((query-selector (λ (e a c) (eq? e 'img)) item)))))
(define description (second ((query-selector (λ (e a c) (eq? e 'p)) item))))
(list '() title url icon description)))

View file

@ -16,9 +16,10 @@
"config.rkt" "config.rkt"
"data.rkt" "data.rkt"
"page-wiki.rkt" "page-wiki.rkt"
"syntax.rkt" "../lib/syntax.rkt"
"url-utils.rkt" "../lib/url-utils.rkt"
"xexpr-utils.rkt") "whole-utils.rkt"
(provide (provide
page-category) page-category)

View file

@ -16,9 +16,10 @@
"config.rkt" "config.rkt"
"data.rkt" "data.rkt"
"page-wiki.rkt" "page-wiki.rkt"
"syntax.rkt" "../lib/syntax.rkt"
"url-utils.rkt" "../lib/url-utils.rkt"
"xexpr-utils.rkt") "whole-utils.rkt"
(provide page-file) (provide page-file)
@ -101,47 +102,48 @@
`"")))) `""))))
(define (page-file req) (define (page-file req)
(define wikiname (path/param-path (first (url-path (request-uri req))))) (response-handler
(define prefixed-title (path/param-path (caddr (url-path (request-uri req))))) (define wikiname (path/param-path (first (url-path (request-uri req)))))
(define origin (format "" wikiname)) (define prefixed-title (path/param-path (caddr (url-path (request-uri req)))))
(define source-url (format "~a/wiki/~a" origin prefixed-title)) (define origin (format "" wikiname))
(define source-url (format "~a/wiki/~a" origin prefixed-title))
(thread-let ([media-detail (thread-let
(define dest-url ([media-detail (define dest-url
(format "~a/wikia.php?~a" (format "~a/wikia.php?~a"
origin origin
(params->query `(("format" . "json") ("controller" . "Lightbox") (params->query `(("format" . "json") ("controller" . "Lightbox")
("method" . "getMediaDetail") ("method" . "getMediaDetail")
("fileTitle" . ,prefixed-title))))) ("fileTitle" . ,prefixed-title)))))
(log-outgoing dest-url) (log-outgoing dest-url)
(define dest-res (easy:get dest-url #:timeouts timeouts)) (define dest-res (easy:get dest-url #:timeouts timeouts))
(easy:response-json dest-res)] (easy:response-json dest-res)]
[siteinfo (siteinfo-fetch wikiname)]) [siteinfo (siteinfo-fetch wikiname)])
(if (not (jp "/exists" media-detail #f)) (if (not (jp "/exists" media-detail #f))
(next-dispatcher) (next-dispatcher)
(response-handler (response-handler
(define file-title (jp "/fileTitle" media-detail "")) (define file-title (jp "/fileTitle" media-detail ""))
(define title (define title
(if (non-empty-string? file-title) (format "File:~a" file-title) prefixed-title)) (if (non-empty-string? file-title) (format "File:~a" file-title) prefixed-title))
(define image-content-type (define image-content-type
(if (non-empty-string? (jp "/videoEmbedCode" media-detail "")) (if (non-empty-string? (jp "/videoEmbedCode" media-detail ""))
#f #f
(url-content-type (jp "/imageUrl" media-detail)))) (url-content-type (jp "/imageUrl" media-detail))))
(define body (define body
(generate-results-page #:req req (generate-results-page #:req req
#:source-url source-url #:source-url source-url
#:wikiname wikiname #:wikiname wikiname
#:title title #:title title
#:media-detail media-detail #:media-detail media-detail
#:image-content-type image-content-type #:image-content-type image-content-type
#:siteinfo siteinfo)) #:siteinfo siteinfo))
(when (config-true? 'debug) (when (config-true? 'debug)
; used for its side effects ; used for its side effects
; convert to string with error checking, error will be raised if xexp is invalid ; convert to string with error checking, error will be raised if xexp is invalid
(xexp->html body)) (xexp->html body))
(response/output #:code 200 (response/output #:code 200
#:headers (build-headers always-headers) #:headers (build-headers always-headers)
(λ (out) (write-html body out))))))) (λ (out) (write-html body out))))))))
(module+ test (module+ test
(parameterize ([(config-parameter 'strict_proxy) "true"]) (parameterize ([(config-parameter 'strict_proxy) "true"])
(check-equal? (get-media-html "" "image/jpeg") (check-equal? (get-media-html "" "image/jpeg")

View file

@ -5,8 +5,8 @@
web-server/http web-server/http
"application-globals.rkt" "application-globals.rkt"
"data.rkt" "data.rkt"
"url-utils.rkt" "../lib/url-utils.rkt"
"xexpr-utils.rkt") "../lib/xexpr-utils.rkt")
(provide (provide
page-global-search) page-global-search)

View file

@ -6,8 +6,8 @@
"application-globals.rkt" "application-globals.rkt"
"data.rkt" "data.rkt"
"static-data.rkt" "static-data.rkt"
"url-utils.rkt" "../lib/url-utils.rkt"
"xexpr-utils.rkt" "../lib/xexpr-utils.rkt"
"config.rkt") "config.rkt")
(provide (provide
@ -26,13 +26,16 @@
(define content (define content
`((h2 "BreezeWiki makes wiki pages on Fandom readable") `((h2 "BreezeWiki makes wiki pages on Fandom readable")
(p "It removes ads, videos, and suggested content, leaving you with a clean page that doesn't slow down your device or use up your data.") (p "It removes ads, videos, and suggested content, leaving you with a clean page that doesn't slow down your device or use up your data.")
(p "BreezeWiki can also be called an \"alternative frontend for Fandom\".")
(p ,(format "To use BreezeWiki, just replace \"\" with \"~a\", and you'll instantly be teleported to a better world." (p ,(format "To use BreezeWiki, just replace \"\" with \"~a\", and you'll instantly be teleported to a better world."
(if (config-true? 'canonical_origin) (if (config-true? 'canonical_origin)
(url-host (string->url (config-get 'canonical_origin))) (url-host (string->url (config-get 'canonical_origin)))
""))) "")))
(p "If you'd like to be automatically sent to BreezeWiki every time in the future, " (p "If you'd like to be automatically sent to BreezeWiki every time in the future, "
(a (@ (href "")) "get our affiliated browser extension (NEW!)")
" or "
(a (@ (href "")) "check out the tutorial in the manual.")) (a (@ (href "")) "check out the tutorial in the manual."))
(p "BreezeWiki is available on several different websites called " (a (@ (href "")) "mirrors") ". Each is independently run. If one mirror is offline, the others still work. "
(a (@ (href "")) "See the list."))
(h2 "Find a page") (h2 "Find a page")
(form (@ (action "/search")) (form (@ (action "/search"))
(label (@ (class "paired__label")) (label (@ (class "paired__label"))
@ -50,7 +53,7 @@
examples)) examples))
(h2 "Testimonials") (h2 "Testimonials")
(p (@ (class "testimonial")) ">so glad someone introduced me to a F*ndom alternative (BreezeWiki) because that x-factorized spillway of an ad-infested radioactive dumpsite can go die in a fire —RB") (p (@ (class "testimonial")) ">so glad someone introduced me to a F*ndom alternative (BreezeWiki) because that x-factorized spillway of an ad-infested radioactive dumpsite can go die in a fire —RB")
(p (@ (class "testimonial")) ">you are so right that fandom still sucks even with adblock somehow. even zapping all the stupid padding it still sucks —Minimus") (p (@ (class "testimonial")) ">apparently there are thousands of people essentially running our company " (em "for free") " right now, creating tons of content, and we just put ads on top of it and they're not even employees. thousands of people we can't lay off. thousands! —" (a (@ (href "") (target "_blank")) "Perkins Miller, Fandom CEO"))
(p (@ (class "testimonial")) ">attempting to go to a wiki's forum page with breezewiki doesn't work, which is based honestly —Tom Skeleton") (p (@ (class "testimonial")) ">attempting to go to a wiki's forum page with breezewiki doesn't work, which is based honestly —Tom Skeleton")
(p (@ (class "testimonial")) ">Fandom pages crashing and closing, taking forever to load and locking up as they load the ads on the site... they are causing the site to crash because they are trying to load video ads both at the top and bottom of the site as well as two or three banner ads, then a massive top of site ad and eventually my anti-virus shuts the whole site down because it's literally pulling more resources than WoW in ultra settings... —Anonymous") (p (@ (class "testimonial")) ">Fandom pages crashing and closing, taking forever to load and locking up as they load the ads on the site... they are causing the site to crash because they are trying to load video ads both at the top and bottom of the site as well as two or three banner ads, then a massive top of site ad and eventually my anti-virus shuts the whole site down because it's literally pulling more resources than WoW in ultra settings... —Anonymous")
(p (@ (class "testimonial")) ">reblogs EXTREMELY appreciated I want that twink* (*fandom wiki) obliterated —footlong") (p (@ (class "testimonial")) ">reblogs EXTREMELY appreciated I want that twink* (*fandom wiki) obliterated —footlong")

View file

@ -9,8 +9,8 @@
web-server/http web-server/http
(only-in web-server/dispatchers/dispatch next-dispatcher) (only-in web-server/dispatchers/dispatch next-dispatcher)
"application-globals.rkt" "application-globals.rkt"
"url-utils.rkt" "../lib/url-utils.rkt"
"xexpr-utils.rkt") "../lib/xexpr-utils.rkt")
(provide (provide
page-proxy) page-proxy)

View file

@ -3,8 +3,8 @@
web-server/http web-server/http
"application-globals.rkt" "application-globals.rkt"
"data.rkt" "data.rkt"
"url-utils.rkt" "../lib/url-utils.rkt"
"xexpr-utils.rkt") "../lib/xexpr-utils.rkt")
(provide (provide
redirect-wiki-home) redirect-wiki-home)

View file

@ -13,9 +13,10 @@
"application-globals.rkt" "application-globals.rkt"
"config.rkt" "config.rkt"
"data.rkt" "data.rkt"
"syntax.rkt" "../lib/syntax.rkt"
"url-utils.rkt" "../lib/url-utils.rkt"
"xexpr-utils.rkt") "whole-utils.rkt"
(provide (provide
page-search) page-search)
@ -60,6 +61,8 @@
(define wikiname (path/param-path (first (url-path (request-uri req))))) (define wikiname (path/param-path (first (url-path (request-uri req)))))
(define query (dict-ref (url-query (request-uri req)) 'q #f)) (define query (dict-ref (url-query (request-uri req)) 'q #f))
(define origin (format "" wikiname)) (define origin (format "" wikiname))
(when (config-true? 'feature_offline::only)
(raise-user-error "Full search is currently not available on - for now, please use the pop-up search suggestions or wait for me to fix it! Thanks <3"))
(define dest-url (define dest-url
(format "~a/api.php?~a" (format "~a/api.php?~a"
origin origin
@ -87,5 +90,6 @@
(λ (out) (λ (out)
(write-html body out)))))) (write-html body out))))))
(module+ test (module+ test
(check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule") (parameterize ([(config-parameter 'feature_offline::only) "false"])
(generate-results-page test-req "" "test" "Gacha" search-json-data))))) (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
(generate-results-page test-req "" "test" "Gacha" search-json-data))))))

View file

@ -4,8 +4,8 @@
web-server/http web-server/http
"application-globals.rkt" "application-globals.rkt"
"data.rkt" "data.rkt"
"url-utils.rkt" "../lib/url-utils.rkt"
"xexpr-utils.rkt") "../lib/xexpr-utils.rkt")
(provide (provide
page-set-user-settings) page-set-user-settings)

View file

@ -0,0 +1,90 @@
#lang racket/base
(require racket/file
(only-in web-server/dispatchers/dispatch next-dispatcher)
(define-runtime-path path-archive "../storage/archive")
;; Curried helper: (replacer wikiname) yields a two-argument procedure taking the
;; whole matched text and the captured URL text.
;; NOTE(review): this listing appears to be missing lines (the brackets do not balance
;; and the clause list bound to norm-url has no visible opening form) — confirm against
;; the real file before relying on the structure shown here.
(define ((replacer wikiname) whole url)
;; tests for URLs that get special treatment; presumably these are left
;; unrewritten, but the branch that handles them is not visible here — TODO confirm
(if (or (equal? url "")
(equal? url "'")
(string-contains? url "/resources-ucp/")
(string-contains? url "/fonts/")
(string-contains? url "/drm_fonts/")
(string-contains? url "//")
(string-contains? url "//")
(string-contains? url "dropbox")
(string-contains? url "only=styles")
(string-contains? url "https://https://")
(regexp-match? #rx"^%20|^'" url)
(regexp-match? #rx"^\"?data:" url))
;; norm-url: https:// URLs are kept, http:// and protocol-relative URLs are
;; upgraded to https, site-relative URLs are formatted with wikiname, and any
;; other shape raises a user error
(let* ([norm-url
[(string-prefix? url "https://") url]
[(string-prefix? url "http://") (regexp-replace #rx"http:" url "https:")]
[(string-prefix? url "//") (string-append "https:" url)]
[(string-prefix? url "/") (format "" wikiname url)]
[else (raise-user-error "While calling replace-style-for-images, this URL had an unknown format and couldn't be saved:" url)])])
;; p is the result of image-url->values; its cdr is used as the archived image name below
(define p (image-url->values norm-url))
;; (printf "hashed: ~a~n -> ~a~n #-> ~a~n" url (car p) (cdr p))
(format "/archive/~a/images/~a" wikiname (cdr p))))))
;; Read the stylesheet stored at `path` and return its text with every CSS
;; url(...) occurrence passed through the procedure made by (replacer wikiname).
(define (replace-style-for-images wikiname path)
  (let* ([css-text (file->string path)]
         [rewrite-url (replacer wikiname)])
    (regexp-replace* #rx"url\\(([^)]*)\\)" css-text rewrite-url)))
;; Serve an archived stylesheet for `wikiname`: `dest` is the file name looked up
;; under <path-archive>/<wikiname>/styles, and the response body is the output of
;; replace-style-for-images for that file, sent as text/css.
;; NOTE(review): this listing appears to be missing lines (the body of the `unless`
;; and the forms wrapping the #:code/#:headers keywords are not visible) — confirm
;; against the real file.
(define (handle-style wikiname dest)
(when (config-true? 'debug)
(printf "using offline mode for style ~a ~a~n" wikiname dest))
(define fs-path (build-path path-archive wikiname "styles" dest))
;; NOTE(review): unlike the printf above, this println is not guarded by the
;; debug config check, so it prints on every request — looks like leftover debugging
(println fs-path)
(unless (file-exists? fs-path)
(define new-content (replace-style-for-images wikiname fs-path))
#:code 200
#:headers (list (header #"Content-Type" #"text/css")
(header #"Referrer-Policy" #"same-origin"))
(λ (out) (displayln new-content out)))))
;; Serve an archived image for `wikiname`. The file is looked up in
;; <path-archive>/<wikiname>/images/<first char of dest>/<first two chars of dest>
;; as the first directory entry whose name starts with `dest`, and its bytes are
;; copied to the response.
;; NOTE(review): this listing appears to be missing lines (the forms wrapping the
;; #:code/#:headers keywords are not visible) — confirm against the real file.
(define (handle-image wikiname dest) ;; dest is the hash with no extension
;; anything shorter than 40 characters is handed to the next dispatcher
(unless ((string-length dest) . >= . 40) (next-dispatcher))
(define dir (build-path path-archive wikiname "images" (substring dest 0 1) (substring dest 0 2)))
(unless (directory-exists? dir) (next-dispatcher))
(define candidates (directory-list dir))
;; NOTE(review): findf returns #f when no candidate matches, and path->string is
;; applied to that result directly, so the not-found case would raise here and
;; the `unless target` check below looks unreachable for it — confirm
(define target (path->string (findf (λ (f) (string-prefix? (path->string f) dest)) candidates)))
(unless target (next-dispatcher))
;; everything from index 41 onward is treated as the extension
;; NOTE(review): presumably index 40 is the "." separator, so ext has no leading
;; dot — verify that ext->mime-type accepts that form
(define ext (substring target 41))
#:code 200
#:headers (list (header #"Content-Type" (ext->mime-type (string->bytes/latin-1 ext))))
(λ (out)
(call-with-input-file (build-path dir target)
(λ (in)
(copy-port in out)))))))
;; Entry point for archive requests. The request path is split into exactly four
;; segments (ignored prefix, wikiname, kind, dest) and dispatched on `kind`:
;; "styles" goes to handle-style, "images" goes to handle-image, and anything
;; else raises a user error inside response-handler.
(define (page-static-archive req)
  (define segments (map path/param-path (url-path (request-uri req))))
  (define-values (_ wikiname kind dest) (apply values segments))
  (case kind
    [("styles") (handle-style wikiname dest)]
    [("images") (handle-image wikiname dest)]
    [else (response-handler (raise-user-error "page-static-archive: how did we get here?" kind))]))

View file

@ -7,6 +7,7 @@
web-server/dispatchers/filesystem-map web-server/dispatchers/filesystem-map
(only-in web-server/dispatchers/dispatch next-dispatcher) (only-in web-server/dispatchers/dispatch next-dispatcher)
(prefix-in files: web-server/dispatchers/dispatch-files) (prefix-in files: web-server/dispatchers/dispatch-files)
"config.rkt") "config.rkt")
(provide (provide
@ -16,6 +17,7 @@
(require rackunit)) (require rackunit))
(define-runtime-path path-static "../static") (define-runtime-path path-static "../static")
(define-runtime-path path-archive "../storage/archive")
(define hash-ext-mime-type (define hash-ext-mime-type
(hash #".css" #"text/css" (hash #".css" #"text/css"
@ -25,45 +27,49 @@
#".woff2" #"font/woff2" #".woff2" #"font/woff2"
#".txt" #"text/plain")) #".txt" #"text/plain"))
;; Look up the MIME type (as bytes) for a file extension given as bytes,
;; e.g. #".png" maps to #"image/png". The lookup has no fallback, so an
;; extension that is not a key of hash-ext-mime-type raises hash-ref's error.
(define (ext->mime-type ext)
  (define mime-type (hash-ref hash-ext-mime-type ext))
  mime-type)
(module+ test
(check-equal? (ext->mime-type #".png") #"image/png"))
(define (make-path segments) (define (make-path segments)
(map (λ (seg) (path/param seg '())) segments)) (map (λ (seg) (path/param seg '())) segments))
(module+ test (module+ test
(check-equal? (make-path '("static" "main.css")) (check-equal? (make-path '("static" "main.css"))
(list (path/param "static" '()) (path/param "main.css" '())))) (list (path/param "static" '()) (path/param "main.css" '()))))
;; given a request path, return a rewritten request path and the source directory on the filesystem to serve it from
(define (path-rewriter p) (define (path-rewriter p)
(cond (cond
; url is ^/static/... ? ; url is ^/static/... ?
[(equal? (path/param-path (car p)) "static") [(equal? (path/param-path (car p)) "static")
; rewrite to ^/... which will be treated as relative to static/ on the filesystem ; rewrite to ^/... which will be treated as relative to static/ on the filesystem
(cdr p)] (values (cdr p) path-static)]
; url is ^/archive/... ?
[(equal? (path/param-path (car p)) "archive")
; rewrite req to ^/<wikiname> and dir to /storage/archive
(values (cdr p) path-archive)]
; url is literally ^/robots.txt ; url is literally ^/robots.txt
[(equal? p (make-path '("robots.txt"))) [(equal? p (make-path '("robots.txt")))
; rewrite to ^/... -- it already is! ; rewrite to ^/... -- it already is!
p] (values p path-static)]
; not going to use the static file dispatcher ; not going to use the static file dispatcher
[#t (next-dispatcher)])) [#t (next-dispatcher)]))
(module+ test (module+ test
(check-equal? (path-rewriter (make-path '("static" "main.css"))) (check-equal? (call-with-values (λ () (path-rewriter (make-path '("static" "main.css")))) cons)
(make-path '("main.css"))) (cons (make-path '("main.css")) path-static))
(check-equal? (path-rewriter (make-path '("static" "robots.txt"))) (check-equal? (call-with-values (λ () (path-rewriter (make-path '("static" "robots.txt")))) cons)
(make-path '("robots.txt"))) (cons (make-path '("robots.txt")) path-static))
(check-equal? (path-rewriter (make-path '("robots.txt"))) (check-equal? (call-with-values (λ () (path-rewriter (make-path '("robots.txt")))) cons)
(make-path '("robots.txt")))) (cons (make-path '("robots.txt")) path-static))
(check-equal? (call-with-values (λ () (path-rewriter (make-path '("archive" "minecraft" "styles" "main.css")))) cons)
(cons (make-path '("minecraft" "styles" "main.css")) path-archive)))
(define (static-dispatcher conn old-req) (define (static-dispatcher conn old-req)
(define old-uri (request-uri old-req)) (define old-uri (request-uri old-req))
(define old-path (url-path old-uri)) (define old-path (url-path old-uri))
(define new-path (path-rewriter old-path)) (define-values (new-path source-dir) (path-rewriter old-path))
(define new-uri (struct-copy url old-uri [path new-path])) (define new-uri (struct-copy url old-uri [path new-path]))
(define new-req (struct-copy request old-req [uri new-uri])) (define new-req (struct-copy request old-req [uri new-uri]))
((files:make ((files:make
#:url->path (lambda (u) ((make-url->path path-static) u)) #:url->path (lambda (u) ((make-url->path source-dir) u))
#:path->headers (lambda (p) (list (header #"Access-Control-Allow-Origin" #"*")
(header #"Referrer-Policy" #"same-origin")))
#:path->mime-type (lambda (u) (ext->mime-type (path-get-extension u))) #:path->mime-type (lambda (u) (ext->mime-type (path-get-extension u)))
#:cache-no-cache (config-true? 'debug) #:cache-no-cache (config-true? 'debug)
#:cache-immutable (not (config-true? 'debug)) #:cache-immutable (not (config-true? 'debug))

View file

@ -9,8 +9,8 @@
(prefix-in lift: web-server/dispatchers/dispatch-lift) (prefix-in lift: web-server/dispatchers/dispatch-lift)
"application-globals.rkt" "application-globals.rkt"
"config.rkt" "config.rkt"
"syntax.rkt" "../lib/syntax.rkt"
"xexpr-utils.rkt") "../lib/xexpr-utils.rkt")
(provide (provide
subdomain-dispatcher) subdomain-dispatcher)

src/page-wiki-offline.rkt Normal file
View file

@ -0,0 +1,142 @@
#lang racket/base
(require racket/dict
; libs
(prefix-in easy: net/http-easy)
; html libs
; web server libs
; my libs
; used by the web server
(module+ test
(require rackunit))
(define-runtime-path path-archive "../storage/archive")
;; Serve a wiki page from the on-disk archive under path-archive instead of
;; fetching it over the network. If the archived file does not exist, an
;; "unsaved page" notice is built instead; otherwise the stored JSON is read,
;; its HTML is parsed and transformed, and the result is written as the response.
;; NOTE(review): this listing appears to be missing lines (several forms have no
;; visible opening or closing, e.g. the conditional around the two branches and
;; the calls that take the #:req/#:code keywords) — confirm against the real file.
(define (page-wiki-offline req)
;; the first URL path segment is the wikiname; the remaining segments form the page basename
(define wikiname (path/param-path (first (url-path (request-uri req)))))
(define segments (map path/param-path (cdr (url-path (request-uri req)))))
(define basename (url-segments->basename segments))
;; basenames longer than 240 characters are replaced by their SHA-1; the
;; alternative branch is not visible in this listing
;; NOTE(review): string->bytes/latin-1 raises on characters above U+00FF unless an
;; error byte is supplied — confirm basename can never contain such characters
(define maybe-hashed-basename (if ((string-length basename) . > . 240)
(sha1 (string->bytes/latin-1 basename))
;; archive-format is a pair: (file name format string . thunk that reads JSON
;; from the current input port); the .json.gz reader gunzips through a pipe first
(define archive-format
(case (config-get 'feature_offline::format)
[(".json" "json") (cons "~a.json" (λ () (read-json)))]
[(".json.gz" "json.gz") (cons "~a.json.gz" (λ ()
(define-values (in out) (make-pipe))
(gunzip-through-ports (current-input-port) out)
(read-json in)))]
[else (error 'archive-format "unknown archive format configured")]))
(define fs-path (build-path path-archive wikiname (format (car archive-format) maybe-hashed-basename)))
(define source-url (format "" wikiname (basename->name-for-query basename)))
;; branch taken when the page is not in the archive: build a notice page with links elsewhere
[(not (file-exists? fs-path))
(unless (config-true? 'feature_offline::only)
(define mirror-path (url->string (request-uri req)))
(define body
`(div (@ (class "unsaved-page"))
(style ".unsaved-page a { text-decoration: underline !important }")
(p " doesn't have this page saved.")
(p "You can see this page by visiting a BreezeWiki mirror:")
(li (a (@ (href ,(format "" mirror-path))) "View on"))
(li (a (@ (href ,(format "" mirror-path))) "View on"))
(li (a (@ (href ,source-url)) "or, you can see the original page on Fandom (ugh)")))
(p "If you'd like " ,wikiname " to be added to, " (a (@ (href "")) "let me know about it!")))
#:req req
#:source-url source-url
#:wikiname wikiname
#:title (url-segments->guess-title segments)
#:online-styles #f
#:siteinfo (siteinfo-fetch wikiname)
(when (config-true? 'debug)
; used for its side effects
; convert to string with error checking, error will be raised if xexp is invalid
(xexp->html body))
#:code 200
#:headers always-headers
(λ (out)
(write-html body out)))]
;; from here on: the archived file exists, so read it with the reader chosen above
(when (config-true? 'debug)
(printf "using offline mode for ~v~n" fs-path))
(define data (with-input-from-file fs-path (cdr archive-format)))
(define article-title (jp "/parse/title" data))
(define original-page (html->xexp (preprocess-html-wiki (jp "/parse/text" data))))
(define page ((query-selector (λ (t a c) (has-class? "mw-parser-output" a)) original-page)))
(define initial-head-data ((head-data-getter wikiname) data))
(define user-cookies (user-cookies-getter req))
(define theme (user-cookies^-theme user-cookies))
;; when the cookie theme is light or dark, rewrite the theme-fandomdesktop-*
;; part of the body class to match it; any other theme keeps the stored head data
(define head-data
(case theme
[(light dark)
(struct-copy head-data^ initial-head-data
[body-class (regexp-replace #rx"(theme-fandomdesktop-)(light|dark)"
(head-data^-body-class initial-head-data)
(format "\\1~a" theme))])]
[else initial-head-data]))
(define body
(update-tree-wiki page wikiname)
#:req req
#:source-url source-url
#:wikiname wikiname
#:title article-title
#:online-styles #f
#:head-data head-data
#:siteinfo (siteinfo-fetch wikiname)
;; if the page body contains a redirectMsg element and the request does not say
;; redirect=no, a Refresh header pointing at the first link inside it is produced
(define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body)))
(define redirect-query-parameter (dict-ref (url-query (request-uri req)) 'redirect "yes"))
(define headers
; redirect-query-parameter: only the string "no" is significant:
(when (and redirect-msg
(not (equal? redirect-query-parameter "no")))
(let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))]
[value (bytes-append #"0;url=" (string->bytes/utf-8 dest))])
(header #"Refresh" value)))))
(when (config-true? 'debug)
; used for its side effects
; convert to string with error checking, error will be raised if xexp is invalid
(xexp->html body))
#:code 200
#:headers headers
(λ (out)
(write-html body out))))])))

View file

@ -7,7 +7,7 @@
; libs ; libs
(prefix-in easy: net/http-easy) (prefix-in easy: net/http-easy)
; html libs ; html libs
html-parsing "../lib/html-parsing/main.rkt"
html-writing html-writing
; web server libs ; web server libs
net/url net/url
@ -17,11 +17,12 @@
"application-globals.rkt" "application-globals.rkt"
"config.rkt" "config.rkt"
"data.rkt" "data.rkt"
"pure-utils.rkt" "../lib/pure-utils.rkt"
"syntax.rkt" "../lib/syntax.rkt"
"tree-updater.rkt" "../lib/tree-updater.rkt"
"xexpr-utils.rkt" "../lib/url-utils.rkt"
"url-utils.rkt") "whole-utils.rkt"
(provide (provide
; used by the web server ; used by the web server
@ -33,24 +34,6 @@
(module+ test (module+ test
(require rackunit)) (require rackunit))
;; Textual fix-ups for wiki HTML, expressed as regexp replacements that work
;; around markup the parser handles badly.
;; NOTE(review): the form that combines the two rr* replacements and applies them
;; to `html` is not visible in this listing — confirm against the real file.
(define (preprocess-html-wiki html)
;; (rr* find replace) yields a procedure that replaces every match of `find` in its argument
(define ((rr* find replace) contents)
(regexp-replace* find contents replace))
; fix navbox list nesting
; navbox on right of page has incorrect html "<td ...><li>" and the xexpr parser puts the <li> much further up the tree
; add a <ul> to make the parser happy
; usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies
(rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2")
; change <figcaption><p> to <figcaption><span> to make the parser happy
(rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>"))
(module+ test
(check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>")
"<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>")
(check-equal? (preprocess-html-wiki "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"> <p class=\"caption\">Caption text.</p></figcaption></figure>")
"<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"><span class=\"caption\">Caption text.</span></figcaption></figure>"))
(define (page-wiki req) (define (page-wiki req)
(define wikiname (path/param-path (first (url-path (request-uri req))))) (define wikiname (path/param-path (first (url-path (request-uri req)))))
(define user-cookies (user-cookies-getter req)) (define user-cookies (user-cookies-getter req))

View file

@ -3,6 +3,7 @@
;;; Source: ;;; Source:
;;; Source commit: cae2a14 from 24 May 2015 ;;; Source commit: cae2a14 from 24 May 2015
;;; Source license: LGPL 3 or later ;;; Source license: LGPL 3 or later
;;; Further modifications by Cadence as seen in this repo's git history.
(provide (struct-out reloadable-entry-point) (provide (struct-out reloadable-entry-point)
reload-poll-interval reload-poll-interval
@ -19,8 +20,8 @@
(require racket/match) (require racket/match)
(require racket/rerequire) (require racket/rerequire)
(define reload-poll-interval 0.5) ;; seconds (define reload-poll-interval 0.5) ; seconds
(define reload-failure-retry-delay (make-parameter 5)) ;; seconds (define reload-failure-retry-delay (make-parameter 5)) ; seconds
(struct reloadable-entry-point (name (struct reloadable-entry-point (name
module-path module-path

src/whole-utils.rkt Normal file
View file

@ -0,0 +1,11 @@
#lang typed/racket/base
(require "config.rkt")
; When the log_outgoing config option is true, prints a line of the form
; "out: <url>"; otherwise does nothing.
(: log-outgoing (String -> Void))
(define (log-outgoing url-string)
  (if (config-true? 'log_outgoing)
      (printf "out: ~a~n" url-string)
      (void)))

View file

@ -24,7 +24,7 @@
--theme-body-background-color: #286cab; --theme-body-background-color: #286cab;
--theme-body-background-color--rgb: 40,108,171; --theme-body-background-color--rgb: 40,108,171;
--theme-body-text-color: #fff; --theme-body-text-color: #000;
--theme-body-text-color--rgb: 255,255,255; --theme-body-text-color--rgb: 255,255,255;
--theme-body-text-color--hover: #cccccc; --theme-body-text-color--hover: #cccccc;
--theme-sticky-nav-background-color: #ffffff; --theme-sticky-nav-background-color: #ffffff;

View file

@ -31,6 +31,8 @@, button, input, textarea, .wikitable, .va-table {
font-family: "Source Sans Pro", "Segoe UI", sans-serif; font-family: "Source Sans Pro", "Segoe UI", sans-serif;
font-size: 18px; font-size: 18px;
line-height: 1.5; line-height: 1.5;
margin: 0;
padding: 0;
} }
h1, h2, h3, h4, h5, h6 { h1, h2, h3, h4, h5, h6 {
margin: 1.2em 0 0.6em; margin: 1.2em 0 0.6em;
@ -67,6 +69,25 @@ p {
max-width: 240px; max-width: 240px;
} }
/* global top banner message */
.bw-top-banner {
display: flex;
justify-content: space-evenly;
align-items: center;
background-color: #000;
color: #fff;
text-align: center;
white-space: pre-line;
padding: 8px;
.bw-top-banner a, .bw-top-banner a:visited {
color: #ffdd57;
text-decoration: underline;
.bw-top-banner-rainbow {
animation: bw-rainbow-color 1.6s linear infinite;
/* custom footer with source and license info */ /* custom footer with source and license info */
.custom-footer { .custom-footer {
clear: both; clear: both;
@ -338,8 +359,11 @@ figcaption, .lightbox-caption, .thumbcaption {
border-radius: 6px; border-radius: 6px;
font-size: 18px; font-size: 18px;
} }
.niwa__notice--alt {
background: #e5fdd8;
.niwa__header { .niwa__header {
font-size: max(2.9vw, 26px); font-size: max(2.75vw, 26px);
margin-top: 0; margin-top: 0;
} }
.niwa__notice a { .niwa__notice a {
@ -403,6 +427,10 @@ figcaption, .lightbox-caption, .thumbcaption {
.niwa__right { .niwa__right {
display: none; display: none;
} }
/* remove balloons in top banner */
.bw-balloon {
display: none;
} }
@media (min-width: 560px) { /* wider than 560 px */ @media (min-width: 560px) { /* wider than 560 px */
@ -418,6 +446,16 @@ figcaption, .lightbox-caption, .thumbcaption {
width: auto !important; width: auto !important;
text-align: center !important; text-align: center !important;
} }
/* make text content hit the edges of the screen (no space for the background) */
.page {
margin: 0;
.page__main {
background: linear-gradient(to bottom, rgba(var(--theme-page-background-color--rgb), 0), rgba(var(--theme-page-background-color--rgb), 1) 160px);
.page-title {
color: var(--theme-body-text-color);
} }
/* ***** /* *****
@ -456,3 +494,12 @@ figcaption, .lightbox-caption, .thumbcaption {
font-display: swap; font-display: swap;
src: url("/static/source-sans-pro-v21-vietnamese_latin-ext_latin_greek-ext_greek_cyrillic-ext_cyrillic-700italic.woff2") format("woff2"); src: url("/static/source-sans-pro-v21-vietnamese_latin-ext_latin_greek-ext_greek_cyrillic-ext_cyrillic-700italic.woff2") format("woff2");
} }
@keyframes bw-rainbow-color {
0% {
filter: hue-rotate(0deg);
100% {
filter: hue-rotate(360deg);

static/three-balloons.png Normal file

Binary file not shown.


Width:  |  Height:  |  Size: 6.2 KiB