breezewiki/src/page-wiki-offline.rkt

166 lines
6.8 KiB
Racket
Raw Permalink Normal View History

#lang racket/base
(require racket/dict
racket/file
racket/function
racket/list
racket/match
racket/path
racket/string
; libs
(prefix-in easy: net/http-easy)
file/sha1
file/gunzip
json
; html libs
"../lib/html-parsing/main.rkt"
html-writing
; web server libs
net/url
web-server/http
web-server/dispatchers/dispatch
; my libs
"application-globals.rkt"
2023-12-11 22:10:47 +00:00
"../archiver/archiver-database.rkt"
"config.rkt"
"data.rkt"
"log.rkt"
"page-wiki.rkt"
"../lib/archive-file-mappings.rkt"
"../lib/pure-utils.rkt"
"../lib/syntax.rkt"
"../lib/tree-updater.rkt"
"../lib/xexpr-utils.rkt"
"../lib/url-utils.rkt")
(provide
; used by the web server
page-wiki-offline)
(module+ test
(require rackunit))
(define path-archive (anytime-path ".." "storage/archive"))
2023-12-11 22:10:47 +00:00
(when (config-true? 'feature_offline::only)
(void (get-slc)))
(define (page-wiki-offline req)
(response-handler
(define wikiname (path/param-path (first (url-path (request-uri req)))))
(define segments (map path/param-path (cdr (url-path (request-uri req)))))
(define basename (url-segments->basename segments))
(define maybe-hashed-basename (if ((string-length basename) . > . 240)
(sha1 (string->bytes/latin-1 basename))
basename))
(define user-cookies (user-cookies-getter req))
(define theme (user-cookies^-theme user-cookies))
(log-page-request #t wikiname maybe-hashed-basename theme)
(define archive-format
(case (config-get 'feature_offline::format)
[(".json" "json") (cons "~a.json" (λ () (read-json)))]
[(".json.gz" "json.gz") (cons "~a.json.gz" (λ ()
(define-values (in out) (make-pipe))
(gunzip-through-ports (current-input-port) out)
(read-json in)))]
[else (error 'archive-format "unknown archive format configured")]))
(define fs-path (build-path path-archive wikiname (format (car archive-format) maybe-hashed-basename)))
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname (basename->name-for-query basename)))
2023-12-11 22:10:47 +00:00
(cond/var
[(file-exists? fs-path)
(when (config-true? 'debug)
(printf "using offline mode for ~v~n" fs-path))
(response-handler
(define data (with-input-from-file fs-path (cdr archive-format)))
(define article-title (jp "/parse/title" data))
(define original-page (html->xexp (preprocess-html-wiki (jp "/parse/text" data))))
(define page ((query-selector (λ (t a c) (has-class? "mw-parser-output" a)) original-page)))
(define initial-head-data ((head-data-getter wikiname) data))
(define head-data
(case theme
[(light dark)
(struct-copy head-data^ initial-head-data
[body-class (regexp-replace #rx"(theme-fandomdesktop-)(light|dark)"
(head-data^-body-class initial-head-data)
(format "\\1~a" theme))])]
[else initial-head-data]))
(define body
(generate-wiki-page
2023-12-11 22:10:47 +00:00
(update-tree-wiki page wikiname)
#:req req
#:source-url source-url
#:wikiname wikiname
2023-12-11 22:10:47 +00:00
#:title article-title
#:online-styles #f
2023-12-11 22:10:47 +00:00
#:head-data head-data
#:siteinfo (siteinfo-fetch wikiname)
))
2023-12-11 22:10:47 +00:00
(define redirect-query-parameter (dict-ref (url-query (request-uri req)) 'redirect "yes"))
(define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body)))
(define redirect-msg-a (if redirect-msg
((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))
#f))
2023-12-11 22:10:47 +00:00
(define headers
(build-headers
always-headers
; redirect-query-parameter: only the string "no" is significant:
; https://github.com/Wikia/app/blob/fe60579a53f16816d65dad1644363160a63206a6/includes/Wiki.php#L367
(when (and redirect-msg-a
2023-12-11 22:10:47 +00:00
(not (equal? redirect-query-parameter "no")))
(let* ([dest (get-attribute 'href (bits->attributes redirect-msg-a))]
2023-12-11 22:10:47 +00:00
[value (bytes-append #"0;url=" (string->bytes/utf-8 dest))])
(header #"Refresh" value)))))
(when (config-true? 'debug)
; used for its side effects
; convert to string with error checking, error will be raised if xexp is invalid
(xexp->html body))
(response/output
#:code 200
2023-12-11 22:10:47 +00:00
#:headers headers
(λ (out)
2023-12-11 22:10:47 +00:00
(write-html body out))))]
;; page not found on disk, perhaps it's a redirect? redirects are stored in the database
(var target (query-maybe-value* "select redirect from page where wikiname = ? and basename = ?" wikiname basename))
[target
; don't url decode the target, or Category: pages will be interpreted as a protocol
2024-01-09 04:35:26 +00:00
(generate-redirect (format "/~a/wiki/~a" wikiname (regexp-replace* #rx"#" target "/")))]
2023-12-11 22:10:47 +00:00
;; breezewiki doesn't have the page archived, see if we can make a network request for it
[(not (config-true? 'feature_offline::only))
(next-dispatcher)]
;; no possible way to provide the page
[else
(define mirror-path (url->string (request-uri req)))
(define body
(generate-wiki-page
`(div (@ (class "unsaved-page"))
(style ".unsaved-page a { text-decoration: underline !important }")
(p "breezewiki.com doesn't have this page saved.")
(p "You can see this page by visiting a BreezeWiki mirror:")
(ul
(li (a (@ (href ,(format "https://antifandom.com~a" mirror-path))) "View on antifandom.com"))
(li (a (@ (href ,(format "https://bw.artemislena.eu~a" mirror-path))) "View on artemislena.eu"))
(li (a (@ (href ,source-url)) "or, you can see the original page on Fandom (ugh)")))
(p "If you'd like " ,wikiname ".fandom.com to be added to breezewiki.com, " (a (@ (href "https://lists.sr.ht/~cadence/breezewiki-requests")) "let me know about it!")))
#:req req
#:source-url source-url
#:wikiname wikiname
#:title (url-segments->guess-title segments)
#:online-styles #f
#:siteinfo (siteinfo-fetch wikiname)
))
(when (config-true? 'debug)
; used for its side effects
; convert to string with error checking, error will be raised if xexp is invalid
(xexp->html body))
(response/output
#:code 200
#:headers always-headers
(λ (out)
(write-html body out)))])))