From 443f1eecbc4c15c8038920027e62fccbdcb0bbe7 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Wed, 23 Oct 2024 22:52:00 +1300 Subject: [PATCH 1/8] Add user agent and detect blocked pages --- src/fandom-request.rkt | 40 +++++++++++++++++++++++++++++++++------- src/page-wiki.rkt | 28 ++++++++++++++++++++++++++-- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/src/fandom-request.rkt b/src/fandom-request.rkt index 966eeee..c306b04 100644 --- a/src/fandom-request.rkt +++ b/src/fandom-request.rkt @@ -1,12 +1,16 @@ #lang typed/racket/base -(require "config.rkt" +(require racket/format + racket/string + "config.rkt" "../lib/url-utils.rkt") (define-type Headers (HashTable Symbol (U Bytes String))) (require/typed net/http-easy [#:opaque Timeout-Config timeout-config?] [#:opaque Response response?] [#:opaque Session session?] + [response-status-code (Response -> Natural)] [current-session (Parameter Session)] + [current-user-agent (Parameter (U Bytes String))] [make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)] [get ((U Bytes String) [#:close? Boolean] @@ -22,19 +26,41 @@ fandom-get-api timeouts) +(unless (string-contains? (~a (current-user-agent)) "BreezeWiki") + (current-user-agent + (format "BreezeWiki/1.0 (~a) ~a" + (if (config-true? 
'canonical_origin) + (config-get 'canonical_origin) + "local") + (current-user-agent)))) + (define timeouts (make-timeout-config #:lease 5 #:connect 5)) +(: last-failure Flonum) +(define last-failure 0.0) +(: stored-failure (Option Response)) +(define stored-failure #f) +(define failure-persist-time 30000) + (: no-headers Headers) (define no-headers '#hasheq()) (: fandom-get (String String [#:headers (Option Headers)] -> Response)) (define (fandom-get wikiname path #:headers [headers #f]) - (define dest-url (string-append "https://www.fandom.com" path)) - (define host (string-append wikiname ".fandom.com")) - (log-outgoing wikiname path) - (get dest-url - #:timeouts timeouts - #:headers (hash-set (or headers no-headers) 'Host host))) + (or + (and ((current-inexact-milliseconds) . < . (+ last-failure failure-persist-time)) stored-failure) + (let () + (define dest-url (string-append "https://www.fandom.com" path)) + (define host (string-append wikiname ".fandom.com")) + (log-outgoing wikiname path) + (define res + (get dest-url + #:timeouts timeouts + #:headers (hash-set (or headers no-headers) 'Host host))) + (when (memq (response-status-code res) '(403 406)) + (set! last-failure (current-inexact-milliseconds)) + (set! 
stored-failure res)) + res))) (: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response)) (define (fandom-get-api wikiname params #:headers [headers #f]) diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index f16792c..da63617 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -18,6 +18,7 @@ "config.rkt" "data.rkt" "fandom-request.rkt" + "../lib/archive-file-mappings.rkt" "../lib/pure-utils.rkt" "../lib/syntax.rkt" "../lib/thread-utils.rkt" @@ -37,8 +38,9 @@ (define (page-wiki req) (define wikiname (path/param-path (first (url-path (request-uri req))))) + (define segments (map path/param-path (cdr (url-path (request-uri req))))) (define user-cookies (user-cookies-getter req)) - (define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/")) + (define path (string-join (cdr segments) "/")) (define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path)) (define-values (dest-res siteinfo) @@ -101,9 +103,31 @@ (write-html body out))))))] [(eq? 404 (easy:response-status-code dest-res)) (next-dispatcher)] + [(memq (easy:response-status-code dest-res) '(403 406)) + (response-handler + (define body + (generate-wiki-page + `(div + (p "Sorry! Fandom isn't allowing BreezeWiki to show pages right now.") + (p "We'll automatically try again in 30 seconds, so please stay on this page and be patient.") + (p (small "In a hurry? 
" (a (@ (href ,source-url)) "Click here to read the page on Fandom.")))) + #:req req + #:source-url source-url + #:wikiname wikiname + #:title (url-segments->guess-title segments) + #:siteinfo siteinfo)) + (response/output + #:code 503 + #:headers (build-headers + always-headers + (header #"Retry-After" #"30") + (header #"Cache-Control" #"max-age=30, public") + (header #"Refresh" #"35")) + (λ (out) + (write-html body out))))] [else (response-handler - (error 'page-wiki "Tried to load page ~a/~v~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a" + (error 'page-wiki "Tried to load page ~a/~a~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a" wikiname path (easy:response-status-code dest-res) From 23a201cc841ef6a8cc0c06a5a6c3a19b77177aad Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Tue, 4 Nov 2025 23:06:55 +1300 Subject: [PATCH 2/8] Add JSONP mode and captcha JSONP mode is on by default. It will fetch main wiki pages in the browser, without the server needing to make any requests. To turn it off, add [feature_jsonp] enabled = false to config.ini. Captcha is off by default. It is a custom solution and is still experimental at this stage. If you turn it on, please monitor the logs to see how it goes! config.ini options are as follows: [captcha] enabled = true|false log = true|false ip_header =
--- breezewiki.rkt | 15 +++- lib/tree-updater.rkt | 10 +++ src/config.rkt | 6 ++ src/data.rkt | 12 ++- src/dispatcher-tree.rkt | 10 +++ src/page-captcha.rkt | 177 ++++++++++++++++++++++++++++++++++++++++ src/page-wiki-jsonp.rkt | 68 +++++++++++++++ src/page-wiki.rkt | 98 ++++++++++++---------- static/captcha.js | 24 ++++++ static/jsonp.js | 57 +++++++++++++ static/robots.txt | 1 + 11 files changed, 431 insertions(+), 47 deletions(-) create mode 100644 src/page-captcha.rkt create mode 100644 src/page-wiki-jsonp.rkt create mode 100644 static/captcha.js create mode 100644 static/jsonp.js diff --git a/breezewiki.rkt b/breezewiki.rkt index 5fd34b2..17b619b 100644 --- a/breezewiki.rkt +++ b/breezewiki.rkt @@ -1,5 +1,7 @@ #lang racket/base -(require web-server/servlet-dispatch +(require racket/splicing + web-server/servlet-dispatch + web-server/safety-limits "src/config.rkt" "src/dispatcher-tree.rkt" "src/reloadable.rkt") @@ -9,6 +11,9 @@ (reloadable-entry-point->procedure (make-reloadable-entry-point (quote varname) filename)))) +(require-reloadable "src/page-captcha.rkt" page-captcha) +(require-reloadable "src/page-captcha.rkt" page-captcha-image) +(require-reloadable "src/page-captcha.rkt" page-captcha-verify) (require-reloadable "src/page-category.rkt" page-category) (require-reloadable "src/page-global-search.rkt" page-global-search) (require-reloadable "src/page-home.rkt" page-home) @@ -22,7 +27,9 @@ (require-reloadable "src/page-static-archive.rkt" page-static-archive) (require-reloadable "src/page-subdomain.rkt" subdomain-dispatcher) (require-reloadable "src/page-wiki.rkt" page-wiki) +(require-reloadable "src/page-wiki.rkt" page-wiki-with-data) (require-reloadable "src/page-wiki-offline.rkt" page-wiki-offline) +(require-reloadable "src/page-wiki-jsonp.rkt" page-wiki-jsonp) (require-reloadable "src/page-file.rkt" page-file) (reload!) @@ -34,10 +41,14 @@ (if (config-true? 
'debug) "127.0.0.1" #f) (config-get 'bind_host)) #:port (string->number (config-get 'port)) + #:safety-limits (make-safety-limits #:max-request-body-length (* 8 1024 1024)) (λ (quit) (channel-put ch (lambda () (semaphore-post quit))) (dispatcher-tree ; order of these does not matter + page-captcha + page-captcha-image + page-captcha-verify page-category page-global-search page-home @@ -48,7 +59,9 @@ page-set-user-settings page-static-archive page-wiki + page-wiki-with-data page-wiki-offline + page-wiki-jsonp page-file redirect-wiki-home static-dispatcher diff --git a/lib/tree-updater.rkt b/lib/tree-updater.rkt index 109c875..2abf56c 100644 --- a/lib/tree-updater.rkt +++ b/lib/tree-updater.rkt @@ -58,6 +58,16 @@ (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") (class "thumbimage"))))) (figcaption "Test figure!")) + (div (@ (type "slideshow") (position "center") (widths "500") (mode "slideshow") (seq-no "0") (id "slideshow-0") (hash "b62d0efee427ad7dff1026e6e9dd078c") (class "wikia-slideshow wikia-gallery slideshow-center")) + (div (@ (class "wikia-slideshow-wrapper") (style "width: 500px") (data-test-outer-width)) + (div (@ (class "wikia-slideshow-images-wrapper")) + (ul (@ (class "wikia-slideshow-images neutral") (style "height: 375px; width: 500px") (data-test-inner-width)) + (li (@ (class "wikia-slideshow-first-image")) + (a (@ (class "image lightbox") (title "Expand slideshow") (id "slideshow-0-0") (style "width: 420px")) + (img (@ (data-src "https://static.wikia.nocookie.net/example/images/3/3d/Image.jpg/revision/latest/scale-to-width-down/500?cb=20140129105112") (class "thumbimage") (width "500") (height "281") (style "border: 0px;")))) + (div (@ (class "wikia-slideshow-overlay")) + (div (@ (class "wikia-slideshow-image-caption")) + "Example caption"))))))) (iframe (@ (src "https://example.com/iframe-src"))) (div (@ (class "reviews")) (header "GameSpot Expert Reviews")) diff --git a/src/config.rkt b/src/config.rkt index b1afe0a..0bbdfa3 
100644 --- a/src/config.rkt +++ b/src/config.rkt @@ -49,6 +49,12 @@ (feature_offline::only . "false") (feature_offline::search . "fandom") + (feature_jsonp::enabled . "true") + + (captcha::enabled . "false") + (captcha::log . "false") + (captcha::ip_header . "") + (access_log::enabled . "false") (promotions::indie_wiki_buddy . "banner home"))) diff --git a/src/data.rkt b/src/data.rkt index 63c7f03..44b5bda 100644 --- a/src/data.rkt +++ b/src/data.rkt @@ -20,6 +20,7 @@ (struct-out license^) (struct-out head-data^) (struct-out user-cookies^) + data->siteinfo siteinfo-fetch siteinfo-default license-default @@ -66,12 +67,15 @@ ("formatversion" . "2")))) (cond [(= (easy:response-status-code res) 200) (define data (easy:response-json res)) - (siteinfo^ (jp "/query/general/sitename" data) - (second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data))) - (license^ (jp "/query/rightsinfo/text" data) - (jp "/query/rightsinfo/url" data)))] + (data->siteinfo data)] [else siteinfo-default])])) +(define (data->siteinfo data) + (siteinfo^ (jp "/query/general/sitename" data) + (second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data))) + (license^ (jp "/query/rightsinfo/text" data) + (jp "/query/rightsinfo/url" data)))) + (define/memoize (head-data-getter wikiname) #:hash hash ;; data will be stored here, can be referenced by the memoized closure (define this-data head-data-default) diff --git a/src/dispatcher-tree.rkt b/src/dispatcher-tree.rkt index a967095..0bbf8c8 100644 --- a/src/dispatcher-tree.rkt +++ b/src/dispatcher-tree.rkt @@ -56,15 +56,25 @@ (sequencer:make subdomain-dispatcher (pathprocedure:make "/" (page ds page-home)) + (filter:make #rx"^/static/" (hash-ref ds 'static-dispatcher)) + (filter:make (pregexp "^/captcha/img/[0-9]+/[0-9]+$") (lift:make (page ds page-captcha-image))) + (filter:make (pregexp "^/captcha/verify/[0-9]+/[0-9]+/[0-9]+/[0-9]+$") (lift:make (page ds page-captcha-verify))) + (if (config-true? 
'captcha::enabled) + (lift:make (page ds page-captcha)) + (λ (_conn _req) (next-dispatcher))) (pathprocedure:make "/proxy" (page ds page-proxy)) (pathprocedure:make "/search" (page ds page-global-search)) (pathprocedure:make "/set-user-settings" (page ds page-set-user-settings)) (pathprocedure:make "/buddyfight/wiki/It_Doesn't_Work!!" (page ds page-it-works)) + (pathprocedure:make "/api/render/wiki" (page ds page-wiki-with-data)) (filter:make (pregexp (format "^/~a/wiki/Category:.+$" px-wikiname)) (lift:make (page ds page-category))) (filter:make (pregexp (format "^/~a/wiki/File:.+$" px-wikiname)) (lift:make (page ds page-file))) (if (config-true? 'feature_offline::enabled) (filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (page ds page-wiki-offline))) (λ (_conn _req) (next-dispatcher))) + (if (config-true? 'feature_jsonp::enabled) + (filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (page ds page-wiki-jsonp))) + (λ (_conn _req) (next-dispatcher))) (filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (page ds page-wiki))) (filter:make (pregexp (format "^/~a/search$" px-wikiname)) (lift:make (page ds page-search))) (filter:make (pregexp (format "^/~a(/(wiki(/)?)?)?$" px-wikiname)) (lift:make (page ds redirect-wiki-home))) diff --git a/src/page-captcha.rkt b/src/page-captcha.rkt new file mode 100644 index 0000000..dcd72be --- /dev/null +++ b/src/page-captcha.rkt @@ -0,0 +1,177 @@ +#lang racket/base +(require racket/class + racket/dict + racket/draw + pict + file/convertible + racket/format + racket/list + racket/math + racket/match + web-server/http + (only-in web-server/dispatchers/dispatch next-dispatcher) + net/url + (only-in net/cookies/server cookie->set-cookie-header cookie-header->alist) + html-writing + "application-globals.rkt" + "data.rkt" + "config.rkt" + "static-data.rkt" + "../lib/url-utils.rkt" + "../lib/xexpr-utils.rkt") + +(provide + page-captcha + page-captcha-image + page-captcha-verify) + 
+(define (get-ip req) + (define header + (if (config-true? 'captcha::ip_header) + (headers-assq* (string->bytes/utf-8 (config-get 'captcha::ip_header)) (request-headers/raw req)) + #f)) + (if header + (~a (header-value header)) + (request-client-ip req))) + +(define (get-rng req) + (parameterize ([current-pseudo-random-generator (make-pseudo-random-generator)]) + (define ip-segments (regexp-match* "[0-9]+" (get-ip req))) + (define seed + (modulo + (for/sum ([i (in-naturals)] + [s ip-segments]) + (* (add1 i) (add1 (string->number s)))) + (expt 2 32))) + (random-seed seed) + (current-pseudo-random-generator))) + +(define (get-key-solution req) + (parameterize ([current-pseudo-random-generator (get-rng req)]) + (random 1 (add1 9)))) + +(define diameter 35) +(define font (make-object font% 12 'system)) +(define msg "I'm not a robot!") +(define checkbox (filled-ellipse diameter diameter #:color "Pale Goldenrod")) +(define assembly + (frame + (inset + (hc-append + 8 + checkbox + (text msg font)) + 8))) +(define-values (inner-x inner-y) (cc-find assembly checkbox)) +(define-values (lt-x lt-y) (lt-find assembly checkbox)) +(define-values (rb-x rb-y) (rb-find assembly checkbox)) + +(define (get-coordinate-solution req w h) + (parameterize ([current-pseudo-random-generator (get-rng req)]) + (values (random (exact-truncate lt-x) (exact-truncate (- w rb-x))) + (random (exact-truncate lt-y) (exact-truncate (- h rb-y)))))) + +(define (page-captcha req) + (define cookie-header (headers-assq* #"cookie" (request-headers/raw req))) + (define cookies-alist (if cookie-header (cookie-header->alist (header-value cookie-header) bytes->string/utf-8) null)) + (for ([pair cookies-alist]) + (match pair + [(cons "captcha" method) + (when (config-true? 
'captcha::log) + (printf "captcha skip - via ~a [~a] - ~v~n" method (get-ip req) (url->string (request-uri req)))) + (next-dispatcher)] + [_ (void)])) + (response-handler + (define body + `(*TOP* + (*DECL* DOCTYPE html) + (html + (head + (meta (@ (name "viewport") (content "width=device-width, initial-scale=1"))) + (title "Checking you're not a bot...") + (link (@ (rel "stylesheet") (type "text/css") (href ,(get-static-url "internal.css")))) + (link (@ (rel "stylesheet") (type "text/css") (href ,(get-static-url "main.css")))) + (link (@ (rel "icon") (href ,(head-data^-icon-url head-data-default)))) + (script (@ (defer) (src "/static/captcha.js"))) + (body (@ (class "skin-fandomdesktop theme-fandomdesktop-light internal")) + (div (@ (class "main-container")) + (div (@ (class "fandom-community-header__background tileBoth header"))) + (div (@ (class "page")) + (main (@ (class "page__main")) + (div (@ (class "custom-top")) + (h1 (@ (class "page-title")) + "Checking you're not a bot...")) + (div (@ (id "content") #;(class "page-content")) + (div (@ (id "mw-content-text")) + (p "To confirm, please click directly in the circle, or hold down the " ,(~a (get-key-solution req)) " key on your keyboard.") + (noscript (p "JavaScript is required for the captcha. Sorry!")) + (div (@ (id "captcha-area"))))) + ,(application-footer #f))))))))) + (when (config-true? 'debug) + (xexp->html body)) + (response/output + #:code 200 + #:headers always-headers + (λ (out) + (write-html body out))))) + +(define (page-captcha-image req) + (response-handler + (define w (string->number (path/param-path (third (url-path (request-uri req)))))) + (define h (string->number (path/param-path (fourth (url-path (request-uri req)))))) + (define-values (at-x at-y) (get-coordinate-solution req w h)) + (when (config-true? 
'captcha::log) + (printf "captcha show - size ~a x ~a - solution ~a x ~a [~a]~n" w h at-x at-y (get-ip req))) + #;(printf "target: ~a x ~a~ncanvas: ~a x ~a~npict size: ~a-~a ~a-~a~n" at-x at-y x y lt-x rb-x lt-y rb-y) + (define dc (make-object bitmap-dc% #f)) + (send dc set-font font) + (define bm (make-object bitmap% w h #f #f)) + (send dc set-bitmap bm) + (draw-pict + assembly + dc + (- at-x inner-x) + (- at-y inner-y)) + (define image (convert bm 'png-bytes)) + (response/output + #:mime-type #"image/png" + #:headers (list (header #"Cache-Control" #"no-cache")) + (λ (out) (display image out))))) + +(define (page-captcha-verify req) + (response-handler + (match-define (list w h x y) (for/list ([segment (drop (url-path (request-uri req)) 2)]) + (string->number (path/param-path segment)))) + #;(printf "solution: ~a x ~a~ncoordinate: ~a x ~a~ndist^2: ~a~n" solution-x solution-y x y dist) + (define headers + (build-headers + always-headers + (cond + [(and (= y 0) (= x (get-key-solution req))) + (when (config-true? 'captcha::log) + (printf "captcha pass - key ~a [~a]~n" x (get-ip req))) + + (header #"Set-Cookie" (cookie->set-cookie-header (make-cookie "captcha" "key" #:path "/" #:max-age (* 60 60 24 365 10))))] + [else + (when (config-true? 'captcha::log) + (printf "captcha fail - key ~a instead of ~a [~a]~n" x (get-key-solution req) (get-ip req)))]) + (when (> y 0) + (let-values ([(solution-x solution-y) (get-coordinate-solution req w h)]) + (let ([dist (+ (expt (- x solution-x) 2) (expt (- y solution-y) 2))]) + (cond + [(dist . < . (expt (/ diameter 2) 2)) + (when (config-true? 'captcha::log) + (printf "captcha pass - coordinate, dist^2 ~a [~a]~n" dist (get-ip req))) + (header #"Set-Cookie" (cookie->set-cookie-header (make-cookie "captcha" "coordinate" #:path "/" #:max-age (* 60 60 24 365 10))))] + [else + (when (config-true? 
'captcha::log) + (printf "captcha fail - coordinate, dist^2 ~a [~a]~n" dist (get-ip req)))])))))) + (match (dict-ref (url-query (request-uri req)) 'from #f) + [(? string? dest) + (response/output + #:code 302 + #:mime-type #"text/plain" + #:headers (cons (header #"Location" (string->bytes/utf-8 dest)) headers) + (λ (out) + (displayln "Checking your answer..." out)))] + [#f (next-dispatcher)]))) diff --git a/src/page-wiki-jsonp.rkt b/src/page-wiki-jsonp.rkt new file mode 100644 index 0000000..3e49abf --- /dev/null +++ b/src/page-wiki-jsonp.rkt @@ -0,0 +1,68 @@ +#lang racket/base +(require racket/list + racket/string + web-server/http + net/url-structs + html-writing + "application-globals.rkt" + "data.rkt" + "config.rkt" + "../lib/url-utils.rkt" + "../lib/xexpr-utils.rkt" + "../lib/archive-file-mappings.rkt") + +(provide + page-wiki-jsonp) + +(define (page-wiki-jsonp req) + (response-handler + (define wikiname (path/param-path (first (url-path (request-uri req))))) + (define segments (map path/param-path (cdr (url-path (request-uri req))))) + (define path (string-join (cdr segments) "/")) + (define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path)) + + (define wiki-page-script-url + (format "https://~a.fandom.com/api.php?~a" + wikiname + (params->query `(("action" . "parse") + ("page" . ,path) + ("prop" . "text|headhtml|langlinks") + ("formatversion" . "2") + ("format" . "json") + ("callback" . "wikiPageCallback"))))) + (define siteinfo-script-url + (format "https://~a.fandom.com/api.php?~a" + wikiname + (params->query `(("action" . "query") + ("meta" . "siteinfo") + ("siprop" . "general|rightsinfo") + ("format" . "json") + ("formatversion" . "2") + ("callback" . "siteinfoCallback"))))) + + (define body + (generate-wiki-page + `(div + (noscript "You have to enable JavaScript to load wiki pages. 
Sorry!") + (div (@ (id "loading"))) + (progress (@ (id "progress") (style "margin-bottom: 50vh"))) + (script ,(format #<<END +var wikiname = ~v; +var path = ~v; +END + wikiname path)) + (script (@ (src "/static/jsonp.js"))) + (script (@ (async) (src ,wiki-page-script-url))) + (script (@ (async) (src ,siteinfo-script-url)))) + #:req req + #:source-url source-url + #:wikiname wikiname + #:title (url-segments->guess-title segments) + #:siteinfo siteinfo-default)) + (when (config-true? 'debug) + (xexp->html body)) + (response/output + #:code 200 + #:headers always-headers + (λ (out) + (write-html body out))))) diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index da63617..82956e4 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -6,6 +6,7 @@ racket/string ; libs (prefix-in easy: net/http-easy) + json ; html libs "../lib/html-parsing/main.rkt" html-writing @@ -19,8 +20,6 @@ "data.rkt" "fandom-request.rkt" "../lib/archive-file-mappings.rkt" - "../lib/pure-utils.rkt" - "../lib/syntax.rkt" "../lib/thread-utils.rkt" "../lib/tree-updater.rkt" "../lib/url-utils.rkt" @@ -29,6 +28,7 @@ (provide ; used by the web server page-wiki + page-wiki-with-data ; used by page-category, and similar pages that are partially wiki pages update-tree-wiki preprocess-html-wiki) @@ -59,48 +59,10 @@ (cond [(eq? 200 (easy:response-status-code dest-res)) - (let* ([data (easy:response-json dest-res)] - [title (jp "/parse/title" data "")] - [page-html (jp "/parse/text" data "")] - [page-html (preprocess-html-wiki page-html)] - [page (html->xexp page-html)] - [head-data ((head-data-getter wikiname) data)]) + (let ([data (easy:response-json dest-res)]) (if (equal? "missingtitle" (jp "/error/code" data #f)) (next-dispatcher) - (response-handler - (define body - (generate-wiki-page - (update-tree-wiki page wikiname) - #:req req - #:source-url source-url - #:wikiname wikiname - #:title title - #:head-data head-data - #:siteinfo siteinfo)) - (define redirect-query-parameter (dict-ref (url-query (request-uri req)) 'redirect "yes")) - (define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body))) - (define redirect-msg-a (if redirect-msg - ((query-selector (λ (t a c) (eq? 
t 'a)) redirect-msg)) - #f)) - (define headers - (build-headers - always-headers - ; redirect-query-parameter: only the string "no" is significant: - ; https://github.com/Wikia/app/blob/fe60579a53f16816d65dad1644363160a63206a6/includes/Wiki.php#L367 - (when (and redirect-msg-a - (not (equal? redirect-query-parameter "no"))) - (let* ([dest (get-attribute 'href (bits->attributes redirect-msg-a))] - [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) - (header #"Refresh" value))))) - (when (config-true? 'debug) - ; used for its side effects - ; convert to string with error checking, error will be raised if xexp is invalid - (xexp->html body)) - (response/output - #:code 200 - #:headers headers - (λ (out) - (write-html body out))))))] + (take-json-rewrite-and-return-page #:req req #:wikiname wikiname #:source-url source-url #:data data #:siteinfo siteinfo)))] [(eq? 404 (easy:response-status-code dest-res)) (next-dispatcher)] [(memq (easy:response-status-code dest-res) '(403 406)) @@ -132,3 +94,55 @@ path (easy:response-status-code dest-res) (easy:response-body dest-res)))])) + +(define (page-wiki-with-data req) + (define post-data/bytes (request-post-data/raw req)) + (define post-data/string (bytes->string/utf-8 post-data/bytes)) + (define post-data (string->jsexpr post-data/string)) + (define wikiname (jp "/wikiname" post-data)) + (define path (jp "/path" post-data)) + (take-json-rewrite-and-return-page + #:req req + #:wikiname wikiname + #:source-url (format "https://~a.fandom.com/wiki/~a" wikiname path) + #:data (jp "/data" post-data) + #:siteinfo (data->siteinfo (jp "/siteinfo" post-data)))) + +(define (take-json-rewrite-and-return-page #:req req #:wikiname wikiname #:source-url source-url #:data data #:siteinfo siteinfo) + (define title (jp "/parse/title" data "")) + (define page-html (preprocess-html-wiki (jp "/parse/text" data ""))) + (define page (html->xexp page-html)) + (define head-data ((head-data-getter wikiname) data)) + (response-handler + (define body + (generate-wiki-page + (update-tree-wiki page wikiname) + #:req req + 
#:source-url source-url + #:wikiname wikiname + #:title title + #:head-data head-data + #:siteinfo siteinfo)) + (define redirect-query-parameter (dict-ref (url-query (request-uri req)) 'redirect "yes")) + (define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body))) + (define redirect-msg-a (if redirect-msg + ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg)) + #f)) + (define html (xexp->html-bytes body)) + (define headers + (build-headers + always-headers + ; redirect-query-parameter: only the string "no" is significant: + ; https://github.com/Wikia/app/blob/fe60579a53f16816d65dad1644363160a63206a6/includes/Wiki.php#L367 + (when (and redirect-msg-a + (not (equal? redirect-query-parameter "no"))) + (let* ([dest (get-attribute 'href (bits->attributes redirect-msg-a))] + [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) + (header #"Refresh" value))))) + (response/full + 200 + #"OK" + (current-seconds) + #"text/html; charset=utf-8" + headers + (list html)))) diff --git a/static/captcha.js b/static/captcha.js new file mode 100644 index 0000000..87f6b88 --- /dev/null +++ b/static/captcha.js @@ -0,0 +1,24 @@ +const u = new URL(location) +const from = u.searchParams.get("from") || location.href +let answered = false + +const area = document.getElementById("captcha-area") +const areaBox = area.getBoundingClientRect() +const width = Math.floor(areaBox.width) +const height = Math.floor(window.innerHeight - areaBox.bottom - areaBox.left) +const img = document.createElement("img") +img.src = `/captcha/img/${width}/${height}` +img.addEventListener("click", event => { + if (answered) return + answered = true + location = `/captcha/verify/${width}/${height}/${event.offsetX}/${event.offsetY}?` + new URLSearchParams({from}) +}) +area.appendChild(img) + +document.addEventListener("keydown", event => { + if (event.repeat) { + if (answered) return + answered = true + location = `/captcha/verify/0/0/${event.key}/0?` + new URLSearchParams({from}) 
+ } +}) diff --git a/static/jsonp.js b/static/jsonp.js new file mode 100644 index 0000000..36140d9 --- /dev/null +++ b/static/jsonp.js @@ -0,0 +1,57 @@ +const loading = document.getElementById("loading") +loading.textContent = "Loading, please wait..." +const progress = document.getElementById("progress") + +let wikiPage = null +function wikiPageCallback(data) { + wikiPage = data + cont() +} + +let siteinfo = null +function siteinfoCallback(data) { + siteinfo = data + cont() +} + +async function cont() { + if (!(wikiPage && siteinfo)) return + + const xhr = new XMLHttpRequest(); + + const uploadFraction = 0.7 + + // Upload progress + xhr.upload.addEventListener("progress", event => { + if (event.lengthComputable) { + progress.value = (event.loaded / event.total) * uploadFraction + console.log( + `Uploaded ${((event.loaded / event.total) * 100).toFixed(2)}%`, + ) + } + }) + + // Download progress + xhr.addEventListener("progress", event => { + if (event.lengthComputable) { + progress.value = (event.loaded / event.total) * (1 - uploadFraction) + uploadFraction + console.log( + `Downloaded ${((event.loaded / event.total) * 100).toFixed(2)}%`, + ) + } + }) + + xhr.addEventListener("load", () => { + console.log(xhr) + document.body = xhr.responseXML.body + }) + + xhr.open("POST", "/api/render/wiki") + xhr.responseType = "document" + xhr.send(JSON.stringify({ + data: wikiPage, + siteinfo, + wikiname, + path + })); +} diff --git a/static/robots.txt b/static/robots.txt index b0e8bf6..b62f622 100644 --- a/static/robots.txt +++ b/static/robots.txt @@ -2,3 +2,4 @@ User-Agent: * Disallow: /*/wiki/* Disallow: /proxy Disallow: /set-user-settings +Disallow: /captcha From 1c675d487361a3903eda15eac6cb3c77a8631cd1 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Tue, 4 Nov 2025 23:12:30 +1300 Subject: [PATCH 3/8] update dist --- dist.rkt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dist.rkt b/dist.rkt index 2e46f8c..491ca80 100644 --- a/dist.rkt +++ 
b/dist.rkt @@ -3,6 +3,7 @@ "src/config.rkt" "src/dispatcher-tree.rkt") +(require (only-in "src/page-captcha.rkt" page-captcha page-captcha-image page-captcha-verify)) (require (only-in "src/page-category.rkt" page-category)) (require (only-in "src/page-global-search.rkt" page-global-search)) (require (only-in "src/page-home.rkt" page-home)) @@ -15,8 +16,9 @@ (require (only-in "src/page-static.rkt" static-dispatcher)) (require (only-in "src/page-static-archive.rkt" page-static-archive)) (require (only-in "src/page-subdomain.rkt" subdomain-dispatcher)) -(require (only-in "src/page-wiki.rkt" page-wiki)) +(require (only-in "src/page-wiki.rkt" page-wiki page-wiki-with-data)) (require (only-in "src/page-wiki-offline.rkt" page-wiki-offline)) +(require (only-in "src/page-wiki-jsonp.rkt" page-wiki-jsonp)) (require (only-in "src/page-file.rkt" page-file)) (serve/launch/wait @@ -27,6 +29,9 @@ (λ (quit) (dispatcher-tree ; order of these does not matter + page-captcha + page-captcha-image + page-captcha-verify page-category page-global-search page-home @@ -38,6 +43,8 @@ page-static-archive page-wiki page-wiki-offline + page-wiki-with-data + page-wiki-jsonp page-file redirect-wiki-home static-dispatcher From 143fadcafba22424794cbb846380180a6046224e Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Tue, 4 Nov 2025 23:16:15 +1300 Subject: [PATCH 4/8] clear bogus log --- src/page-captcha.rkt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/page-captcha.rkt b/src/page-captcha.rkt index dcd72be..91889b6 100644 --- a/src/page-captcha.rkt +++ b/src/page-captcha.rkt @@ -152,9 +152,10 @@ (printf "captcha pass - key ~a [~a]~n" x (get-ip req))) (header #"Set-Cookie" (cookie->set-cookie-header (make-cookie "captcha" "key" #:path "/" #:max-age (* 60 60 24 365 10))))] - [else + [(= y 0) (when (config-true? 
'captcha::log) - (printf "captcha fail - key ~a instead of ~a [~a]~n" x (get-key-solution req) (get-ip req)))]) + (printf "captcha fail - key ~a instead of ~a [~a]~n" x (get-key-solution req) (get-ip req)))] + [else (void)]) (when (> y 0) (let-values ([(solution-x solution-y) (get-coordinate-solution req w h)]) (let ([dist (+ (expt (- x solution-x) 2) (expt (- y solution-y) 2))]) From 4f4c939631fa17b1bf140a76db712ba61994bb11 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Tue, 4 Nov 2025 23:35:58 +1300 Subject: [PATCH 5/8] also update document head --- static/jsonp.js | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/static/jsonp.js b/static/jsonp.js index 36140d9..3dde75b 100644 --- a/static/jsonp.js +++ b/static/jsonp.js @@ -25,9 +25,6 @@ async function cont() { xhr.upload.addEventListener("progress", event => { if (event.lengthComputable) { progress.value = (event.loaded / event.total) * uploadFraction - console.log( - `Uploaded ${((event.loaded / event.total) * 100).toFixed(2)}%`, - ) } }) @@ -35,15 +32,19 @@ async function cont() { xhr.addEventListener("progress", event => { if (event.lengthComputable) { progress.value = (event.loaded / event.total) * (1 - uploadFraction) + uploadFraction - console.log( - `Downloaded ${((event.loaded / event.total) * 100).toFixed(2)}%`, - ) } }) xhr.addEventListener("load", () => { console.log(xhr) document.body = xhr.responseXML.body + document.title = xhr.responseXML.title + for (const e of xhr.responseXML.head.children) { + if (["LINK"].includes(e.tagName)) { + const imported = document.importNode(e, true) + document.head.appendChild(imported) + } + } }) xhr.open("POST", "/api/render/wiki") From 02848acfbb6c7da913dba6eb7cc2c87c36dca5c5 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Tue, 4 Nov 2025 23:56:32 +1300 Subject: [PATCH 6/8] make sure it's a post --- src/page-wiki.rkt | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/page-wiki.rkt 
b/src/page-wiki.rkt index 82956e4..707157a 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -96,17 +96,20 @@ (easy:response-body dest-res)))])) (define (page-wiki-with-data req) - (define post-data/bytes (request-post-data/raw req)) - (define post-data/string (bytes->string/utf-8 post-data/bytes)) - (define post-data (string->jsexpr post-data/string)) - (define wikiname (jp "/wikiname" post-data)) - (define path (jp "/path" post-data)) - (take-json-rewrite-and-return-page - #:req req - #:wikiname wikiname - #:source-url (format "https://~a.fandom.com/wiki/~a" wikiname path) - #:data (jp "/data" post-data) - #:siteinfo (data->siteinfo (jp "/siteinfo" post-data)))) + (response-handler + (define post-data/bytes (request-post-data/raw req)) + (when (not post-data/bytes) + (raise-user-error 'page-wiki-with-data "POST requests only, please.")) + (define post-data/string (bytes->string/utf-8 post-data/bytes)) + (define post-data (string->jsexpr post-data/string)) + (define wikiname (jp "/wikiname" post-data)) + (define path (jp "/path" post-data)) + (take-json-rewrite-and-return-page + #:req req + #:wikiname wikiname + #:source-url (format "https://~a.fandom.com/wiki/~a" wikiname path) + #:data (jp "/data" post-data) + #:siteinfo (data->siteinfo (jp "/siteinfo" post-data))))) (define (take-json-rewrite-and-return-page #:req req #:wikiname wikiname #:source-url source-url #:data data #:siteinfo siteinfo) (define title (jp "/parse/title" data "")) From c4e2fb00ef5fe7d01efe00e2a2875e696d00584d Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Wed, 5 Nov 2025 00:04:19 +1300 Subject: [PATCH 7/8] only replace content so suggestions work --- static/jsonp.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/static/jsonp.js b/static/jsonp.js index 3dde75b..4ed99de 100644 --- a/static/jsonp.js +++ b/static/jsonp.js @@ -37,7 +37,8 @@ async function cont() { xhr.addEventListener("load", () => { console.log(xhr) - document.body = xhr.responseXML.body + 
const imported = document.importNode(xhr.responseXML.getElementById("content"), true) + document.getElementById("content").replaceWith(imported) document.title = xhr.responseXML.title for (const e of xhr.responseXML.head.children) { if (["LINK"].includes(e.tagName)) { From 048709b2d1cbda604585f83bbc82d7cd5c510baf Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Wed, 5 Nov 2025 00:06:49 +1300 Subject: [PATCH 8/8] fix cache control for jsonp.js --- src/page-wiki-jsonp.rkt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/page-wiki-jsonp.rkt b/src/page-wiki-jsonp.rkt index 3e49abf..7ff425b 100644 --- a/src/page-wiki-jsonp.rkt +++ b/src/page-wiki-jsonp.rkt @@ -9,7 +9,8 @@ "config.rkt" "../lib/url-utils.rkt" "../lib/xexpr-utils.rkt" - "../lib/archive-file-mappings.rkt") + "../lib/archive-file-mappings.rkt" + "static-data.rkt") (provide page-wiki-jsonp) @@ -51,7 +52,7 @@ var wikiname = ~v; var path = ~v; END wikiname path)) - (script (@ (src "/static/jsonp.js"))) + (script (@ (src ,(get-static-url "jsonp.js")))) (script (@ (async) (src ,wiki-page-script-url))) (script (@ (async) (src ,siteinfo-script-url)))) #:req req