diff --git a/lib/syntax.rkt b/lib/syntax.rkt index 10267dd..1c3fae2 100644 --- a/lib/syntax.rkt +++ b/lib/syntax.rkt @@ -4,6 +4,8 @@ (provide ; help make a nested if. if/in will gain the same false form of its containing if/out. if/out + ; let, but the value for each variable is evaluated within a thread + thread-let ; cond, but values can be defined between conditions cond/var ; wrap sql statements into lambdas so they can be executed during migration @@ -23,6 +25,7 @@ (provide transform-if/out + transform-thread-let transform/out-cond/var) (define (transform-if/out stx) @@ -48,6 +51,26 @@ [#t node]))) (datum->syntax stx (cons 'if result))) + (define (transform-thread-let stx) + (define tree (cdr (syntax->datum stx))) + (define defs (car tree)) + (define forms (cdr tree)) + (when (eq? (length forms) 0) + (error (format "thread-let: bad syntax (need some forms to execute after the threads)~n forms: ~a" forms))) + (define counter (build-list (length defs) values)) + (datum->syntax + stx + `(let ([chv (build-vector ,(length defs) (λ (_) (make-channel)))]) + ,@(map (λ (n) + (define def (list-ref defs n)) + `(thread (λ () (channel-put (vector-ref chv ,n) (let _ () ,@(cdr def)))))) + counter) + (let ,(map (λ (n) + (define def (list-ref defs n)) + `(,(car def) (channel-get (vector-ref chv ,n)))) + counter) + ,@forms)))) + (define (transform/out-cond/var stx) (define tree (transform-cond/var (cdr (syntax->datum stx)))) (datum->syntax @@ -60,11 +83,11 @@ (if (null? rest) `(cond ,@els) `(cond - ,@els - [#t - (let* ,(for/list ([var vars]) - (cdr var)) - ,(transform-cond/var rest))])))) + ,@els + [#t + (let ,(for/list ([var vars]) + (cdr var)) + ,(transform-cond/var rest))])))) ;; the syntax definitions and their tests go below here @@ -96,6 +119,35 @@ (check-equal? (if/out #t (if/in #f 'yes) 'no) 'no) (check-equal? (if/out #f (if/in #f 'yes) 'no) 'no)) +(define-syntax (thread-let stx) + (transform-thread-let stx)) +(module+ test + ; check that it is transformed as expected + (check-syntax-equal? + (transform-thread-let + #'(thread-let ([a (hey "this is a")] + [b (hey "this is b")]) + (list a b))) + #'(let ([chv (build-vector 2 (λ (_) (make-channel)))]) + (thread (λ () (channel-put (vector-ref chv 0) (let _ () (hey "this is a"))))) + (thread (λ () (channel-put (vector-ref chv 1) (let _ () (hey "this is b"))))) + (let ([a (channel-get (vector-ref chv 0))] + [b (channel-get (vector-ref chv 1))]) + (list a b)))) + ; check that they actually execute concurrently + (define ch (make-channel)) + (check-equal? (thread-let ([a (begin + (channel-put ch 'a) + (channel-get ch))] + [b (begin0 + (channel-get ch) + (channel-put ch 'b))]) + (list a b)) + '(b a)) + ; check that it assigns the correct value to the correct variable + (check-equal? (thread-let ([a (sleep 0) 'a] [b 'b]) (list a b)) + '(a b))) + (define-syntax (cond/var stx) (transform/out-cond/var stx)) (module+ test diff --git a/lib/thread-utils.rkt b/lib/thread-utils.rkt deleted file mode 100644 index f907dac..0000000 --- a/lib/thread-utils.rkt +++ /dev/null @@ -1,72 +0,0 @@ -#lang racket/base -(require (prefix-in easy: net/http-easy) - "../src/data.rkt" - "xexpr-utils.rkt") - -(provide - thread-values) - -(module+ test - (require rackunit)) - -(define (thread-values . thunks) - (parameterize-break #t - (define the-exn (box #f)) - (define original-thread (current-thread)) - (define (break e) - (when (box-cas! the-exn #f e) - (break-thread original-thread)) - (sleep 0)) - (define-values (threads channels) - (for/fold ([ts null] - [chs null] - #:result (values (reverse ts) (reverse chs))) - ([th thunks]) - (define ch (make-channel)) - (define t - (thread (λ () - (with-handlers ([exn? break]) - (channel-put ch (th)))))) - (values (cons t ts) (cons ch chs)))) - (apply - values - (with-handlers ([exn:break? (λ (_) - (for ([t threads]) (kill-thread t)) - (if (unbox the-exn) - (raise (unbox the-exn)) - (error 'thread-values "a thread broke, but without reporting its exception")))]) - (for/list ([ch channels]) - (channel-get ch)))))) - -(module+ test - ; check that they actually execute concurrently - (define ch (make-channel)) - (check-equal? (let-values ([(a b) - (thread-values - (λ () - (begin - (channel-put ch 'a) - (channel-get ch))) - (λ () - (begin0 - (channel-get ch) - (channel-put ch 'b))))]) - (list a b)) - '(b a)) - ; check that it assigns the correct value to the correct variable - (check-equal? (let-values ([(a b) - (thread-values - (λ () (sleep 0) 'a) - (λ () 'b))]) - (list a b)) - '(a b)) - ; check that exceptions are passed to the original thread, and other threads are killed - ;; TODO: if the other thread was making an HTTP request, could it be left stuck open by the kill? - (check-equal? (let* ([x "!"] - [res - (with-handlers ([exn:fail:user? (λ (e) (exn-message e))]) - (thread-values - (λ () (sleep 0) (set! x "?") (println "this side effect should not happen")) - (λ () (raise-user-error "catch me"))))]) - (string-append res x)) - "catch me!")) diff --git a/lib/tree-updater.rkt b/lib/tree-updater.rkt index 98a989b..a6edb6b 100644 --- a/lib/tree-updater.rkt +++ b/lib/tree-updater.rkt @@ -277,8 +277,8 @@ ; check that noscript images are removed (check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f) ; benchmark - (when (file-exists? "../storage/Frog.html") - (with-input-from-file "../storage/Frog.html" + (when (file-exists? "../misc/Frog.html") + (with-input-from-file "../misc/Frog.html" (λ () (define tree (html->xexp (current-input-port))) (time (length (update-tree-wiki tree "minecraft"))))))) diff --git a/src/application-globals.rkt b/src/application-globals.rkt index 4a797ad..241b9ad 100644 --- a/src/application-globals.rkt +++ b/src/application-globals.rkt @@ -201,16 +201,13 @@ ,(if (config-true? 'instance_is_official) (let ([balloon '(img (@ (src "/static/three-balloons.png") (class "bw-balloon") (title "Image Source: pngimg.com/image/4955 | License: CC BY-NC 4.0 | Modifications: Resized") (width "52") (height "56")))] [extension-eligible? - (cond/var - [(not req) #f] - (var ua-pair (assq 'user-agent (request-headers req))) - [(not ua-pair) #f] - (var ua (string-downcase (cdr ua-pair))) - ;; everyone pretends to be chrome, so we do it in reverse - ;; this excludes common browsers that don't support the extension - [#t (and (not (string-contains? ua "edge/")) - (not (string-contains? ua "edg/")) - (not (string-contains? ua "mobile")))])]) + (and req (let* ([ua-pair (assq 'user-agent (request-headers req))] + [ua (string-downcase (cdr ua-pair))]) + ;; everyone pretends to be chrome, so we do it in reverse + ;; this excludes common browsers that don't support the extension + (and (not (string-contains? ua "edge/")) + (not (string-contains? ua "edg/")) + (not (string-contains? ua "mobile")))))]) `(div (@ (class "bw-top-banner")) ,balloon (div diff --git a/src/extwiki-generic.rkt b/src/extwiki-generic.rkt index fe30b14..6180676 100644 --- a/src/extwiki-generic.rkt +++ b/src/extwiki-generic.rkt @@ -20,7 +20,6 @@ (define wikis '(((gallowmere) "MediEvil Wiki" "https://medievil.wiki/w/Main_Page" #f #f) ((fallout) "Fallout Wiki" "https://fallout.wiki/wiki/Fallout_Wiki" #f "https://fallout.wiki/api.php") - ((drawntolife) "Wapopedia" "https://drawntolife.wiki/en/Main_Page" #f "https://drawntolife.wiki/w/api.php") )) (define wikis-hash (make-hash)) @@ -29,7 +28,7 @@ (hash-set! wikis-hash (symbol->string wikiname) w))) (module+ test (check-equal? (cadr (hash-ref wikis-hash "gallowmere")) - "MediEvil Wiki")) + "Gallowmere Historia")) (define (parse-table table) (define rows (query-selector (λ (t a c) (eq? t 'tr)) table)) @@ -59,10 +58,8 @@ [(not logo) (values #f '("Data table must have a \"Logo\" column"))] [(null? logo) (values #f '("Logo table column must have a link"))] (var href (get-attribute 'href (bits->attributes (car (hash-ref table 'logo))))) - (var src (get-attribute 'src (bits->attributes (car (hash-ref table 'logo))))) - (var true-src (or href src)) - [(not true-src) (values #f '("Logo table column must have a link"))] - [#t (values true-src null)])) + [(not href) (values #f '("Logo table column must have a link"))] + [#t (values href null)])) (define (get-api-endpoint wiki) (define main-page (third wiki)) @@ -80,7 +77,7 @@ (or override (match main-page [(regexp #rx"/$") (string-append main-page "Special:Search")] - [(regexp #rx"^(.*/(?:en|w[^./]*)/)" (list _ wiki-prefix)) (string-append wiki-prefix "Special:Search")] + [(regexp #rx"^(.*/w[^./]*/)" (list _ wiki-prefix)) (string-append wiki-prefix "Special:Search")] [_ (error 'get-search-page "unknown url format: ~a" main-page)]))) (define/memoize (get-redirect-content wikiname) #:hash hash @@ -112,10 +109,8 @@ ,@body (p "This wiki's core community has wholly migrated away from Fandom. You should " (a (@ (href ,go)) "go to " ,display-name " now!"))) - ,(if logo - `(div (@ (class "niwa__right")) - (img (@ (class "niwa__logo") (src ,logo)))) - "")) + (div (@ (class "niwa__right")) + (img (@ (class "niwa__logo") (src ,logo))))) ,(if (pair? links) `(p (@ (class "niwa__feedback")) ,@(add-between links " / ")) @@ -127,4 +122,4 @@ "")))] [#t #f])) (module+ test - (check-not-false ((get-redirect-content "gallowmere") "MediEvil Wiki"))) + ((get-redirect-content "gallowmere") "Gallowmere Historia")) diff --git a/src/page-category.rkt b/src/page-category.rkt index 29b541c..aa48ffd 100644 --- a/src/page-category.rkt +++ b/src/page-category.rkt @@ -17,7 +17,6 @@ "data.rkt" "page-wiki.rkt" "../lib/syntax.rkt" - "../lib/thread-utils.rkt" "../lib/url-utils.rkt" "whole-utils.rkt" "../lib/xexpr-utils.rkt") @@ -70,59 +69,55 @@ (define origin (format "https://~a.fandom.com" wikiname)) (define source-url (format "~a/wiki/~a" origin prefixed-category)) - (define-values (members-data page-data siteinfo) - (thread-values - (λ () - (define dest-url - (format "~a/api.php?~a" - origin - (params->query `(("action" . "query") - ("list" . "categorymembers") - ("cmtitle" . ,prefixed-category) - ("cmlimit" . "max") - ("formatversion" . "2") - ("format" . "json"))))) - (log-outgoing dest-url) - (define dest-res (easy:get dest-url #:timeouts timeouts)) - (easy:response-json dest-res)) - (λ () - (define dest-url - (format "~a/api.php?~a" - origin - (params->query `(("action" . "parse") - ("page" . ,prefixed-category) - ("prop" . "text|headhtml|langlinks") - ("formatversion" . "2") - ("format" . "json"))))) - (log-outgoing dest-url) - (define dest-res (easy:get dest-url #:timeouts timeouts)) - (easy:response-json dest-res)) - (λ () - (siteinfo-fetch wikiname)))) + (thread-let + ([members-data (define dest-url + (format "~a/api.php?~a" + origin + (params->query `(("action" . "query") + ("list" . "categorymembers") + ("cmtitle" . ,prefixed-category) + ("cmlimit" . "max") + ("formatversion" . "2") + ("format" . "json"))))) + (log-outgoing dest-url) + (define dest-res (easy:get dest-url #:timeouts timeouts)) + (easy:response-json dest-res)] + [page-data (define dest-url + (format "~a/api.php?~a" + origin + (params->query `(("action" . "parse") + ("page" . ,prefixed-category) + ("prop" . "text|headhtml|langlinks") + ("formatversion" . "2") + ("format" . "json"))))) + (log-outgoing dest-url) + (define dest-res (easy:get dest-url #:timeouts timeouts)) + (easy:response-json dest-res)] + [siteinfo (siteinfo-fetch wikiname)]) - (define title (preprocess-html-wiki (jp "/parse/title" page-data prefixed-category))) - (define page-html (preprocess-html-wiki (jp "/parse/text" page-data ""))) - (define page (html->xexp page-html)) - (define head-data ((head-data-getter wikiname) page-data)) - (define body (generate-results-page - #:req req - #:source-url source-url - #:wikiname wikiname - #:title title - #:members-data members-data - #:page page - #:head-data head-data - #:siteinfo siteinfo)) + (define title (preprocess-html-wiki (jp "/parse/title" page-data prefixed-category))) + (define page-html (preprocess-html-wiki (jp "/parse/text" page-data ""))) + (define page (html->xexp page-html)) + (define head-data ((head-data-getter wikiname) page-data)) + (define body (generate-results-page + #:req req + #:source-url source-url + #:wikiname wikiname + #:title title + #:members-data members-data + #:page page + #:head-data head-data + #:siteinfo siteinfo)) - (when (config-true? 'debug) - ; used for its side effects - ; convert to string with error checking, error will be raised if xexp is invalid - (xexp->html body)) - (response/output - #:code 200 - #:headers (build-headers always-headers) - (λ (out) - (write-html body out))))) + (when (config-true? 'debug) + ; used for its side effects + ; convert to string with error checking, error will be raised if xexp is invalid + (xexp->html body)) + (response/output + #:code 200 + #:headers (build-headers always-headers) + (λ (out) + (write-html body out)))))) (module+ test (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Ankle_Monitor") (generate-results-page diff --git a/src/page-file.rkt b/src/page-file.rkt index 2a7332c..2062892 100644 --- a/src/page-file.rkt +++ b/src/page-file.rkt @@ -17,7 +17,6 @@ "data.rkt" "page-wiki.rkt" "../lib/syntax.rkt" - "../lib/thread-utils.rkt" "../lib/url-utils.rkt" "whole-utils.rkt" "../lib/xexpr-utils.rkt") @@ -109,45 +108,42 @@ (define origin (format "https://~a.fandom.com" wikiname)) (define source-url (format "~a/wiki/~a" origin prefixed-title)) - (define-values (media-detail siteinfo) - (thread-values - (λ () - (define dest-url - (format "~a/wikia.php?~a" - origin - (params->query `(("format" . "json") ("controller" . "Lightbox") - ("method" . "getMediaDetail") - ("fileTitle" . ,prefixed-title))))) - (log-outgoing dest-url) - (define dest-res (easy:get dest-url #:timeouts timeouts)) - (easy:response-json dest-res)) - (λ () - (siteinfo-fetch wikiname)))) - (if (not (jp "/exists" media-detail #f)) - (next-dispatcher) - (response-handler - (define file-title (jp "/fileTitle" media-detail "")) - (define title - (if (non-empty-string? file-title) (format "File:~a" file-title) prefixed-title)) - (define image-content-type - (if (non-empty-string? (jp "/videoEmbedCode" media-detail "")) - #f - (url-content-type (jp "/imageUrl" media-detail)))) - (define body - (generate-results-page #:req req - #:source-url source-url - #:wikiname wikiname - #:title title - #:media-detail media-detail - #:image-content-type image-content-type - #:siteinfo siteinfo)) - (when (config-true? 'debug) - ; used for its side effects - ; convert to string with error checking, error will be raised if xexp is invalid - (xexp->html body)) - (response/output #:code 200 - #:headers (build-headers always-headers) - (λ (out) (write-html body out))))))) + (thread-let + ([media-detail (define dest-url + (format "~a/wikia.php?~a" + origin + (params->query `(("format" . "json") ("controller" . "Lightbox") + ("method" . "getMediaDetail") + ("fileTitle" . ,prefixed-title))))) + (log-outgoing dest-url) + (define dest-res (easy:get dest-url #:timeouts timeouts)) + (easy:response-json dest-res)] + [siteinfo (siteinfo-fetch wikiname)]) + (if (not (jp "/exists" media-detail #f)) + (next-dispatcher) + (response-handler + (define file-title (jp "/fileTitle" media-detail "")) + (define title + (if (non-empty-string? file-title) (format "File:~a" file-title) prefixed-title)) + (define image-content-type + (if (non-empty-string? (jp "/videoEmbedCode" media-detail "")) + #f + (url-content-type (jp "/imageUrl" media-detail)))) + (define body + (generate-results-page #:req req + #:source-url source-url + #:wikiname wikiname + #:title title + #:media-detail media-detail + #:image-content-type image-content-type + #:siteinfo siteinfo)) + (when (config-true? 'debug) + ; used for its side effects + ; convert to string with error checking, error will be raised if xexp is invalid + (xexp->html body)) + (response/output #:code 200 + #:headers (build-headers always-headers) + (λ (out) (write-html body out)))))))) (module+ test (parameterize ([(config-parameter 'strict_proxy) "true"]) (check-equal? (get-media-html "https://static.wikia.nocookie.net/a" "image/jpeg") diff --git a/src/page-search.rkt b/src/page-search.rkt index ce527c0..2066af3 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -14,7 +14,6 @@ "config.rkt" "data.rkt" "../lib/syntax.rkt" - "../lib/thread-utils.rkt" "../lib/url-utils.rkt" "whole-utils.rkt" "../lib/xexpr-utils.rkt") @@ -73,26 +72,23 @@ ("formatversion" . "2") ("format" . "json"))))) - (define-values (dest-res siteinfo) - (thread-values - (λ () - (log-outgoing dest-url) - (easy:get dest-url #:timeouts timeouts)) - (λ () - (siteinfo-fetch wikiname)))) + (thread-let + ([dest-res (log-outgoing dest-url) + (easy:get dest-url #:timeouts timeouts)] + [siteinfo (siteinfo-fetch wikiname)]) - (define data (easy:response-json dest-res)) + (define data (easy:response-json dest-res)) - (define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo)) - (when (config-true? 'debug) - ; used for its side effects - ; convert to string with error checking, error will be raised if xexp is invalid - (xexp->html body)) - (response/output - #:code 200 - #:headers (build-headers always-headers) - (λ (out) - (write-html body out))))) + (define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo)) + (when (config-true? 'debug) + ; used for its side effects + ; convert to string with error checking, error will be raised if xexp is invalid + (xexp->html body)) + (response/output + #:code 200 + #:headers (build-headers always-headers) + (λ (out) + (write-html body out)))))) (module+ test (parameterize ([(config-parameter 'feature_offline::only) "false"]) (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule") diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index 8df701c..3fa905b 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -19,7 +19,6 @@ "data.rkt" "../lib/pure-utils.rkt" "../lib/syntax.rkt" - "../lib/thread-utils.rkt" "../lib/tree-updater.rkt" "../lib/url-utils.rkt" "whole-utils.rkt" @@ -42,62 +41,59 @@ (define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/")) (define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path)) - (define-values (dest-res siteinfo) - (thread-values - (λ () - (define dest-url - (format "~a/api.php?~a" - origin - (params->query `(("action" . "parse") - ("page" . ,path) - ("prop" . "text|headhtml|langlinks") - ("formatversion" . "2") - ("format" . "json"))))) - (log-outgoing dest-url) - (easy:get dest-url - #:timeouts timeouts - #:headers `#hasheq((cookie . ,(format "theme=~a" (user-cookies^-theme user-cookies)))))) - (λ () - (siteinfo-fetch wikiname)))) + (thread-let + ([dest-res (define dest-url + (format "~a/api.php?~a" + origin + (params->query `(("action" . "parse") + ("page" . ,path) + ("prop" . "text|headhtml|langlinks") + ("formatversion" . "2") + ("format" . "json"))))) + (log-outgoing dest-url) + (easy:get dest-url + #:timeouts timeouts + #:headers `#hasheq((cookie . ,(format "theme=~a" (user-cookies^-theme user-cookies)))))] + [siteinfo (siteinfo-fetch wikiname)]) - (cond - [(eq? 200 (easy:response-status-code dest-res)) - (let* ([data (easy:response-json dest-res)] - [title (jp "/parse/title" data "")] - [page-html (jp "/parse/text" data "")] - [page-html (preprocess-html-wiki page-html)] - [page (html->xexp page-html)] - [head-data ((head-data-getter wikiname) data)]) - (if (equal? "missingtitle" (jp "/error/code" data #f)) - (next-dispatcher) - (response-handler - (define body - (generate-wiki-page - (update-tree-wiki page wikiname) - #:req req - #:source-url source-url - #:wikiname wikiname - #:title title - #:head-data head-data - #:siteinfo siteinfo)) - (define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body))) - (define redirect-query-parameter (dict-ref (url-query (request-uri req)) 'redirect "yes")) - (define headers - (build-headers - always-headers - ; redirect-query-parameter: only the string "no" is significant: - ; https://github.com/Wikia/app/blob/fe60579a53f16816d65dad1644363160a63206a6/includes/Wiki.php#L367 - (when (and redirect-msg - (not (equal? redirect-query-parameter "no"))) - (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))] - [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) - (header #"Refresh" value))))) - (when (config-true? 'debug) - ; used for its side effects - ; convert to string with error checking, error will be raised if xexp is invalid - (xexp->html body)) - (response/output - #:code 200 - #:headers headers - (λ (out) - (write-html body out))))))])) + (cond + [(eq? 200 (easy:response-status-code dest-res)) + (let* ([data (easy:response-json dest-res)] + [title (jp "/parse/title" data "")] + [page-html (jp "/parse/text" data "")] + [page-html (preprocess-html-wiki page-html)] + [page (html->xexp page-html)] + [head-data ((head-data-getter wikiname) data)]) + (if (equal? "missingtitle" (jp "/error/code" data #f)) + (next-dispatcher) + (response-handler + (define body + (generate-wiki-page + (update-tree-wiki page wikiname) + #:req req + #:source-url source-url + #:wikiname wikiname + #:title title + #:head-data head-data + #:siteinfo siteinfo)) + (define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body))) + (define redirect-query-parameter (dict-ref (url-query (request-uri req)) 'redirect "yes")) + (define headers + (build-headers + always-headers + ; redirect-query-parameter: only the string "no" is significant: + ; https://github.com/Wikia/app/blob/fe60579a53f16816d65dad1644363160a63206a6/includes/Wiki.php#L367 + (when (and redirect-msg + (not (equal? redirect-query-parameter "no"))) + (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))] + [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) + (header #"Refresh" value))))) + (when (config-true? 'debug) + ; used for its side effects + ; convert to string with error checking, error will be raised if xexp is invalid + (xexp->html body)) + (response/output + #:code 200 + #:headers headers + (λ (out) + (write-html body out))))))])))