diff --git a/info.rkt b/info.rkt index 74152ef..46512df 100644 --- a/info.rkt +++ b/info.rkt @@ -1,3 +1,3 @@ #lang info -(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "ini-lib")) +(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "ini-lib" "memo")) diff --git a/src/application-globals.rkt b/src/application-globals.rkt index c214924..7f16bee 100644 --- a/src/application-globals.rkt +++ b/src/application-globals.rkt @@ -9,6 +9,8 @@ "url-utils.rkt") (provide + ; headers to always send on all http responses + always-headers ; timeout durations for http-easy requests timeouts ; generates a consistent footer @@ -22,6 +24,8 @@ (require rackunit html-writing)) +(define always-headers + (list (header #"Referrer-Policy" #"same-origin"))) ; header to not send referers to fandom (define timeouts (easy:make-timeout-config #:lease 5 #:connect 5)) (define (application-footer source-url #:license [license-in #f]) @@ -48,8 +52,8 @@ ,(if source-url `(div (p "This page displays proxied content from " (a (@ (href ,source-url) (rel "noreferrer")) ,source-url) - ,(format ". Text content is available under the ~a license, " (license-text license)) - (a (@ (href ,(license-url license))) "see license info.") + ,(format ". Text content is available under the ~a license, " (license^-text license)) + (a (@ (href ,(license^-url license))) "see license info.") " Media files may have different copying restrictions.") (p ,(format "Fandom is a trademark of Fandom, Inc. ~a is not affiliated with Fandom." (config-get 'application_name)))) `(div (p "Text content on wikis run by Fandom is available under the Creative Commons Attribution-Share Alike License 3.0 (Unported), " @@ -63,7 +67,8 @@ #:wikiname wikiname #:title title #:body-class [body-class-in #f] - #:license [license #f]) + #:siteinfo [siteinfo-in #f]) + (define siteinfo (or siteinfo-in siteinfo-default)) (define body-class (if (not body-class-in) "skin-fandomdesktop" body-class-in)) @@ -82,7 +87,10 @@ `(html (head (meta (@ (name "viewport") (content "width=device-width, initial-scale=1"))) - (title ,(format "~a | ~a" title (config-get 'application_name))) + (title ,(format "~a | ~a+~a" + title + (regexp-replace #rx" ?Wiki$" (siteinfo^-sitename siteinfo) "") + (config-get 'application_name))) ,@(map (λ (url) `(link (@ (rel "stylesheet") (type "text/css") (href ,url)))) (required-styles (format "https://~a.fandom.com" wikiname))) @@ -101,7 +109,7 @@ (div (@ (id "content") #;(class "page-content")) (div (@ (id "mw-content-text")) ,content)) - ,(application-footer source-url #:license license))))))) + ,(application-footer source-url #:license (siteinfo^-license siteinfo)))))))) (module+ test (define page (parameterize ([(config-parameter 'strict_proxy) "true"]) diff --git a/src/data.rkt b/src/data.rkt index 8eb0cd6..6673e4c 100644 --- a/src/data.rkt +++ b/src/data.rkt @@ -1,33 +1,36 @@ #lang racket/base -(require (prefix-in easy: net/http-easy) +(require racket/list + (prefix-in easy: net/http-easy) + memo "url-utils.rkt" "xexpr-utils.rkt") (provide - (struct-out license) - license-default - license-auto) + (struct-out siteinfo^) + (struct-out license^) + siteinfo-fetch + siteinfo-default + license-default) -(struct license (text url) #:transparent) -(define license-default (license "CC-BY-SA" "https://www.fandom.com/licensing")) -(define license-hash (make-hash)) -(define (license-fetch wikiname) +(struct siteinfo^ (sitename basepage license) #:transparent) +(struct license^ (text url) #:transparent) + +(define license-default (license^ "CC-BY-SA" "https://www.fandom.com/licensing")) +(define siteinfo-default (siteinfo^ "Test Wiki" "Main_Page" license-default)) + +(define/memoize (siteinfo-fetch wikiname) #:hash hash (define dest-url (format "https://~a.fandom.com/api.php?~a" wikiname (params->query '(("action" . "query") ("meta" . "siteinfo") - ("siprop" . "rightsinfo") + ("siprop" . "general|rightsinfo") ("format" . "json") ("formatversion" . "2"))))) (log-outgoing dest-url) (define res (easy:get dest-url)) (define data (easy:response-json res)) - (license (jp "/query/rightsinfo/text" data) - (jp "/query/rightsinfo/url" data))) -(define (license-auto wikiname) - (if (hash-has-key? license-hash wikiname) - (hash-ref license-hash wikiname) - (let ([result (license-fetch wikiname)]) - (hash-set! license-hash wikiname result) - result))) + (siteinfo^ (jp "/query/general/sitename" data) + (second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data))) + (license^ (jp "/query/rightsinfo/text" data) + (jp "/query/rightsinfo/url" data)))) diff --git a/src/page-category.rkt b/src/page-category.rkt index f7c43b2..6c0a733 100644 --- a/src/page-category.rkt +++ b/src/page-category.rkt @@ -35,14 +35,14 @@ #:members-data members-data #:page page #:body-class [body-class #f] - #:license [license #f]) + #:siteinfo [siteinfo #f]) (define members (jp "/query/categorymembers" members-data)) (generate-wiki-page #:source-url source-url #:wikiname wikiname #:title title #:body-class body-class - #:license license + #:siteinfo siteinfo `(div ,(update-tree-wiki page wikiname) (hr) @@ -89,7 +89,7 @@ (log-outgoing dest-url) (define dest-res (easy:get dest-url #:timeouts timeouts)) (easy:response-json dest-res)] - [license (license-auto wikiname)]) + [siteinfo (siteinfo-fetch wikiname)]) (define title (preprocess-html-wiki (jp "/parse/title" page-data prefixed-category))) (define page-html (preprocess-html-wiki (jp "/parse/text" page-data ""))) @@ -105,7 +105,7 @@ #:members-data members-data #:page page #:body-class body-class - #:license license)) + #:siteinfo siteinfo)) (when (config-true? 'debug) ; used for its side effects @@ -113,6 +113,7 @@ (xexp->html body)) (response/output #:code 200 + #:headers (build-headers always-headers) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-home.rkt b/src/page-home.rkt index 7c7aaa1..b16f66a 100644 --- a/src/page-home.rkt +++ b/src/page-home.rkt @@ -4,6 +4,7 @@ html-writing web-server/http "application-globals.rkt" + "url-utils.rkt" "xexpr-utils.rkt" "config.rkt") @@ -81,6 +82,7 @@ (define (page-home req) (response/output #:code 200 + #:headers (build-headers always-headers) (λ (out) (write-html body out)))) diff --git a/src/page-proxy.rkt b/src/page-proxy.rkt index 8dbf0f9..3c22e1e 100644 --- a/src/page-proxy.rkt +++ b/src/page-proxy.rkt @@ -8,6 +8,7 @@ net/url web-server/http (only-in web-server/dispatchers/dispatch next-dispatcher) + "application-globals.rkt" "url-utils.rkt" "xexpr-utils.rkt") @@ -26,6 +27,7 @@ (response/output #:code (easy:response-status-code dest-r) #:mime-type (easy:response-headers-ref dest-r 'content-type) + #:headers (build-headers always-headers) (λ (out) (copy-port (easy:response-output dest-r) out) (easy:response-close! dest-r)))))) diff --git a/src/page-redirect-wiki-home.rkt b/src/page-redirect-wiki-home.rkt index b0c3df3..c8e6dde 100644 --- a/src/page-redirect-wiki-home.rkt +++ b/src/page-redirect-wiki-home.rkt @@ -2,6 +2,7 @@ (require net/url web-server/http "application-globals.rkt" + "data.rkt" "url-utils.rkt" "xexpr-utils.rkt") @@ -11,5 +12,6 @@ (define (redirect-wiki-home req) (response-handler (define wikiname (path/param-path (car (url-path (request-uri req))))) - (define dest (format "/~a/wiki/Main_Page" wikiname)) + (define siteinfo (siteinfo-fetch wikiname)) + (define dest (format "/~a/wiki/~a" wikiname (or (siteinfo^-basepage siteinfo) "Main_Page"))) (generate-redirect dest))) diff --git a/src/page-search.rkt b/src/page-search.rkt index 387deab..81a88b2 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -25,13 +25,13 @@ (define search-json-data '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181))))))))) -(define (generate-results-page dest-url wikiname query data #:license [license #f]) +(define (generate-results-page dest-url wikiname query data #:siteinfo [siteinfo #f]) (define search-results (jp "/query/search" data)) (generate-wiki-page #:source-url dest-url #:wikiname wikiname #:title "Search Results" - #:license license + #:siteinfo siteinfo `(div (@ (class "mw-parser-output")) (p ,(format "~a results found for " (length search-results)) (strong ,query)) @@ -70,17 +70,18 @@ (thread-let ([dest-res (log-outgoing dest-url) (easy:get dest-url #:timeouts timeouts)] - [license (license-auto wikiname)]) + [siteinfo (siteinfo-fetch wikiname)]) (define data (easy:response-json dest-res)) - (define body (generate-results-page dest-url wikiname query data #:license license)) + (define body (generate-results-page dest-url wikiname query data #:siteinfo siteinfo)) (when (config-true? 'debug) ; used for its side effects ; convert to string with error checking, error will be raised if xexp is invalid (xexp->html body)) (response/output #:code 200 + #:headers (build-headers always-headers) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index a218dfe..effa40a 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -152,15 +152,17 @@ (λ (v) (dict-update v 'rel (λ (s) (list (string-append (car s) " noreferrer"))) '("")))) - ; proxy images from inline styles - (curry attribute-maybe-update 'style - (λ (style) - (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style - (λ (whole url) - (string-append - "url(" - (u-proxy-url url) - ")"))))) + ; proxy images from inline styles, if strict_proxy is set + (curry u + (λ (v) (config-true? 'strict_proxy)) + (λ (v) (attribute-maybe-update 'style + (λ (style) + (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style + (λ (whole url) + (string-append + "url(" + (u-proxy-url url) + ")")))) v))) ; and also their links, if strict_proxy is set (curry u (λ (v) @@ -168,8 +170,10 @@ (eq? element-type 'a) (has-class? "image-thumbnail" v))) (λ (v) (attribute-maybe-update 'href u-proxy-url v))) - ; proxy images from src attributes - (curry attribute-maybe-update 'src u-proxy-url) + ; proxy images from src attributes, if strict_proxy is set + (curry u + (λ (v) (config-true? 'strict_proxy)) + (λ (v) (attribute-maybe-update 'src u-proxy-url v))) ; don't lazyload images (curry u (λ (v) (dict-has-key? v 'data-src)) @@ -248,7 +252,7 @@ ("format" . "json"))))) (log-outgoing dest-url) (easy:get dest-url #:timeouts timeouts)] - [license (license-auto wikiname)]) + [siteinfo (siteinfo-fetch wikiname)]) (cond [(eq? 200 (easy:response-status-code dest-res)) @@ -271,13 +275,15 @@ #:wikiname wikiname #:title title #:body-class body-class - #:license license)) + #:siteinfo siteinfo)) (define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body))) - (define headers (if redirect-msg - (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))] - [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) - (list (header #"Refresh" value))) - (list))) + (define headers + (build-headers + always-headers + (when redirect-msg + (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))] + [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) + (header #"Refresh" value))))) (when (config-true? 'debug) ; used for its side effects ; convert to string with error checking, error will be raised if xexp is invalid diff --git a/src/url-utils.rkt b/src/url-utils.rkt index 62f7cc2..10df089 100644 --- a/src/url-utils.rkt +++ b/src/url-utils.rkt @@ -2,6 +2,8 @@ (require racket/string "config.rkt" "pure-utils.rkt") +(require/typed web-server/http/request-structs + [#:opaque Header header?]) (provide ; regex to match wiki names @@ -13,7 +15,9 @@ ; check whether a url is on a domain controlled by fandom is-fandom-url? ; prints "out: " - log-outgoing) + log-outgoing + ; pass in a header, headers, or something useless. they'll all combine into a list + build-headers) (module+ test (require "typed-rackunit.rkt")) @@ -81,3 +85,16 @@ (define (log-outgoing url-string) (when (config-true? 'log_outgoing) (printf "out: ~a~n" url-string))) + +(: build-headers ((U Header (Listof Header) False Void) * -> (Listof Header))) +(define (build-headers . fs) + (apply + append + (map (λ ([f : (U Header (Listof Header) False Void)]) + (cond + [(not f) null] + [(void? f) null] + [(null? f) null] + [(header? f) (list f)] + [(pair? f) f])) + fs)))