From 5a59545963d90eecf0715f4b0788d6a189b8971a Mon Sep 17 00:00:00 2001 From: blankie Date: Sun, 9 Oct 2022 10:53:02 +0700 Subject: [PATCH 1/8] Set Referrer-Policy to no-referrer Fandom sends a fake 404 to media if there's a Referer header that has an origin that's not Fandom. However, we can choose not to send the header by setting Referrer-Policy. See also: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy --- src/application-globals.rkt | 3 +++ src/page-category.rkt | 1 + src/page-search.rkt | 1 + src/page-wiki.rkt | 30 +++++++++++++++++------------- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/application-globals.rkt b/src/application-globals.rkt index c214924..9a21592 100644 --- a/src/application-globals.rkt +++ b/src/application-globals.rkt @@ -9,6 +9,8 @@ "url-utils.rkt") (provide + ; header to not send referers to fandom + referrer-policy ; timeout durations for http-easy requests timeouts ; generates a consistent footer @@ -22,6 +24,7 @@ (require rackunit html-writing)) +(define referrer-policy (header #"Referrer-Policy" #"no-referrer")) (define timeouts (easy:make-timeout-config #:lease 5 #:connect 5)) (define (application-footer source-url #:license [license-in #f]) diff --git a/src/page-category.rkt b/src/page-category.rkt index f7c43b2..c9c3ec2 100644 --- a/src/page-category.rkt +++ b/src/page-category.rkt @@ -113,6 +113,7 @@ (xexp->html body)) (response/output #:code 200 + #:headers (list referrer-policy) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-search.rkt b/src/page-search.rkt index 387deab..d42fce9 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -81,6 +81,7 @@ (xexp->html body)) (response/output #:code 200 + #:headers (list referrer-policy) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index a218dfe..be24af8 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -152,15 +152,17 @@ (λ (v) (dict-update v 
'rel (λ (s) (list (string-append (car s) " noreferrer"))) '("")))) - ; proxy images from inline styles - (curry attribute-maybe-update 'style - (λ (style) - (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style - (λ (whole url) - (string-append - "url(" - (u-proxy-url url) - ")"))))) + ; proxy images from inline styles, if strict_proxy is set + (curry u + (λ (v) (config-true? 'strict_proxy)) + (λ (v) (attribute-maybe-update 'style + (λ (style) + (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style + (λ (whole url) + (string-append + "url(" + (u-proxy-url url) + ")")))) v))) ; and also their links, if strict_proxy is set (curry u (λ (v) @@ -168,8 +170,10 @@ (eq? element-type 'a) (has-class? "image-thumbnail" v))) (λ (v) (attribute-maybe-update 'href u-proxy-url v))) - ; proxy images from src attributes - (curry attribute-maybe-update 'src u-proxy-url) + ; proxy images from src attributes, if strict_proxy is set + (curry u + (λ (v) (config-true? 'strict_proxy)) + (λ (v) (attribute-maybe-update 'src u-proxy-url v))) ; don't lazyload images (curry u (λ (v) (dict-has-key? v 'data-src)) @@ -276,8 +280,8 @@ (define headers (if redirect-msg (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))] [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) - (list (header #"Refresh" value))) - (list))) + (list (header #"Refresh" value) referrer-policy)) + (list referrer-policy))) (when (config-true? 
'debug) ; used for its side effects ; convert to string with error checking, error will be raised if xexp is invalid From bf80692c4e6338dad466eb5727eb00f65ca94a52 Mon Sep 17 00:00:00 2001 From: blankie Date: Sat, 8 Oct 2022 15:35:35 +0700 Subject: [PATCH 2/8] Add support for File: pages Fixes https://lists.sr.ht/~cadence/breezewiki-discuss/%3Cb2835a70-5118-4df0-90c9-4333486a4b69%40nixnetmail.com%3E --- breezewiki.rkt | 2 + dist.rkt | 2 + src/dispatcher-tree.rkt | 1 + src/page-file.rkt | 166 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 171 insertions(+) create mode 100644 src/page-file.rkt diff --git a/breezewiki.rkt b/breezewiki.rkt index dfb405e..3fc9b8f 100644 --- a/breezewiki.rkt +++ b/breezewiki.rkt @@ -19,6 +19,7 @@ (require-reloadable "src/page-static.rkt" static-dispatcher) (require-reloadable "src/page-subdomain.rkt" subdomain-dispatcher) (require-reloadable "src/page-wiki.rkt" page-wiki) +(require-reloadable "src/page-file.rkt" page-file) (reload!) @@ -38,6 +39,7 @@ page-proxy page-search page-wiki + page-file redirect-wiki-home static-dispatcher subdomain-dispatcher)))) diff --git a/dist.rkt b/dist.rkt index a626695..805df48 100644 --- a/dist.rkt +++ b/dist.rkt @@ -13,6 +13,7 @@ (require (only-in "src/page-static.rkt" static-dispatcher)) (require (only-in "src/page-subdomain.rkt" subdomain-dispatcher)) (require (only-in "src/page-wiki.rkt" page-wiki)) +(require (only-in "src/page-file.rkt" page-file)) (serve/launch/wait #:listen-ip (if (config-true? 
'debug) "127.0.0.1" #f) @@ -27,6 +28,7 @@ page-proxy page-search page-wiki + page-file redirect-wiki-home static-dispatcher subdomain-dispatcher))) diff --git a/src/dispatcher-tree.rkt b/src/dispatcher-tree.rkt index f2c1412..b68cf9c 100644 --- a/src/dispatcher-tree.rkt +++ b/src/dispatcher-tree.rkt @@ -45,6 +45,7 @@ (pathprocedure:make "/proxy" (hash-ref ds 'page-proxy)) (pathprocedure:make "/search" (hash-ref ds 'page-global-search)) (filter:make (pregexp (format "^/~a/wiki/Category:.+$" px-wikiname)) (lift:make (hash-ref ds 'page-category))) + (filter:make (pregexp (format "^/~a/wiki/File:.+$" px-wikiname)) (lift:make (hash-ref ds 'page-file))) (filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (hash-ref ds 'page-wiki))) (filter:make (pregexp (format "^/~a/search$" px-wikiname)) (lift:make (hash-ref ds 'page-search))) (filter:make (pregexp (format "^/~a(/(wiki(/)?)?)?$" px-wikiname)) (lift:make (hash-ref ds 'redirect-wiki-home))) diff --git a/src/page-file.rkt b/src/page-file.rkt new file mode 100644 index 0000000..7635eb4 --- /dev/null +++ b/src/page-file.rkt @@ -0,0 +1,166 @@ +#lang racket/base +(require racket/dict + racket/list + racket/match + racket/string + (prefix-in easy: net/http-easy) + ; html libs + html-parsing + html-writing + ; web server libs + net/url + web-server/http + (only-in web-server/dispatchers/dispatch next-dispatcher) + #;(only-in web-server/http/redirect redirect-to) + "application-globals.rkt" + "config.rkt" + "data.rkt" + "page-wiki.rkt" + "syntax.rkt" + "url-utils.rkt" + "xexpr-utils.rkt") + +(provide page-file) + +(module+ test + (require rackunit) + (define test-media-detail + '#hasheq((fileTitle . "Example file") + (videoEmbedCode . "") + (imageUrl . "https://static.wikia.nocookie.net/examplefile") + (rawImageUrl . "https://static.wikia.nocookie.net/examplefile") + (userName . "blankie") + (isPostedIn . #t) + (smallerArticleList . (#hasheq((title . "Example_article") + (titleText . 
"Example article")))) + (articleListIsSmaller . 0) + (exists . #t) + (imageDescription . #f)))) + +(define (url-content-type url) + (log-outgoing url) + (define dest-res (easy:head url #:timeouts timeouts)) + (easy:response-headers-ref dest-res 'content-type)) + +(define (get-media-html url content-type) + (define maybe-proxied-url (if (config-true? 'strict_proxy) (u-proxy-url url) url)) + (cond + [(eq? content-type #f) `""] + [(regexp-match? #rx"(?i:^image/)" content-type) `(img (@ (src ,maybe-proxied-url)))] + [(regexp-match? #rx"(?i:^audio/|^application/ogg(;|$))" content-type) + `(audio (@ (src ,maybe-proxied-url) (controls)))] + [(regexp-match? #rx"(?i:^video/)" content-type) `(video (@ (src ,maybe-proxied-url) (controls)))] + [else `""])) + +(define (generate-results-page #:source-url source-url + #:wikiname wikiname + #:title title + #:media-detail media-detail + #:image-content-type image-content-type + #:license [license #f]) + (define video-embed-code (jp "/videoEmbedCode" media-detail "")) + (define raw-image-url (jp "/rawImageUrl" media-detail)) + (define image-url (jp "/imageUrl" media-detail raw-image-url)) + (define username (jp "/userName" media-detail)) + (define is-posted-in (jp "/isPostedIn" media-detail #f)) + (define smaller-article-list (jp "/smallerArticleList" media-detail)) + (define article-list-is-smaller (jp "/articleListIsSmaller" media-detail)) + (define image-description (jp "/imageDescription" media-detail #f)) + (define maybe-proxied-raw-image-url + (if (config-true? 'strict_proxy) (u-proxy-url raw-image-url) raw-image-url)) + (generate-wiki-page + #:source-url source-url + #:wikiname wikiname + #:title title + #:license license + `(div ,(if (non-empty-string? video-embed-code) + (update-tree-wiki (html->xexp (preprocess-html-wiki video-embed-code)) wikiname) + (get-media-html image-url image-content-type)) + (p ,(if (non-empty-string? 
video-embed-code) + `"" + `(span (a (@ (href ,maybe-proxied-raw-image-url)) "View original file") ". ")) + "Added by " + (a (@ (href ,(format "/~a/wiki/User:~a" wikiname username))) ,username) + "." + ,(if is-posted-in + `(span " Posted in " + ,@(map (λ (article) + (define page-path (jp "/title" article)) + (define title (jp "/titleText" article page-path)) + `(span ,(if (eq? (car smaller-article-list) article) "" ", ") + (a (@ (href ,(format "/~a/wiki/~a" wikiname page-path))) + ,title))) + smaller-article-list) + ,(if (eq? article-list-is-smaller 1) "…" ".")) + `"")) + ,(if (string? image-description) + (update-tree-wiki (html->xexp (preprocess-html-wiki image-description)) wikiname) + "")))) + +(define (page-file req) + (define wikiname (path/param-path (first (url-path (request-uri req))))) + (define prefixed-title (path/param-path (caddr (url-path (request-uri req))))) + (define origin (format "https://~a.fandom.com" wikiname)) + (define source-url (format "~a/wiki/~a" origin prefixed-title)) + + (thread-let ([media-detail + (define dest-url + (format "~a/wikia.php?~a" + origin + (params->query `(("format" . "json") ("controller" . "Lightbox") + ("method" . "getMediaDetail") + ("fileTitle" . ,prefixed-title))))) + (log-outgoing dest-url) + (define dest-res (easy:get dest-url #:timeouts timeouts)) + (easy:response-json dest-res)] + [license (license-auto wikiname)]) + (if (not (jp "/exists" media-detail #f)) + (next-dispatcher) + (response-handler + (define file-title (jp "/fileTitle" media-detail "")) + (define title + (if (non-empty-string? file-title) (format "File:~a" file-title) prefixed-title)) + (define image-content-type + (if (non-empty-string? 
(jp "/videoEmbedCode" media-detail "")) + #f + (url-content-type (jp "/imageUrl" media-detail)))) + (define body + (generate-results-page #:source-url source-url + #:wikiname wikiname + #:title title + #:media-detail media-detail + #:image-content-type image-content-type + #:license license)) + (when (config-true? 'debug) + ; used for its side effects + ; convert to string with error checking, error will be raised if xexp is invalid + (xexp->html body)) + (response/output #:code 200 + #:headers (list referrer-policy) + (λ (out) (write-html body out))))))) +(module+ test + (parameterize ([(config-parameter 'strict_proxy) "true"]) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/a" "image/jpeg") + `(img (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fa")))) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/b" "audio/mp3") + `(audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fb") + (controls))))) + (parameterize ([(config-parameter 'strict_proxy) "no"]) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/c" "application/ogg") + `(audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fc") + (controls)))) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/d" "video/mp4") + `(video (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fd") + (controls))))) + (check-equal? (get-media-html "https://example.com" "who knows") `"") + (check-equal? 
(get-media-html "https://example.com" #f) `"")) +(module+ test + (parameterize ([(config-parameter 'strict_proxy) "true"]) + (check-not-false + ((query-selector + (attribute-selector 'src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fexamplefile") + (generate-results-page #:source-url "" + #:wikiname "test" + #:title "File:Example file" + #:media-detail test-media-detail + #:image-content-type "image/jpeg")))))) From 9aba3ad4320229dc9837a0fa647f1f27f83bbf58 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Sun, 9 Oct 2022 20:53:09 +1300 Subject: [PATCH 3/8] Refactor siteinfo/license fetching --- info.rkt | 2 +- src/data.rkt | 29 +++++++++++++++-------------- src/page-category.rkt | 2 +- src/page-search.rkt | 2 +- src/page-wiki.rkt | 2 +- 5 files changed, 19 insertions(+), 18 deletions(-) diff --git a/info.rkt b/info.rkt index 74152ef..46512df 100644 --- a/info.rkt +++ b/info.rkt @@ -1,3 +1,3 @@ #lang info -(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "ini-lib")) +(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "ini-lib" "memo")) diff --git a/src/data.rkt b/src/data.rkt index 8eb0cd6..fae07d3 100644 --- a/src/data.rkt +++ b/src/data.rkt @@ -1,33 +1,34 @@ #lang racket/base -(require (prefix-in easy: net/http-easy) +(require racket/list + (prefix-in easy: net/http-easy) + memo "url-utils.rkt" "xexpr-utils.rkt") (provide + (struct-out siteinfo) (struct-out license) - license-default - license-auto) + siteinfo-fetch + license-default) +(struct siteinfo (sitename basepage license) #:transparent) (struct license (text url) #:transparent) + (define license-default (license "CC-BY-SA" "https://www.fandom.com/licensing")) -(define license-hash (make-hash)) -(define (license-fetch wikiname) + +(define/memoize (siteinfo-fetch wikiname) #:hash hash (define dest-url (format "https://~a.fandom.com/api.php?~a" wikiname (params->query 
'(("action" . "query") ("meta" . "siteinfo") - ("siprop" . "rightsinfo") + ("siprop" . "general|rightsinfo") ("format" . "json") ("formatversion" . "2"))))) (log-outgoing dest-url) (define res (easy:get dest-url)) (define data (easy:response-json res)) - (license (jp "/query/rightsinfo/text" data) - (jp "/query/rightsinfo/url" data))) -(define (license-auto wikiname) - (if (hash-has-key? license-hash wikiname) - (hash-ref license-hash wikiname) - (let ([result (license-fetch wikiname)]) - (hash-set! license-hash wikiname result) - result))) + (siteinfo (jp "/query/general/sitename" data) + (second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data))) + (license (jp "/query/rightsinfo/text" data) + (jp "/query/rightsinfo/url" data)))) diff --git a/src/page-category.rkt b/src/page-category.rkt index f7c43b2..773985d 100644 --- a/src/page-category.rkt +++ b/src/page-category.rkt @@ -89,7 +89,7 @@ (log-outgoing dest-url) (define dest-res (easy:get dest-url #:timeouts timeouts)) (easy:response-json dest-res)] - [license (license-auto wikiname)]) + [license (siteinfo-license (siteinfo-fetch wikiname))]) (define title (preprocess-html-wiki (jp "/parse/title" page-data prefixed-category))) (define page-html (preprocess-html-wiki (jp "/parse/text" page-data ""))) diff --git a/src/page-search.rkt b/src/page-search.rkt index 387deab..496dac8 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -70,7 +70,7 @@ (thread-let ([dest-res (log-outgoing dest-url) (easy:get dest-url #:timeouts timeouts)] - [license (license-auto wikiname)]) + [license (siteinfo-license (siteinfo-fetch wikiname))]) (define data (easy:response-json dest-res)) diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index a218dfe..32255e1 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -248,7 +248,7 @@ ("format" . 
"json"))))) (log-outgoing dest-url) (easy:get dest-url #:timeouts timeouts)] - [license (license-auto wikiname)]) + [license (siteinfo-license (siteinfo-fetch wikiname))]) (cond [(eq? 200 (easy:response-status-code dest-res)) From 59332fd9d13e7c66523816b0a96d9ab8c1728559 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Sun, 9 Oct 2022 22:50:50 +1300 Subject: [PATCH 4/8] Pass siteinfo through code; show sitename in title --- src/application-globals.rkt | 14 +++++++++----- src/data.rkt | 20 +++++++++++--------- src/page-category.rkt | 8 ++++---- src/page-search.rkt | 8 ++++---- src/page-wiki.rkt | 4 ++-- 5 files changed, 30 insertions(+), 24 deletions(-) diff --git a/src/application-globals.rkt b/src/application-globals.rkt index c214924..6b5f3d9 100644 --- a/src/application-globals.rkt +++ b/src/application-globals.rkt @@ -48,8 +48,8 @@ ,(if source-url `(div (p "This page displays proxied content from " (a (@ (href ,source-url) (rel "noreferrer")) ,source-url) - ,(format ". Text content is available under the ~a license, " (license-text license)) - (a (@ (href ,(license-url license))) "see license info.") + ,(format ". Text content is available under the ~a license, " (license^-text license)) + (a (@ (href ,(license^-url license))) "see license info.") " Media files may have different copying restrictions.") (p ,(format "Fandom is a trademark of Fandom, Inc. ~a is not affiliated with Fandom." 
(config-get 'application_name)))) `(div (p "Text content on wikis run by Fandom is available under the Creative Commons Attribution-Share Alike License 3.0 (Unported), " @@ -63,7 +63,8 @@ #:wikiname wikiname #:title title #:body-class [body-class-in #f] - #:license [license #f]) + #:siteinfo [siteinfo-in #f]) + (define siteinfo (or siteinfo-in siteinfo-default)) (define body-class (if (not body-class-in) "skin-fandomdesktop" body-class-in)) @@ -82,7 +83,10 @@ `(html (head (meta (@ (name "viewport") (content "width=device-width, initial-scale=1"))) - (title ,(format "~a | ~a" title (config-get 'application_name))) + (title ,(format "~a | ~a+~a" + title + (regexp-replace #rx" ?Wiki$" (siteinfo^-sitename siteinfo) "") + (config-get 'application_name))) ,@(map (λ (url) `(link (@ (rel "stylesheet") (type "text/css") (href ,url)))) (required-styles (format "https://~a.fandom.com" wikiname))) @@ -101,7 +105,7 @@ (div (@ (id "content") #;(class "page-content")) (div (@ (id "mw-content-text")) ,content)) - ,(application-footer source-url #:license license))))))) + ,(application-footer source-url #:license (siteinfo^-license siteinfo)))))))) (module+ test (define page (parameterize ([(config-parameter 'strict_proxy) "true"]) diff --git a/src/data.rkt b/src/data.rkt index fae07d3..6673e4c 100644 --- a/src/data.rkt +++ b/src/data.rkt @@ -6,15 +6,17 @@ "xexpr-utils.rkt") (provide - (struct-out siteinfo) - (struct-out license) + (struct-out siteinfo^) + (struct-out license^) siteinfo-fetch + siteinfo-default license-default) -(struct siteinfo (sitename basepage license) #:transparent) -(struct license (text url) #:transparent) +(struct siteinfo^ (sitename basepage license) #:transparent) +(struct license^ (text url) #:transparent) -(define license-default (license "CC-BY-SA" "https://www.fandom.com/licensing")) +(define license-default (license^ "CC-BY-SA" "https://www.fandom.com/licensing")) +(define siteinfo-default (siteinfo^ "Test Wiki" "Main_Page" license-default)) 
(define/memoize (siteinfo-fetch wikiname) #:hash hash (define dest-url @@ -28,7 +30,7 @@ (log-outgoing dest-url) (define res (easy:get dest-url)) (define data (easy:response-json res)) - (siteinfo (jp "/query/general/sitename" data) - (second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data))) - (license (jp "/query/rightsinfo/text" data) - (jp "/query/rightsinfo/url" data)))) + (siteinfo^ (jp "/query/general/sitename" data) + (second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data))) + (license^ (jp "/query/rightsinfo/text" data) + (jp "/query/rightsinfo/url" data)))) diff --git a/src/page-category.rkt b/src/page-category.rkt index 773985d..bf8b982 100644 --- a/src/page-category.rkt +++ b/src/page-category.rkt @@ -35,14 +35,14 @@ #:members-data members-data #:page page #:body-class [body-class #f] - #:license [license #f]) + #:siteinfo [siteinfo #f]) (define members (jp "/query/categorymembers" members-data)) (generate-wiki-page #:source-url source-url #:wikiname wikiname #:title title #:body-class body-class - #:license license + #:siteinfo siteinfo `(div ,(update-tree-wiki page wikiname) (hr) @@ -89,7 +89,7 @@ (log-outgoing dest-url) (define dest-res (easy:get dest-url #:timeouts timeouts)) (easy:response-json dest-res)] - [license (siteinfo-license (siteinfo-fetch wikiname))]) + [siteinfo (siteinfo-fetch wikiname)]) (define title (preprocess-html-wiki (jp "/parse/title" page-data prefixed-category))) (define page-html (preprocess-html-wiki (jp "/parse/text" page-data ""))) @@ -105,7 +105,7 @@ #:members-data members-data #:page page #:body-class body-class - #:license license)) + #:siteinfo siteinfo)) (when (config-true? 'debug) ; used for its side effects diff --git a/src/page-search.rkt b/src/page-search.rkt index 496dac8..0647e57 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -25,13 +25,13 @@ (define search-json-data '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 
1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181))))))))) -(define (generate-results-page dest-url wikiname query data #:license [license #f]) +(define (generate-results-page dest-url wikiname query data #:siteinfo [siteinfo #f]) (define search-results (jp "/query/search" data)) (generate-wiki-page #:source-url dest-url #:wikiname wikiname #:title "Search Results" - #:license license + #:siteinfo siteinfo `(div (@ (class "mw-parser-output")) (p ,(format "~a results found for " (length search-results)) (strong ,query)) @@ -70,11 +70,11 @@ (thread-let ([dest-res (log-outgoing dest-url) (easy:get dest-url #:timeouts timeouts)] - [license (siteinfo-license (siteinfo-fetch wikiname))]) + [siteinfo (siteinfo-fetch wikiname)]) (define data (easy:response-json dest-res)) - (define body (generate-results-page dest-url wikiname query data #:license license)) + (define body (generate-results-page dest-url wikiname query data #:siteinfo siteinfo)) (when (config-true? 'debug) ; used for its side effects ; convert to string with error checking, error will be raised if xexp is invalid diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index 32255e1..41dc215 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -248,7 +248,7 @@ ("format" . "json"))))) (log-outgoing dest-url) (easy:get dest-url #:timeouts timeouts)] - [license (siteinfo-license (siteinfo-fetch wikiname))]) + [siteinfo (siteinfo-fetch wikiname)]) (cond [(eq? 200 (easy:response-status-code dest-res)) @@ -271,7 +271,7 @@ #:wikiname wikiname #:title title #:body-class body-class - #:license license)) + #:siteinfo siteinfo)) (define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body))) (define headers (if redirect-msg (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? 
t 'a)) redirect-msg))))] From ade7878f7b090cfef9860d4e98c2486d9edbbfb8 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Sun, 9 Oct 2022 22:54:59 +1300 Subject: [PATCH 5/8] Redirect to actual wiki main page --- src/page-redirect-wiki-home.rkt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/page-redirect-wiki-home.rkt b/src/page-redirect-wiki-home.rkt index b0c3df3..c8e6dde 100644 --- a/src/page-redirect-wiki-home.rkt +++ b/src/page-redirect-wiki-home.rkt @@ -2,6 +2,7 @@ (require net/url web-server/http "application-globals.rkt" + "data.rkt" "url-utils.rkt" "xexpr-utils.rkt") @@ -11,5 +12,6 @@ (define (redirect-wiki-home req) (response-handler (define wikiname (path/param-path (car (url-path (request-uri req))))) - (define dest (format "/~a/wiki/Main_Page" wikiname)) + (define siteinfo (siteinfo-fetch wikiname)) + (define dest (format "/~a/wiki/~a" wikiname (or (siteinfo^-basepage siteinfo) "Main_Page"))) (generate-redirect dest))) From adc4b47b83e7e850c242b6be854060f0908b70e4 Mon Sep 17 00:00:00 2001 From: blankie Date: Sun, 9 Oct 2022 10:53:02 +0700 Subject: [PATCH 6/8] Set Referrer-Policy to no-referrer Fandom sends a fake 404 to media if there's a Referer header that has an origin that's not Fandom. However, we can choose not to send the header by setting Referrer-Policy. 
See also: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy --- src/application-globals.rkt | 3 +++ src/page-category.rkt | 1 + src/page-search.rkt | 1 + src/page-wiki.rkt | 30 +++++++++++++++++------------- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/application-globals.rkt b/src/application-globals.rkt index 6b5f3d9..3230b58 100644 --- a/src/application-globals.rkt +++ b/src/application-globals.rkt @@ -9,6 +9,8 @@ "url-utils.rkt") (provide + ; header to not send referers to fandom + referrer-policy ; timeout durations for http-easy requests timeouts ; generates a consistent footer @@ -22,6 +24,7 @@ (require rackunit html-writing)) +(define referrer-policy (header #"Referrer-Policy" #"no-referrer")) (define timeouts (easy:make-timeout-config #:lease 5 #:connect 5)) (define (application-footer source-url #:license [license-in #f]) diff --git a/src/page-category.rkt b/src/page-category.rkt index bf8b982..036b59c 100644 --- a/src/page-category.rkt +++ b/src/page-category.rkt @@ -113,6 +113,7 @@ (xexp->html body)) (response/output #:code 200 + #:headers (list referrer-policy) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-search.rkt b/src/page-search.rkt index 0647e57..e951749 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -81,6 +81,7 @@ (xexp->html body)) (response/output #:code 200 + #:headers (list referrer-policy) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index 41dc215..b039695 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -152,15 +152,17 @@ (λ (v) (dict-update v 'rel (λ (s) (list (string-append (car s) " noreferrer"))) '("")))) - ; proxy images from inline styles - (curry attribute-maybe-update 'style - (λ (style) - (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style - (λ (whole url) - (string-append - "url(" - (u-proxy-url url) - ")"))))) + ; proxy images from inline styles, if strict_proxy is set + 
(curry u + (λ (v) (config-true? 'strict_proxy)) + (λ (v) (attribute-maybe-update 'style + (λ (style) + (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style + (λ (whole url) + (string-append + "url(" + (u-proxy-url url) + ")")))) v))) ; and also their links, if strict_proxy is set (curry u (λ (v) @@ -168,8 +170,10 @@ (eq? element-type 'a) (has-class? "image-thumbnail" v))) (λ (v) (attribute-maybe-update 'href u-proxy-url v))) - ; proxy images from src attributes - (curry attribute-maybe-update 'src u-proxy-url) + ; proxy images from src attributes, if strict_proxy is set + (curry u + (λ (v) (config-true? 'strict_proxy)) + (λ (v) (attribute-maybe-update 'src u-proxy-url v))) ; don't lazyload images (curry u (λ (v) (dict-has-key? v 'data-src)) @@ -276,8 +280,8 @@ (define headers (if redirect-msg (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))] [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) - (list (header #"Refresh" value))) - (list))) + (list (header #"Refresh" value) referrer-policy)) + (list referrer-policy))) (when (config-true? 
'debug) ; used for its side effects ; convert to string with error checking, error will be raised if xexp is invalid From 5813c49261c823503f712f07e40ee91e3f7a0b66 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Sun, 9 Oct 2022 23:43:21 +1300 Subject: [PATCH 7/8] Refactor Referrer-Policy header - Change the variable to always-headers so it can be extended in the future - New function build-headers that assists combining complex logic headers together with less mess - Also apply headers to the proxy --- src/application-globals.rkt | 7 ++++--- src/page-category.rkt | 2 +- src/page-home.rkt | 2 ++ src/page-proxy.rkt | 2 ++ src/page-search.rkt | 2 +- src/page-wiki.rkt | 12 +++++++----- src/url-utils.rkt | 19 ++++++++++++++++++- 7 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/application-globals.rkt b/src/application-globals.rkt index 3230b58..7f16bee 100644 --- a/src/application-globals.rkt +++ b/src/application-globals.rkt @@ -9,8 +9,8 @@ "url-utils.rkt") (provide - ; header to not send referers to fandom - referrer-policy + ; headers to always send on all http responses + always-headers ; timeout durations for http-easy requests timeouts ; generates a consistent footer @@ -24,7 +24,8 @@ (require rackunit html-writing)) -(define referrer-policy (header #"Referrer-Policy" #"no-referrer")) +(define always-headers + (list (header #"Referrer-Policy" #"same-origin"))) ; header to not send referers to fandom (define timeouts (easy:make-timeout-config #:lease 5 #:connect 5)) (define (application-footer source-url #:license [license-in #f]) diff --git a/src/page-category.rkt b/src/page-category.rkt index 036b59c..6c0a733 100644 --- a/src/page-category.rkt +++ b/src/page-category.rkt @@ -113,7 +113,7 @@ (xexp->html body)) (response/output #:code 200 - #:headers (list referrer-policy) + #:headers (build-headers always-headers) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-home.rkt b/src/page-home.rkt index 7c7aaa1..b16f66a 100644 
--- a/src/page-home.rkt
+++ b/src/page-home.rkt
@@ -4,6 +4,7 @@
          html-writing
          web-server/http
          "application-globals.rkt"
+         "url-utils.rkt"
          "xexpr-utils.rkt"
          "config.rkt")
 
@@ -81,6 +82,7 @@
 (define (page-home req)
   (response/output
    #:code 200
+   #:headers (build-headers always-headers)
    (λ (out)
      (write-html body out))))
 
diff --git a/src/page-proxy.rkt b/src/page-proxy.rkt
index 8dbf0f9..3c22e1e 100644
--- a/src/page-proxy.rkt
+++ b/src/page-proxy.rkt
@@ -8,6 +8,7 @@
          net/url
          web-server/http
          (only-in web-server/dispatchers/dispatch next-dispatcher)
+         "application-globals.rkt"
          "url-utils.rkt"
          "xexpr-utils.rkt")
 
@@ -26,6 +27,7 @@
            (response/output
             #:code (easy:response-status-code dest-r)
             #:mime-type (easy:response-headers-ref dest-r 'content-type)
+            #:headers (build-headers always-headers)
             (λ (out)
               (copy-port (easy:response-output dest-r) out)
               (easy:response-close! dest-r))))))
diff --git a/src/page-search.rkt b/src/page-search.rkt
index e951749..81a88b2 100644
--- a/src/page-search.rkt
+++ b/src/page-search.rkt
@@ -81,7 +81,7 @@
            (xexp->html body))
          (response/output
           #:code 200
-          #:headers (list referrer-policy)
+          #:headers (build-headers always-headers)
           (λ (out)
             (write-html body out))))))
 (module+ test
diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt
index b039695..effa40a 100644
--- a/src/page-wiki.rkt
+++ b/src/page-wiki.rkt
@@ -277,11 +277,13 @@
             #:body-class body-class
             #:siteinfo siteinfo))
          (define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body)))
-         (define headers (if redirect-msg
-                             (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))]
-                                    [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))])
-                               (list (header #"Refresh" value) referrer-policy))
-                             (list referrer-policy)))
+         (define headers
+           (build-headers
+            always-headers
+            (when redirect-msg
+              (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))]
+                     [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))])
+                (header #"Refresh" value)))))
          (when (config-true? 'debug)
            ; used for its side effects
            ; convert to string with error checking, error will be raised if xexp is invalid
diff --git a/src/url-utils.rkt b/src/url-utils.rkt
index 62f7cc2..10df089 100644
--- a/src/url-utils.rkt
+++ b/src/url-utils.rkt
@@ -2,6 +2,8 @@
 (require racket/string
          "config.rkt"
          "pure-utils.rkt")
+(require/typed web-server/http/request-structs
+               [#:opaque Header header?])
 
 (provide
  ; regex to match wiki names
@@ -13,7 +15,9 @@
  ; check whether a url is on a domain controlled by fandom
  is-fandom-url?
  ; prints "out: "
- log-outgoing)
+ log-outgoing
+ ; pass in a header, headers, or something useless. they'll all combine into a list
+ build-headers)
 
 (module+ test
   (require "typed-rackunit.rkt"))
@@ -81,3 +85,16 @@
 (define (log-outgoing url-string)
   (when (config-true? 'log_outgoing)
     (printf "out: ~a~n" url-string)))
+
+(: build-headers ((U Header (Listof Header) False Void) * -> (Listof Header)))
+(define (build-headers . fs)
+  (apply
+   append
+   (map (λ ([f : (U Header (Listof Header) False Void)])
+          (cond
+            [(not f) null]
+            [(void? f) null]
+            [(null? f) null]
+            [(header? f) (list f)]
+            [(pair? f) f]))
+        fs)))

From 5ad532642546ed1bd0ddc2089805e2b8a1d9dde5 Mon Sep 17 00:00:00 2001
From: blankie
Date: Sat, 8 Oct 2022 15:35:35 +0700
Subject: [PATCH 8/8] Add support for File: pages

Fixes https://lists.sr.ht/~cadence/breezewiki-discuss/%3Cb2835a70-5118-4df0-90c9-4333486a4b69%40nixnetmail.com%3E
---
 breezewiki.rkt          |   2 +
 dist.rkt                |   2 +
 src/dispatcher-tree.rkt |   1 +
 src/page-file.rkt       | 174 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 179 insertions(+)
 create mode 100644 src/page-file.rkt

diff --git a/breezewiki.rkt b/breezewiki.rkt
index dfb405e..3fc9b8f 100644
--- a/breezewiki.rkt
+++ b/breezewiki.rkt
@@ -19,6 +19,7 @@
 (require-reloadable "src/page-static.rkt" static-dispatcher)
 (require-reloadable "src/page-subdomain.rkt" subdomain-dispatcher)
 (require-reloadable "src/page-wiki.rkt" page-wiki)
+(require-reloadable "src/page-file.rkt" page-file)
 
 (reload!)
 
@@ -38,6 +39,7 @@
       page-proxy
       page-search
       page-wiki
+      page-file
       redirect-wiki-home
       static-dispatcher
       subdomain-dispatcher))))
diff --git a/dist.rkt b/dist.rkt
index a626695..805df48 100644
--- a/dist.rkt
+++ b/dist.rkt
@@ -13,6 +13,7 @@
 (require (only-in "src/page-static.rkt" static-dispatcher))
 (require (only-in "src/page-subdomain.rkt" subdomain-dispatcher))
 (require (only-in "src/page-wiki.rkt" page-wiki))
+(require (only-in "src/page-file.rkt" page-file))
 
 (serve/launch/wait
  #:listen-ip (if (config-true? 'debug) "127.0.0.1" #f)
@@ -27,6 +28,7 @@
    page-proxy
    page-search
    page-wiki
+   page-file
    redirect-wiki-home
    static-dispatcher
    subdomain-dispatcher)))
diff --git a/src/dispatcher-tree.rkt b/src/dispatcher-tree.rkt
index f2c1412..b68cf9c 100644
--- a/src/dispatcher-tree.rkt
+++ b/src/dispatcher-tree.rkt
@@ -45,6 +45,7 @@
     (pathprocedure:make "/proxy" (hash-ref ds 'page-proxy))
     (pathprocedure:make "/search" (hash-ref ds 'page-global-search))
     (filter:make (pregexp (format "^/~a/wiki/Category:.+$" px-wikiname)) (lift:make (hash-ref ds 'page-category)))
+    (filter:make (pregexp (format "^/~a/wiki/File:.+$" px-wikiname)) (lift:make (hash-ref ds 'page-file)))
     (filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (hash-ref ds 'page-wiki)))
     (filter:make (pregexp (format "^/~a/search$" px-wikiname)) (lift:make (hash-ref ds 'page-search)))
     (filter:make (pregexp (format "^/~a(/(wiki(/)?)?)?$" px-wikiname)) (lift:make (hash-ref ds 'redirect-wiki-home)))
diff --git a/src/page-file.rkt b/src/page-file.rkt
new file mode 100644
index 0000000..ee27749
--- /dev/null
+++ b/src/page-file.rkt
@@ -0,0 +1,174 @@
+#lang racket/base
+(require racket/dict
+         racket/list
+         racket/match
+         racket/string
+         (prefix-in easy: net/http-easy)
+         ; html libs
+         html-parsing
+         html-writing
+         ; web server libs
+         net/url
+         web-server/http
+         (only-in web-server/dispatchers/dispatch next-dispatcher)
+         #;(only-in web-server/http/redirect redirect-to)
+         "application-globals.rkt"
+         "config.rkt"
+         "data.rkt"
+         "page-wiki.rkt"
+         "syntax.rkt"
+         "url-utils.rkt"
+         "xexpr-utils.rkt")
+
+(provide page-file)
+
+(module+ test
+  (require rackunit)
+  (define test-media-detail
+    '#hasheq((fileTitle . "Example file")
+             (videoEmbedCode . "")
+             (imageUrl . "https://static.wikia.nocookie.net/examplefile")
+             (rawImageUrl . "https://static.wikia.nocookie.net/examplefile")
+             (userName . "blankie")
+             (isPostedIn . #t)
+             (smallerArticleList . (#hasheq((title . "Example_article")
+                                            (titleText . "Example article"))))
+             (articleListIsSmaller . 0)
+             (exists . #t)
+             (imageDescription . #f))))
+
+; log the url, send it a HEAD request, and return the content-type header of the response
+(define (url-content-type url)
+  (log-outgoing url)
+  (define dest-res (easy:head url #:timeouts timeouts))
+  (easy:response-headers-ref dest-res 'content-type))
+
+; build the xexp element that displays the media at url, chosen by content-type: img, audio or video
+; the result is "" when content-type is #f or matches none of the known media types
+; the src attribute goes through u-proxy-url when the strict_proxy config option is true
+(define (get-media-html url content-type)
+  (define maybe-proxied-url (if (config-true? 'strict_proxy) (u-proxy-url url) url))
+  (cond
+    [(eq? content-type #f) `""]
+    [(regexp-match? #rx"(?i:^image/)" content-type) `(img (@ (src ,maybe-proxied-url)))]
+    [(regexp-match? #rx"(?i:^audio/|^application/ogg(;|$))" content-type)
+     `(audio (@ (src ,maybe-proxied-url) (controls)))]
+    [(regexp-match? #rx"(?i:^video/)" content-type) `(video (@ (src ,maybe-proxied-url) (controls)))]
+    [else `""]))
+
+; build the whole File: page with generate-wiki-page from a getMediaDetail response
+; shows the video embed if there is one, else the media element, then uploader, usages, description
+(define (generate-results-page #:source-url source-url
+                               #:wikiname wikiname
+                               #:title title
+                               #:media-detail media-detail
+                               #:image-content-type image-content-type
+                               #:siteinfo [siteinfo #f])
+  (define video-embed-code (jp "/videoEmbedCode" media-detail ""))
+  (define raw-image-url (jp "/rawImageUrl" media-detail))
+  (define image-url (jp "/imageUrl" media-detail raw-image-url))
+  (define username (jp "/userName" media-detail))
+  (define is-posted-in (jp "/isPostedIn" media-detail #f))
+  (define smaller-article-list (jp "/smallerArticleList" media-detail))
+  (define article-list-is-smaller (jp "/articleListIsSmaller" media-detail))
+  (define image-description (jp "/imageDescription" media-detail #f))
+  (define maybe-proxied-raw-image-url
+    (if (config-true? 'strict_proxy) (u-proxy-url raw-image-url) raw-image-url))
+  (generate-wiki-page
+   #:source-url source-url
+   #:wikiname wikiname
+   #:title title
+   #:siteinfo siteinfo
+   `(div ,(if (non-empty-string? video-embed-code)
+              (update-tree-wiki (html->xexp (preprocess-html-wiki video-embed-code)) wikiname)
+              (get-media-html image-url image-content-type))
+         (p ,(if (non-empty-string? video-embed-code)
+                 `""
+                 `(span (a (@ (href ,maybe-proxied-raw-image-url)) "View original file") ". "))
+            "Added by "
+            (a (@ (href ,(format "/~a/wiki/User:~a" wikiname username))) ,username)
+            "."
+            ,(if is-posted-in
+                 `(span " Posted in "
+                        ,@(map (λ (article)
+                                 (define page-path (jp "/title" article))
+                                 (define title (jp "/titleText" article page-path))
+                                 `(span ,(if (eq? (car smaller-article-list) article) "" ", ")
+                                        (a (@ (href ,(format "/~a/wiki/~a" wikiname page-path)))
+                                           ,title)))
+                               smaller-article-list)
+                        ,(if (eq? article-list-is-smaller 1) "…" "."))
+                 `""))
+         ,(if (string? image-description)
+              (update-tree-wiki (html->xexp (preprocess-html-wiki image-description)) wikiname)
+              ""))))
+
+; request handler for /{wikiname}/wiki/File:{title}, fetches media detail and siteinfo with thread-let
+; hands over to the next dispatcher when the media detail says the file does not exist
+(define (page-file req)
+  (define wikiname (path/param-path (first (url-path (request-uri req)))))
+  (define prefixed-title (path/param-path (caddr (url-path (request-uri req)))))
+  (define origin (format "https://~a.fandom.com" wikiname))
+  (define source-url (format "~a/wiki/~a" origin prefixed-title))
+
+  (thread-let ([media-detail
+                (define dest-url
+                  (format "~a/wikia.php?~a"
+                          origin
+                          (params->query `(("format" . "json") ("controller" . "Lightbox")
+                                           ("method" . "getMediaDetail")
+                                           ("fileTitle" . ,prefixed-title)))))
+                (log-outgoing dest-url)
+                (define dest-res (easy:get dest-url #:timeouts timeouts))
+                (easy:response-json dest-res)]
+               [siteinfo (siteinfo-fetch wikiname)])
+    (if (not (jp "/exists" media-detail #f))
+        (next-dispatcher)
+        (response-handler
+         (define file-title (jp "/fileTitle" media-detail ""))
+         (define title
+           (if (non-empty-string? file-title) (format "File:~a" file-title) prefixed-title))
+         (define image-content-type
+           (if (non-empty-string? (jp "/videoEmbedCode" media-detail ""))
+               #f
+               (url-content-type (jp "/imageUrl" media-detail))))
+         (define body
+           (generate-results-page #:source-url source-url
+                                  #:wikiname wikiname
+                                  #:title title
+                                  #:media-detail media-detail
+                                  #:image-content-type image-content-type
+                                  #:siteinfo siteinfo))
+         (when (config-true? 'debug)
+           ; used for its side effects
+           ; convert to string with error checking, error will be raised if xexp is invalid
+           (xexp->html body))
+         (response/output #:code 200
+                          #:headers (build-headers always-headers)
+                          (λ (out) (write-html body out)))))))
+(module+ test
+  (parameterize ([(config-parameter 'strict_proxy) "true"])
+    (check-equal? (get-media-html "https://static.wikia.nocookie.net/a" "image/jpeg")
+                  `(img (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fa"))))
+    (check-equal? (get-media-html "https://static.wikia.nocookie.net/b" "audio/mp3")
+                  `(audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fb")
+                             (controls)))))
+  (parameterize ([(config-parameter 'strict_proxy) "no"])
+    (check-equal? (get-media-html "https://static.wikia.nocookie.net/c" "application/ogg")
+                  `(audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fc")
+                             (controls))))
+    (check-equal? (get-media-html "https://static.wikia.nocookie.net/d" "video/mp4")
+                  `(video (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fd")
+                             (controls)))))
+  (check-equal? (get-media-html "https://example.com" "who knows") `"")
+  (check-equal? (get-media-html "https://example.com" #f) `""))
+(module+ test
+  (parameterize ([(config-parameter 'strict_proxy) "true"])
+    (check-not-false
+     ((query-selector
+       (attribute-selector 'src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fexamplefile")
+       (generate-results-page #:source-url ""
+                              #:wikiname "test"
+                              #:title "File:Example file"
+                              #:media-detail test-media-detail
+                              #:image-content-type "image/jpeg"))))))
+