From bc07a37bf7ba2551f2f4b2e8d0a20551f3c11ef5 Mon Sep 17 00:00:00 2001 From: blankie Date: Sun, 9 Oct 2022 10:53:02 +0700 Subject: [PATCH 1/3] Set Referrer-Policy to no-referrer Fandom sends a fake 404 to media if there's a Referer header that has an origin that's not Fandom. However, we can choose not to send the header by setting Referrer-Policy. See also: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy --- src/application-globals.rkt | 3 +++ src/page-category.rkt | 1 + src/page-search.rkt | 1 + src/page-wiki.rkt | 30 +++++++++++++++++------------- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/application-globals.rkt b/src/application-globals.rkt index c214924..9a21592 100644 --- a/src/application-globals.rkt +++ b/src/application-globals.rkt @@ -9,6 +9,8 @@ "url-utils.rkt") (provide + ; header to not send referers to fandom + referrer-policy ; timeout durations for http-easy requests timeouts ; generates a consistent footer @@ -22,6 +24,7 @@ (require rackunit html-writing)) +(define referrer-policy (header #"Referrer-Policy" #"no-referrer")) (define timeouts (easy:make-timeout-config #:lease 5 #:connect 5)) (define (application-footer source-url #:license [license-in #f]) diff --git a/src/page-category.rkt b/src/page-category.rkt index f7c43b2..c9c3ec2 100644 --- a/src/page-category.rkt +++ b/src/page-category.rkt @@ -113,6 +113,7 @@ (xexp->html body)) (response/output #:code 200 + #:headers (list referrer-policy) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-search.rkt b/src/page-search.rkt index 387deab..d42fce9 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -81,6 +81,7 @@ (xexp->html body)) (response/output #:code 200 + #:headers (list referrer-policy) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index a218dfe..be24af8 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -152,15 +152,17 @@ (λ (v) (dict-update v 
'rel (λ (s) (list (string-append (car s) " noreferrer"))) '("")))) - ; proxy images from inline styles - (curry attribute-maybe-update 'style - (λ (style) - (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style - (λ (whole url) - (string-append - "url(" - (u-proxy-url url) - ")"))))) + ; proxy images from inline styles, if strict_proxy is set + (curry u + (λ (v) (config-true? 'strict_proxy)) + (λ (v) (attribute-maybe-update 'style + (λ (style) + (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style + (λ (whole url) + (string-append + "url(" + (u-proxy-url url) + ")")))) v))) ; and also their links, if strict_proxy is set (curry u (λ (v) @@ -168,8 +170,10 @@ (eq? element-type 'a) (has-class? "image-thumbnail" v))) (λ (v) (attribute-maybe-update 'href u-proxy-url v))) - ; proxy images from src attributes - (curry attribute-maybe-update 'src u-proxy-url) + ; proxy images from src attributes, if strict_proxy is set + (curry u + (λ (v) (config-true? 'strict_proxy)) + (λ (v) (attribute-maybe-update 'src u-proxy-url v))) ; don't lazyload images (curry u (λ (v) (dict-has-key? v 'data-src)) @@ -276,8 +280,8 @@ (define headers (if redirect-msg (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))] [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) - (list (header #"Refresh" value))) - (list))) + (list (header #"Refresh" value) referrer-policy)) + (list referrer-policy))) (when (config-true? 
'debug) ; used for its side effects ; convert to string with error checking, error will be raised if xexp is invalid From ba806d0633c5246910df429fd2e4b477decf86e9 Mon Sep 17 00:00:00 2001 From: blankie Date: Sun, 9 Oct 2022 14:16:15 +0700 Subject: [PATCH 2/3] page-file: honor strict_proxy for media, send Referrer-Policy, add tests --- src/page-file.rkt | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/page-file.rkt b/src/page-file.rkt index d04c135..a2490ab 100644 --- a/src/page-file.rkt +++ b/src/page-file.rkt @@ -33,13 +33,22 @@ (easy:response-headers-ref dest-res 'content-type)) (define (get-media-html url content-type) + (define proxied-url (if (config-true? 'strict_proxy) (u-proxy-url url) url)) (cond [(eq? content-type #f) `""] - [(regexp-match? #rx"(?i:^image/)" content-type) `(img (@ (src ,url)))] + [(regexp-match? #rx"(?i:^image/)" content-type) `(img (@ (src ,proxied-url)))] [(regexp-match? #rx"(?i:^audio/|^application/ogg(;|$))" content-type) - `(audio (@ (src ,url) (controls)))] - [(regexp-match? #rx"(?i:^video/)" content-type) `(video (@ (src ,url) (controls)))] + `(audio (@ (src ,proxied-url) (controls)))] + [(regexp-match? #rx"(?i:^video/)" content-type) `(video (@ (src ,proxied-url) (controls)))] [else `""])) +(module+ test + (require rackunit) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/a" "image/jpeg") (img (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fa")))) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/b" "audio/mp3") (audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fb")))) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/c" "application/ogg") (audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fc")))) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/d" "video/mp4") (video (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fd")))) + (check-equal? (get-media-html "https://example.com" "who knows") "") + (check-equal? 
(get-media-html #f "who knows") "")) (define (generate-results-page #:source-url source-url #:wikiname wikiname @@ -125,6 +134,7 @@ ; convert to string with error checking, error will be raised if xexp is invalid (xexp->html body)) (response/output #:code 200 + #:headers (list referrer-policy) (λ (out) (write-html body out))))))) ;(module+ test ; (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Ankle_Monitor") From 19aeeea60fc6c7c724206c2ba2d21f847c366d98 Mon Sep 17 00:00:00 2001 From: blankie Date: Sun, 9 Oct 2022 14:21:28 +0700 Subject: [PATCH 3/3] page-file: rename to maybe-proxied-url, pin strict_proxy in tests --- src/page-file.rkt | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/page-file.rkt b/src/page-file.rkt index a2490ab..d20365f 100644 --- a/src/page-file.rkt +++ b/src/page-file.rkt @@ -33,20 +33,22 @@ (easy:response-headers-ref dest-res 'content-type)) (define (get-media-html url content-type) - (define proxied-url (if (config-true? 'strict_proxy) (u-proxy-url url) url)) + (define maybe-proxied-url (if (config-true? 'strict_proxy) (u-proxy-url url) url)) (cond [(eq? content-type #f) `""] - [(regexp-match? #rx"(?i:^image/)" content-type) `(img (@ (src ,proxied-url)))] + [(regexp-match? #rx"(?i:^image/)" content-type) `(img (@ (src ,maybe-proxied-url)))] [(regexp-match? #rx"(?i:^audio/|^application/ogg(;|$))" content-type) - `(audio (@ (src ,proxied-url) (controls)))] - [(regexp-match? #rx"(?i:^video/)" content-type) `(video (@ (src ,proxied-url) (controls)))] + `(audio (@ (src ,maybe-proxied-url) (controls)))] + [(regexp-match? #rx"(?i:^video/)" content-type) `(video (@ (src ,maybe-proxied-url) (controls)))] [else `""])) (module+ test (require rackunit) - (check-equal? (get-media-html "https://static.wikia.nocookie.net/a" "image/jpeg") (img (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fa")))) - (check-equal? 
(get-media-html "https://static.wikia.nocookie.net/b" "audio/mp3") (audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fb")))) - (check-equal? (get-media-html "https://static.wikia.nocookie.net/c" "application/ogg") (audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fc")))) - (check-equal? (get-media-html "https://static.wikia.nocookie.net/d" "video/mp4") (video (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fd")))) + (parameterize ([(config-parameter 'strict_proxy) "true"]) #| proxying on: expect the media src to be the /proxy URL. NOTE(review): assumes (config-parameter key) returns the parameter object itself; confirm in config.rkt |# + (check-equal? (get-media-html "https://static.wikia.nocookie.net/a" "image/jpeg") '(img (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fa")))) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/b" "audio/mp3") '(audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fb") (controls))))) + (parameterize ([(config-parameter 'strict_proxy) "false"]) #| proxying off: get-media-html passes the url through unchanged. NOTE(review): presumes config-true? reads "false" as off; confirm in config.rkt |# + (check-equal? (get-media-html "https://static.wikia.nocookie.net/c" "application/ogg") '(audio (@ (src "https://static.wikia.nocookie.net/c") (controls)))) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/d" "video/mp4") '(video (@ (src "https://static.wikia.nocookie.net/d") (controls))))) (check-equal? (get-media-html "https://example.com" "who knows") "") (check-equal? (get-media-html #f "who knows") ""))