diff --git a/breezewiki.rkt b/breezewiki.rkt index dfb405e..3fc9b8f 100644 --- a/breezewiki.rkt +++ b/breezewiki.rkt @@ -19,6 +19,7 @@ (require-reloadable "src/page-static.rkt" static-dispatcher) (require-reloadable "src/page-subdomain.rkt" subdomain-dispatcher) (require-reloadable "src/page-wiki.rkt" page-wiki) +(require-reloadable "src/page-file.rkt" page-file) (reload!) @@ -38,6 +39,7 @@ page-proxy page-search page-wiki + page-file redirect-wiki-home static-dispatcher subdomain-dispatcher)))) diff --git a/dist.rkt b/dist.rkt index a626695..805df48 100644 --- a/dist.rkt +++ b/dist.rkt @@ -13,6 +13,7 @@ (require (only-in "src/page-static.rkt" static-dispatcher)) (require (only-in "src/page-subdomain.rkt" subdomain-dispatcher)) (require (only-in "src/page-wiki.rkt" page-wiki)) +(require (only-in "src/page-file.rkt" page-file)) (serve/launch/wait #:listen-ip (if (config-true? 'debug) "127.0.0.1" #f) @@ -27,6 +28,7 @@ page-proxy page-search page-wiki + page-file redirect-wiki-home static-dispatcher subdomain-dispatcher))) diff --git a/info.rkt b/info.rkt index 74152ef..46512df 100644 --- a/info.rkt +++ b/info.rkt @@ -1,3 +1,3 @@ #lang info -(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "ini-lib")) +(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "ini-lib" "memo")) diff --git a/src/application-globals.rkt b/src/application-globals.rkt index c214924..7f16bee 100644 --- a/src/application-globals.rkt +++ b/src/application-globals.rkt @@ -9,6 +9,8 @@ "url-utils.rkt") (provide + ; headers to always send on all http responses + always-headers ; timeout durations for http-easy requests timeouts ; generates a consistent footer @@ -22,6 +24,8 @@ (require rackunit html-writing)) +(define always-headers + (list (header #"Referrer-Policy" #"same-origin"))) ; header to not send referers to fandom (define timeouts (easy:make-timeout-config #:lease 5 
#:connect 5)) (define (application-footer source-url #:license [license-in #f]) @@ -48,8 +52,8 @@ ,(if source-url `(div (p "This page displays proxied content from " (a (@ (href ,source-url) (rel "noreferrer")) ,source-url) - ,(format ". Text content is available under the ~a license, " (license-text license)) - (a (@ (href ,(license-url license))) "see license info.") + ,(format ". Text content is available under the ~a license, " (license^-text license)) + (a (@ (href ,(license^-url license))) "see license info.") " Media files may have different copying restrictions.") (p ,(format "Fandom is a trademark of Fandom, Inc. ~a is not affiliated with Fandom." (config-get 'application_name)))) `(div (p "Text content on wikis run by Fandom is available under the Creative Commons Attribution-Share Alike License 3.0 (Unported), " @@ -63,7 +67,8 @@ #:wikiname wikiname #:title title #:body-class [body-class-in #f] - #:license [license #f]) + #:siteinfo [siteinfo-in #f]) + (define siteinfo (or siteinfo-in siteinfo-default)) (define body-class (if (not body-class-in) "skin-fandomdesktop" body-class-in)) @@ -82,7 +87,10 @@ `(html (head (meta (@ (name "viewport") (content "width=device-width, initial-scale=1"))) - (title ,(format "~a | ~a" title (config-get 'application_name))) + (title ,(format "~a | ~a+~a" + title + (regexp-replace #rx" ?Wiki$" (siteinfo^-sitename siteinfo) "") + (config-get 'application_name))) ,@(map (λ (url) `(link (@ (rel "stylesheet") (type "text/css") (href ,url)))) (required-styles (format "https://~a.fandom.com" wikiname))) @@ -101,7 +109,7 @@ (div (@ (id "content") #;(class "page-content")) (div (@ (id "mw-content-text")) ,content)) - ,(application-footer source-url #:license license))))))) + ,(application-footer source-url #:license (siteinfo^-license siteinfo)))))))) (module+ test (define page (parameterize ([(config-parameter 'strict_proxy) "true"]) diff --git a/src/data.rkt b/src/data.rkt index 8eb0cd6..6673e4c 100644 --- a/src/data.rkt +++ 
b/src/data.rkt @@ -1,33 +1,36 @@ #lang racket/base -(require (prefix-in easy: net/http-easy) +(require racket/list + (prefix-in easy: net/http-easy) + memo "url-utils.rkt" "xexpr-utils.rkt") (provide - (struct-out license) - license-default - license-auto) + (struct-out siteinfo^) + (struct-out license^) + siteinfo-fetch + siteinfo-default + license-default) -(struct license (text url) #:transparent) -(define license-default (license "CC-BY-SA" "https://www.fandom.com/licensing")) -(define license-hash (make-hash)) -(define (license-fetch wikiname) +(struct siteinfo^ (sitename basepage license) #:transparent) +(struct license^ (text url) #:transparent) + +(define license-default (license^ "CC-BY-SA" "https://www.fandom.com/licensing")) +(define siteinfo-default (siteinfo^ "Test Wiki" "Main_Page" license-default)) + +(define/memoize (siteinfo-fetch wikiname) #:hash hash (define dest-url (format "https://~a.fandom.com/api.php?~a" wikiname (params->query '(("action" . "query") ("meta" . "siteinfo") - ("siprop" . "rightsinfo") + ("siprop" . "general|rightsinfo") ("format" . "json") ("formatversion" . "2"))))) (log-outgoing dest-url) (define res (easy:get dest-url)) (define data (easy:response-json res)) - (license (jp "/query/rightsinfo/text" data) - (jp "/query/rightsinfo/url" data))) -(define (license-auto wikiname) - (if (hash-has-key? license-hash wikiname) - (hash-ref license-hash wikiname) - (let ([result (license-fetch wikiname)]) - (hash-set! 
license-hash wikiname result) - result))) + (siteinfo^ (jp "/query/general/sitename" data) + (let ([m (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data))]) (and m (second m))) ; basepage is #f when base has no /wiki/ part; redirect-wiki-home falls back to "Main_Page" + (license^ (jp "/query/rightsinfo/text" data) + (jp "/query/rightsinfo/url" data)))) diff --git a/src/dispatcher-tree.rkt b/src/dispatcher-tree.rkt index f2c1412..b68cf9c 100644 --- a/src/dispatcher-tree.rkt +++ b/src/dispatcher-tree.rkt @@ -45,6 +45,7 @@ (pathprocedure:make "/proxy" (hash-ref ds 'page-proxy)) (pathprocedure:make "/search" (hash-ref ds 'page-global-search)) (filter:make (pregexp (format "^/~a/wiki/Category:.+$" px-wikiname)) (lift:make (hash-ref ds 'page-category))) + (filter:make (pregexp (format "^/~a/wiki/File:.+$" px-wikiname)) (lift:make (hash-ref ds 'page-file))) (filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (hash-ref ds 'page-wiki))) (filter:make (pregexp (format "^/~a/search$" px-wikiname)) (lift:make (hash-ref ds 'page-search))) (filter:make (pregexp (format "^/~a(/(wiki(/)?)?)?$" px-wikiname)) (lift:make (hash-ref ds 'redirect-wiki-home))) diff --git a/src/page-category.rkt b/src/page-category.rkt index f7c43b2..6c0a733 100644 --- a/src/page-category.rkt +++ b/src/page-category.rkt @@ -35,14 +35,14 @@ #:members-data members-data #:page page #:body-class [body-class #f] - #:license [license #f]) + #:siteinfo [siteinfo #f]) (define members (jp "/query/categorymembers" members-data)) (generate-wiki-page #:source-url source-url #:wikiname wikiname #:title title #:body-class body-class - #:license license + #:siteinfo siteinfo `(div ,(update-tree-wiki page wikiname) (hr) @@ -89,7 +89,7 @@ (log-outgoing dest-url) (define dest-res (easy:get dest-url #:timeouts timeouts)) (easy:response-json dest-res)] - [license (license-auto wikiname)]) + [siteinfo (siteinfo-fetch wikiname)]) (define title (preprocess-html-wiki (jp "/parse/title" page-data prefixed-category))) (define page-html (preprocess-html-wiki (jp "/parse/text" page-data ""))) @@ -105,7 +105,7 @@
#:members-data members-data #:page page #:body-class body-class - #:license license)) + #:siteinfo siteinfo)) (when (config-true? 'debug) ; used for its side effects @@ -113,6 +113,7 @@ (xexp->html body)) (response/output #:code 200 + #:headers (build-headers always-headers) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-file.rkt b/src/page-file.rkt new file mode 100644 index 0000000..ee27749 --- /dev/null +++ b/src/page-file.rkt @@ -0,0 +1,166 @@ +#lang racket/base +(require racket/dict + racket/list + racket/match + racket/string + (prefix-in easy: net/http-easy) + ; html libs + html-parsing + html-writing + ; web server libs + net/url + web-server/http + (only-in web-server/dispatchers/dispatch next-dispatcher) + #;(only-in web-server/http/redirect redirect-to) + "application-globals.rkt" + "config.rkt" + "data.rkt" + "page-wiki.rkt" + "syntax.rkt" + "url-utils.rkt" + "xexpr-utils.rkt") + +(provide page-file) + +(module+ test + (require rackunit) + (define test-media-detail + '#hasheq((fileTitle . "Example file") + (videoEmbedCode . "") + (imageUrl . "https://static.wikia.nocookie.net/examplefile") + (rawImageUrl . "https://static.wikia.nocookie.net/examplefile") + (userName . "blankie") + (isPostedIn . #t) + (smallerArticleList . (#hasheq((title . "Example_article") + (titleText . "Example article")))) + (articleListIsSmaller . 0) + (exists . #t) + (imageDescription . #f)))) + +(define (url-content-type url) + (log-outgoing url) + (define dest-res (easy:head url #:timeouts timeouts)) + (easy:response-headers-ref dest-res 'content-type)) + +(define (get-media-html url content-type) + (define maybe-proxied-url (if (config-true? 'strict_proxy) (u-proxy-url url) url)) + (cond + [(eq? content-type #f) `""] + [(regexp-match? #rx"(?i:^image/)" content-type) `(img (@ (src ,maybe-proxied-url)))] + [(regexp-match? #rx"(?i:^audio/|^application/ogg(;|$))" content-type) + `(audio (@ (src ,maybe-proxied-url) (controls)))] + [(regexp-match? 
#rx"(?i:^video/)" content-type) `(video (@ (src ,maybe-proxied-url) (controls)))] + [else `""])) + +(define (generate-results-page #:source-url source-url + #:wikiname wikiname + #:title title + #:media-detail media-detail + #:image-content-type image-content-type + #:siteinfo [siteinfo #f]) + (define video-embed-code (jp "/videoEmbedCode" media-detail "")) + (define raw-image-url (jp "/rawImageUrl" media-detail)) + (define image-url (jp "/imageUrl" media-detail raw-image-url)) + (define username (jp "/userName" media-detail)) + (define is-posted-in (jp "/isPostedIn" media-detail #f)) + (define smaller-article-list (jp "/smallerArticleList" media-detail)) + (define article-list-is-smaller (jp "/articleListIsSmaller" media-detail)) + (define image-description (jp "/imageDescription" media-detail #f)) + (define maybe-proxied-raw-image-url + (if (config-true? 'strict_proxy) (u-proxy-url raw-image-url) raw-image-url)) + (generate-wiki-page + #:source-url source-url + #:wikiname wikiname + #:title title + #:siteinfo siteinfo + `(div ,(if (non-empty-string? video-embed-code) + (update-tree-wiki (html->xexp (preprocess-html-wiki video-embed-code)) wikiname) + (get-media-html image-url image-content-type)) + (p ,(if (non-empty-string? video-embed-code) + `"" + `(span (a (@ (href ,maybe-proxied-raw-image-url)) "View original file") ". ")) + "Added by " + (a (@ (href ,(format "/~a/wiki/User:~a" wikiname username))) ,username) + "." + ,(if is-posted-in + `(span " Posted in " + ,@(map (λ (article) + (define page-path (jp "/title" article)) + (define title (jp "/titleText" article page-path)) + `(span ,(if (eq? (car smaller-article-list) article) "" ", ") + (a (@ (href ,(format "/~a/wiki/~a" wikiname page-path))) + ,title))) + smaller-article-list) + ,(if (eq? article-list-is-smaller 1) "…" ".")) + `"")) + ,(if (string? 
image-description) + (update-tree-wiki (html->xexp (preprocess-html-wiki image-description)) wikiname) + "")))) + +(define (page-file req) + (define wikiname (path/param-path (first (url-path (request-uri req))))) + (define prefixed-title (path/param-path (caddr (url-path (request-uri req))))) + (define origin (format "https://~a.fandom.com" wikiname)) + (define source-url (format "~a/wiki/~a" origin prefixed-title)) + + (thread-let ([media-detail + (define dest-url + (format "~a/wikia.php?~a" + origin + (params->query `(("format" . "json") ("controller" . "Lightbox") + ("method" . "getMediaDetail") + ("fileTitle" . ,prefixed-title))))) + (log-outgoing dest-url) + (define dest-res (easy:get dest-url #:timeouts timeouts)) + (easy:response-json dest-res)] + [siteinfo (siteinfo-fetch wikiname)]) + (if (not (jp "/exists" media-detail #f)) + (next-dispatcher) + (response-handler + (define file-title (jp "/fileTitle" media-detail "")) + (define title + (if (non-empty-string? file-title) (format "File:~a" file-title) prefixed-title)) + (define image-content-type + (if (non-empty-string? (jp "/videoEmbedCode" media-detail "")) + #f + (url-content-type (jp "/imageUrl" media-detail)))) + (define body + (generate-results-page #:source-url source-url + #:wikiname wikiname + #:title title + #:media-detail media-detail + #:image-content-type image-content-type + #:siteinfo siteinfo)) + (when (config-true? 'debug) + ; used for its side effects + ; convert to string with error checking, error will be raised if xexp is invalid + (xexp->html body)) + (response/output #:code 200 + #:headers (build-headers always-headers) ; same always-on headers as the other page handlers + (λ (out) (write-html body out))))))) +(module+ test + (parameterize ([(config-parameter 'strict_proxy) "true"]) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/a" "image/jpeg") + `(img (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fa")))) + (check-equal? 
(get-media-html "https://static.wikia.nocookie.net/b" "audio/mp3") + `(audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fb") + (controls))))) + (parameterize ([(config-parameter 'strict_proxy) "no"]) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/c" "application/ogg") + `(audio (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fc") + (controls)))) + (check-equal? (get-media-html "https://static.wikia.nocookie.net/d" "video/mp4") + `(video (@ (src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fd") + (controls))))) + (check-equal? (get-media-html "https://example.com" "who knows") `"") + (check-equal? (get-media-html "https://example.com" #f) `"")) +(module+ test + (parameterize ([(config-parameter 'strict_proxy) "true"]) + (check-not-false + ((query-selector + (attribute-selector 'src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fexamplefile") + (generate-results-page #:source-url "" + #:wikiname "test" + #:title "File:Example file" + #:media-detail test-media-detail + #:image-content-type "image/jpeg")))))) diff --git a/src/page-home.rkt b/src/page-home.rkt index 7c7aaa1..b16f66a 100644 --- a/src/page-home.rkt +++ b/src/page-home.rkt @@ -4,6 +4,7 @@ html-writing web-server/http "application-globals.rkt" + "url-utils.rkt" "xexpr-utils.rkt" "config.rkt") @@ -81,6 +82,7 @@ (define (page-home req) (response/output #:code 200 + #:headers (build-headers always-headers) (λ (out) (write-html body out)))) diff --git a/src/page-proxy.rkt b/src/page-proxy.rkt index 8dbf0f9..3c22e1e 100644 --- a/src/page-proxy.rkt +++ b/src/page-proxy.rkt @@ -8,6 +8,7 @@ net/url web-server/http (only-in web-server/dispatchers/dispatch next-dispatcher) + "application-globals.rkt" "url-utils.rkt" "xexpr-utils.rkt") @@ -26,6 +27,7 @@ (response/output #:code (easy:response-status-code dest-r) #:mime-type (easy:response-headers-ref dest-r 'content-type) + #:headers (build-headers always-headers) (λ (out) (copy-port 
(easy:response-output dest-r) out) (easy:response-close! dest-r)))))) diff --git a/src/page-redirect-wiki-home.rkt b/src/page-redirect-wiki-home.rkt index b0c3df3..c8e6dde 100644 --- a/src/page-redirect-wiki-home.rkt +++ b/src/page-redirect-wiki-home.rkt @@ -2,6 +2,7 @@ (require net/url web-server/http "application-globals.rkt" + "data.rkt" "url-utils.rkt" "xexpr-utils.rkt") @@ -11,5 +12,6 @@ (define (redirect-wiki-home req) (response-handler (define wikiname (path/param-path (car (url-path (request-uri req))))) - (define dest (format "/~a/wiki/Main_Page" wikiname)) + (define siteinfo (siteinfo-fetch wikiname)) + (define dest (format "/~a/wiki/~a" wikiname (or (siteinfo^-basepage siteinfo) "Main_Page"))) (generate-redirect dest))) diff --git a/src/page-search.rkt b/src/page-search.rkt index 387deab..81a88b2 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -25,13 +25,13 @@ (define search-json-data '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 
181))))))))) -(define (generate-results-page dest-url wikiname query data #:license [license #f]) +(define (generate-results-page dest-url wikiname query data #:siteinfo [siteinfo #f]) (define search-results (jp "/query/search" data)) (generate-wiki-page #:source-url dest-url #:wikiname wikiname #:title "Search Results" - #:license license + #:siteinfo siteinfo `(div (@ (class "mw-parser-output")) (p ,(format "~a results found for " (length search-results)) (strong ,query)) @@ -70,17 +70,18 @@ (thread-let ([dest-res (log-outgoing dest-url) (easy:get dest-url #:timeouts timeouts)] - [license (license-auto wikiname)]) + [siteinfo (siteinfo-fetch wikiname)]) (define data (easy:response-json dest-res)) - (define body (generate-results-page dest-url wikiname query data #:license license)) + (define body (generate-results-page dest-url wikiname query data #:siteinfo siteinfo)) (when (config-true? 'debug) ; used for its side effects ; convert to string with error checking, error will be raised if xexp is invalid (xexp->html body)) (response/output #:code 200 + #:headers (build-headers always-headers) (λ (out) (write-html body out)))))) (module+ test diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt index a218dfe..effa40a 100644 --- a/src/page-wiki.rkt +++ b/src/page-wiki.rkt @@ -152,15 +152,17 @@ (λ (v) (dict-update v 'rel (λ (s) (list (string-append (car s) " noreferrer"))) '("")))) - ; proxy images from inline styles - (curry attribute-maybe-update 'style - (λ (style) - (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style - (λ (whole url) - (string-append - "url(" - (u-proxy-url url) - ")"))))) + ; proxy images from inline styles, if strict_proxy is set + (curry u + (λ (v) (config-true? 
'strict_proxy)) + (λ (v) (attribute-maybe-update 'style + (λ (style) + (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style + (λ (whole url) + (string-append + "url(" + (u-proxy-url url) + ")")))) v))) ; and also their links, if strict_proxy is set (curry u (λ (v) @@ -168,8 +170,10 @@ (eq? element-type 'a) (has-class? "image-thumbnail" v))) (λ (v) (attribute-maybe-update 'href u-proxy-url v))) - ; proxy images from src attributes - (curry attribute-maybe-update 'src u-proxy-url) + ; proxy images from src attributes, if strict_proxy is set + (curry u + (λ (v) (config-true? 'strict_proxy)) + (λ (v) (attribute-maybe-update 'src u-proxy-url v))) ; don't lazyload images (curry u (λ (v) (dict-has-key? v 'data-src)) @@ -248,7 +252,7 @@ ("format" . "json"))))) (log-outgoing dest-url) (easy:get dest-url #:timeouts timeouts)] - [license (license-auto wikiname)]) + [siteinfo (siteinfo-fetch wikiname)]) (cond [(eq? 200 (easy:response-status-code dest-res)) @@ -271,13 +275,15 @@ #:wikiname wikiname #:title title #:body-class body-class - #:license license)) + #:siteinfo siteinfo)) (define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body))) - (define headers (if redirect-msg - (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))] - [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) - (list (header #"Refresh" value))) - (list))) + (define headers + (build-headers + always-headers + (when redirect-msg + (let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))] + [value (bytes-append #"0;url=" (string->bytes/utf-8 dest))]) + (header #"Refresh" value))))) (when (config-true? 
'debug) ; used for its side effects ; convert to string with error checking, error will be raised if xexp is invalid diff --git a/src/url-utils.rkt b/src/url-utils.rkt index 62f7cc2..10df089 100644 --- a/src/url-utils.rkt +++ b/src/url-utils.rkt @@ -2,6 +2,8 @@ (require racket/string "config.rkt" "pure-utils.rkt") +(require/typed web-server/http/request-structs + [#:opaque Header header?]) (provide ; regex to match wiki names @@ -13,7 +15,9 @@ ; check whether a url is on a domain controlled by fandom is-fandom-url? ; prints "out: " - log-outgoing) + log-outgoing + ; pass in a header, headers, or something useless. they'll all combine into a list + build-headers) (module+ test (require "typed-rackunit.rkt")) @@ -81,3 +85,16 @@ (define (log-outgoing url-string) (when (config-true? 'log_outgoing) (printf "out: ~a~n" url-string))) + +(: build-headers ((U Header (Listof Header) False Void) * -> (Listof Header))) +(define (build-headers . fs) + (apply + append + (map (λ ([f : (U Header (Listof Header) False Void)]) + (cond + [(not f) null] + [(void? f) null] + [(null? f) null] + [(header? f) (list f)] + [(pair? f) f])) + fs)))