From 6260ba809bdd2321fa59c6133f749f2db0d23e17 Mon Sep 17 00:00:00 2001
From: Cadence Ember
Date: Wed, 1 May 2024 00:53:09 +1200
Subject: [PATCH] Fix running out of file descriptors

Route requests to Fandom through a new module, src/fandom-request.rkt,
which provides fandom-get and fandom-get-api. Both send the request to
https://www.fandom.com with a Host header naming the wiki
(wikiname.fandom.com), use one shared timeout config, and log the
request when log_outgoing is enabled.

* data.rkt, page-category.rkt, page-file.rkt, page-wiki.rkt and
  search-provider-fandom.rkt call the new helpers instead of building
  URLs and calling easy:get themselves. fandom-get-api encodes the
  parameter list itself, so callers pass the raw association list.
* dispatcher-tree.rkt wraps page-home and page-wiki in a `page` guard
  that answers with a 500 and debugging output when the page procedure
  does not return a response.
* page-wiki.rkt hands a 404 from Fandom to the next dispatcher and raises
  a descriptive error for any other status it does not handle.
* `timeouts` moves from application-globals.rkt to fandom-request.rkt and
  whole-utils.rkt is deleted. search-provider-solr.rkt builds its own
  timeout config and drops its log-outgoing call, because the
  single-argument log-outgoing lived in whole-utils.rkt.

---
 lib/thread-utils.rkt           |  3 +--
 src/application-globals.rkt    |  3 ---
 src/data.rkt                   | 20 +++++++-------
 src/dispatcher-tree.rkt        | 21 +++++++++++++--
 src/fandom-request.rkt         | 48 ++++++++++++++++++++++++++++++++++
 src/page-category.rkt          | 42 +++++++++++++----------------
 src/page-file.rkt              | 23 +++++++---------
 src/page-search.rkt            |  2 --
 src/page-wiki.rkt              | 34 +++++++++++++-----------
 src/search-provider-fandom.rkt | 24 +++++++----------
 src/search-provider-solr.rkt   |  4 +---
 src/whole-utils.rkt            | 11 --------
 12 files changed, 134 insertions(+), 101 deletions(-)
 create mode 100644 src/fandom-request.rkt
 delete mode 100644 src/whole-utils.rkt

diff --git a/lib/thread-utils.rkt b/lib/thread-utils.rkt
index f907dac..66e2b4c 100644
--- a/lib/thread-utils.rkt
+++ b/lib/thread-utils.rkt
@@ -1,6 +1,5 @@
 #lang racket/base
-(require (prefix-in easy: net/http-easy)
-         "../src/data.rkt"
+(require "../src/data.rkt"
          "xexpr-utils.rkt")
 
 (provide
diff --git a/src/application-globals.rkt b/src/application-globals.rkt
index de60820..fb8f118 100644
--- a/src/application-globals.rkt
+++ b/src/application-globals.rkt
@@ -22,8 +22,6 @@
 (provide
  ; headers to always send on all http responses
  always-headers
- ; timeout durations for http-easy requests
- timeouts
  ; generates a consistent footer
  application-footer
  ; generates a consistent template for wiki page content to sit in
@@ -39,7 +37,6 @@
 (define always-headers
   (list (header #"Referrer-Policy" #"same-origin") ; header to not send referers to fandom
         (header #"Link" (string->bytes/latin-1 link-header))))
-(define timeouts (easy:make-timeout-config #:lease 5 #:connect 5))
 (define-runtime-path path-static "../static")
 
 (define theme-icons
diff --git a/src/data.rkt b/src/data.rkt
index b22e8a0..6975b37 100644
--- a/src/data.rkt
+++ b/src/data.rkt
@@ -7,8 +7,8 @@
          (prefix-in easy: net/http-easy)
          db
          memo
+         "fandom-request.rkt"
          "static-data.rkt"
-         "whole-utils.rkt"
          "../lib/url-utils.rkt"
          "../lib/xexpr-utils.rkt"
          "../archiver/archiver-database.rkt"
@@ -54,16 +54,14 @@
                   (vector-ref row 3)))
          siteinfo-default)]
     [else
-     (define dest-url
-       (format "https://~a.fandom.com/api.php?~a"
-               wikiname
-               (params->query '(("action" . "query")
-                                ("meta" . "siteinfo")
-                                ("siprop" . "general|rightsinfo")
-                                ("format" . "json")
-                                ("formatversion" . "2")))))
-     (log-outgoing dest-url)
-     (define res (easy:get dest-url))
+     (define res
+       (fandom-get-api
+        wikiname
+        '(("action" . "query")
+          ("meta" . "siteinfo")
+          ("siprop" . "general|rightsinfo")
+          ("format" . "json")
+          ("formatversion" . "2"))))
      (define data (easy:response-json res))
      (siteinfo^ (jp "/query/general/sitename" data)
                 (second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data)))
diff --git a/src/dispatcher-tree.rkt b/src/dispatcher-tree.rkt
index 48e8ebb..0212242 100644
--- a/src/dispatcher-tree.rkt
+++ b/src/dispatcher-tree.rkt
@@ -33,12 +33,29 @@
   ; don't forget that I'm returning *code* - return a call to the function
   (datum->syntax stx `(make-dispatcher-tree ,ds)))
 
+; guard that the page returned a response, otherwise print more detailed debugging information
+(define-syntax-rule (page ds name)
+  (λ (req)
+    (define dispatcher (hash-ref ds (quote name)))
+    (define page-response (dispatcher req))
+    (if (response? page-response)
+        page-response
+        (response/output
+         #:code 500
+         #:mime-type #"text/plain"
+         (λ (out)
+           (for ([port (list (current-error-port) out)])
+             (parameterize ([current-output-port port])
+               (printf "error in ~a:~n expected page to return a response~n actually returned: ~v~n"
+                       (quote name)
+                       page-response))))))))
+
 (define (make-dispatcher-tree ds)
   (define subdomain-dispatcher (hash-ref ds 'subdomain-dispatcher))
   (define tree
     (sequencer:make
      subdomain-dispatcher
-     (pathprocedure:make "/" (hash-ref ds 'page-home))
+     (pathprocedure:make "/" (page ds page-home))
      (pathprocedure:make "/proxy" (hash-ref ds 'page-proxy))
      (pathprocedure:make "/search" (hash-ref ds 'page-global-search))
      (pathprocedure:make "/set-user-settings" (hash-ref ds 'page-set-user-settings))
@@ -48,7 +65,7 @@
      (if (config-true? 'feature_offline::enabled)
          (filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (hash-ref ds 'page-wiki-offline)))
          (λ (_conn _req) (next-dispatcher)))
-     (filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (hash-ref ds 'page-wiki)))
+     (filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (page ds page-wiki)))
      (filter:make (pregexp (format "^/~a/search$" px-wikiname)) (lift:make (hash-ref ds 'page-search)))
      (filter:make (pregexp (format "^/~a(/(wiki(/)?)?)?$" px-wikiname)) (lift:make (hash-ref ds 'redirect-wiki-home)))
      (if (config-true? 'feature_offline::enabled)
diff --git a/src/fandom-request.rkt b/src/fandom-request.rkt
new file mode 100644
index 0000000..966eeee
--- /dev/null
+++ b/src/fandom-request.rkt
@@ -0,0 +1,48 @@
+#lang typed/racket/base
+(require "config.rkt"
+         "../lib/url-utils.rkt")
+(define-type Headers (HashTable Symbol (U Bytes String)))
+(require/typed net/http-easy
+  [#:opaque Timeout-Config timeout-config?]
+  [#:opaque Response response?]
+  [#:opaque Session session?]
+  [current-session (Parameter Session)]
+  [make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)]
+  [get ((U Bytes String)
+        [#:close? Boolean]
+        [#:headers Headers]
+        [#:timeouts Timeout-Config]
+        [#:max-attempts Exact-Positive-Integer]
+        [#:max-redirects Exact-Nonnegative-Integer]
+        [#:user-agent (U Bytes String)]
+        -> Response)])
+
+(provide
+ fandom-get
+ fandom-get-api
+ timeouts)
+
+(define timeouts (make-timeout-config #:lease 5 #:connect 5))
+
+(: no-headers Headers)
+(define no-headers '#hasheq())
+
+(: fandom-get (String String [#:headers (Option Headers)] -> Response))
+(define (fandom-get wikiname path #:headers [headers #f])
+  (define dest-url (string-append "https://www.fandom.com" path))
+  (define host (string-append wikiname ".fandom.com"))
+  (log-outgoing wikiname path)
+  (get dest-url
+       #:timeouts timeouts
+       #:headers (hash-set (or headers no-headers) 'Host host)))
+
+(: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response))
+(define (fandom-get-api wikiname params #:headers [headers #f])
+  (fandom-get wikiname
+              (string-append "/api.php?" (params->query params))
+              #:headers headers))
+
+(: log-outgoing (String String -> Void))
+(define (log-outgoing wikiname path)
+  (when (config-true? 'log_outgoing)
+    (printf "out: ~a ~a~n" wikiname path)))
diff --git a/src/page-category.rkt b/src/page-category.rkt
index 213d423..e1fe659 100644
--- a/src/page-category.rkt
+++ b/src/page-category.rkt
@@ -15,11 +15,11 @@
          "application-globals.rkt"
          "config.rkt"
          "data.rkt"
+         "fandom-request.rkt"
          "page-wiki.rkt"
          "../lib/syntax.rkt"
          "../lib/thread-utils.rkt"
          "../lib/url-utils.rkt"
-         "whole-utils.rkt"
          "../lib/xexpr-utils.rkt")
 
 (provide
@@ -73,30 +73,24 @@
     (define-values (members-data page-data siteinfo)
       (thread-values
        (λ ()
-         (define dest-url
-           (format "~a/api.php?~a"
-                   origin
-                   (params->query `(("action" . "query")
-                                    ("list" . "categorymembers")
-                                    ("cmtitle" . ,prefixed-category)
-                                    ("cmlimit" . "max")
-                                    ("formatversion" . "2")
-                                    ("format" . "json")))))
-         (log-outgoing dest-url)
-         (define dest-res (easy:get dest-url #:timeouts timeouts))
-         (easy:response-json dest-res))
+         (easy:response-json
+          (fandom-get-api
+           wikiname
+           `(("action" . "query")
+             ("list" . "categorymembers")
+             ("cmtitle" . ,prefixed-category)
+             ("cmlimit" . "max")
+             ("formatversion" . "2")
+             ("format" . "json")))))
        (λ ()
-         (define dest-url
-           (format "~a/api.php?~a"
-                   origin
-                   (params->query `(("action" . "parse")
-                                    ("page" . ,prefixed-category)
-                                    ("prop" . "text|headhtml|langlinks")
-                                    ("formatversion" . "2")
-                                    ("format" . "json")))))
-         (log-outgoing dest-url)
-         (define dest-res (easy:get dest-url #:timeouts timeouts))
-         (easy:response-json dest-res))
+         (easy:response-json
+          (fandom-get-api
+           wikiname
+           `(("action" . "parse")
+             ("page" . ,prefixed-category)
+             ("prop" . "text|headhtml|langlinks")
+             ("formatversion" . "2")
+             ("format" . "json")))))
        (λ ()
          (siteinfo-fetch wikiname))))
 
diff --git a/src/page-file.rkt b/src/page-file.rkt
index 2a7332c..5151f1d 100644
--- a/src/page-file.rkt
+++ b/src/page-file.rkt
@@ -15,11 +15,11 @@
          "application-globals.rkt"
          "config.rkt"
          "data.rkt"
+         "fandom-request.rkt"
          "page-wiki.rkt"
          "../lib/syntax.rkt"
          "../lib/thread-utils.rkt"
          "../lib/url-utils.rkt"
-         "whole-utils.rkt"
          "../lib/xexpr-utils.rkt")
 
 (provide page-file)
@@ -40,8 +40,7 @@
                     (imageDescription . #f))))
 
 (define (url-content-type url)
-  (log-outgoing url)
-  (define dest-res (easy:head url #:timeouts timeouts))
+  (define dest-res (easy:head url))
   (easy:response-headers-ref dest-res 'content-type))
 
 (define (get-media-html url content-type)
@@ -106,20 +105,18 @@
   (response-handler
    (define wikiname (path/param-path (first (url-path (request-uri req)))))
    (define prefixed-title (path/param-path (caddr (url-path (request-uri req)))))
-   (define origin (format "https://~a.fandom.com" wikiname))
-   (define source-url (format "~a/wiki/~a" origin prefixed-title))
+   (define source-url (format "https://~a.fandom.com/wiki/~a" wikiname prefixed-title))
 
    (define-values (media-detail siteinfo)
      (thread-values
       (λ ()
-        (define dest-url
-          (format "~a/wikia.php?~a"
-                  origin
-                  (params->query `(("format" . "json") ("controller" . "Lightbox")
-                                   ("method" . "getMediaDetail")
-                                   ("fileTitle" . ,prefixed-title)))))
-        (log-outgoing dest-url)
-        (define dest-res (easy:get dest-url #:timeouts timeouts))
+        (define dest-res
+          (fandom-get
+           wikiname
+           (format "/wikia.php?~a"
+                   (params->query `(("format" . "json") ("controller" . "Lightbox")
+                                    ("method" . "getMediaDetail")
+                                    ("fileTitle" . ,prefixed-title))))))
         (easy:response-json dest-res))
       (λ ()
         (siteinfo-fetch wikiname))))
diff --git a/src/page-search.rkt b/src/page-search.rkt
index 019ebfe..39f361a 100644
--- a/src/page-search.rkt
+++ b/src/page-search.rkt
@@ -2,7 +2,6 @@
 (require racket/dict
          racket/list
          racket/string
-         (prefix-in easy: net/http-easy)
          ; html libs
          html-writing
          ; web server libs
@@ -18,7 +17,6 @@
          "../lib/syntax.rkt"
          "../lib/thread-utils.rkt"
          "../lib/url-utils.rkt"
-         "whole-utils.rkt"
          "../lib/xexpr-utils.rkt")
 
 (provide
diff --git a/src/page-wiki.rkt b/src/page-wiki.rkt
index ae060d4..f16792c 100644
--- a/src/page-wiki.rkt
+++ b/src/page-wiki.rkt
@@ -17,12 +17,12 @@
          "application-globals.rkt"
          "config.rkt"
          "data.rkt"
+         "fandom-request.rkt"
          "../lib/pure-utils.rkt"
          "../lib/syntax.rkt"
          "../lib/thread-utils.rkt"
          "../lib/tree-updater.rkt"
          "../lib/url-utils.rkt"
-         "whole-utils.rkt"
          "../lib/xexpr-utils.rkt")
 
 (provide
@@ -38,25 +38,20 @@
 (define (page-wiki req)
   (define wikiname (path/param-path (first (url-path (request-uri req)))))
   (define user-cookies (user-cookies-getter req))
-  (define origin (format "https://~a.fandom.com" wikiname))
   (define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/"))
   (define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path))
 
   (define-values (dest-res siteinfo)
     (thread-values
      (λ ()
-       (define dest-url
-         (format "~a/api.php?~a"
-                 origin
-                 (params->query `(("action" . "parse")
-                                  ("page" . ,path)
-                                  ("prop" . "text|headhtml|langlinks")
-                                  ("formatversion" . "2")
-                                  ("format" . "json")))))
-       (log-outgoing dest-url)
-       (easy:get dest-url
-                 #:timeouts timeouts
-                 #:headers `#hasheq((cookie . ,(format "theme=~a" (user-cookies^-theme user-cookies))))))
+       (fandom-get-api
+        wikiname
+        `(("action" . "parse")
+          ("page" . ,path)
+          ("prop" . "text|headhtml|langlinks")
+          ("formatversion" . "2")
+          ("format" . "json"))
+        #:headers `#hasheq((cookie . ,(format "theme=~a" (user-cookies^-theme user-cookies))))))
      (λ ()
        (siteinfo-fetch wikiname))))
 
@@ -103,4 +98,13 @@
         #:code 200
         #:headers headers
         (λ (out)
-          (write-html body out))))))]))
+          (write-html body out))))))]
+    [(eq? 404 (easy:response-status-code dest-res))
+     (next-dispatcher)]
+    [else
+     (response-handler
+      (error 'page-wiki "Tried to load page ~a/~v~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a"
+             wikiname
+             path
+             (easy:response-status-code dest-res)
+             (easy:response-body dest-res)))]))
diff --git a/src/search-provider-fandom.rkt b/src/search-provider-fandom.rkt
index 2338c13..b8dd48f 100644
--- a/src/search-provider-fandom.rkt
+++ b/src/search-provider-fandom.rkt
@@ -3,8 +3,8 @@
          (prefix-in easy: net/http-easy)
          "application-globals.rkt"
          "config.rkt"
+         "fandom-request.rkt"
          "../lib/url-utils.rkt"
-         "whole-utils.rkt"
          "../lib/xexpr-utils.rkt")
 
 (provide
@@ -17,20 +17,14 @@
     '(#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))
 
 (define (search-fandom wikiname query params)
-  ;; constructing the URL where I want to get fandom data from...
-  (define origin (format "https://~a.fandom.com" wikiname))
-  ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
-  (define dest-url
-    (format "~a/api.php?~a"
-            origin
-            (params->query `(("action" . "query")
-                             ("list" . "search")
-                             ("srsearch" . ,query)
-                             ("formatversion" . "2")
-                             ("format" . "json")))))
-  ;; HTTP request to dest-url for search results
-  (log-outgoing dest-url)
-  (define res (easy:get dest-url #:timeouts timeouts))
+  (define res
+    (fandom-get-api
+     wikiname
+     `(("action" . "query")
+       ("list" . "search")
+       ("srsearch" . ,query)
+       ("formatversion" . "2")
+       ("format" . "json"))))
   (define json (easy:response-json res))
   (define search-results (jp "/query/search" json))
   (generate-results-content-fandom wikiname query search-results))
diff --git a/src/search-provider-solr.rkt b/src/search-provider-solr.rkt
index 1ec48e2..31813da 100644
--- a/src/search-provider-solr.rkt
+++ b/src/search-provider-solr.rkt
@@ -5,7 +5,6 @@
          "application-globals.rkt"
          "../lib/html-parsing/main.rkt"
          "../lib/url-utils.rkt"
-         "whole-utils.rkt"
          "../lib/xexpr-utils.rkt")
 
 (provide
@@ -37,7 +36,6 @@
                          ("sort" . ,(cdr sort))))))
   ;; HTTP request to dest-url for search results
-  (log-outgoing dest-url)
-  (define res (easy:get dest-url #:timeouts timeouts))
+  (define res (easy:get dest-url #:timeouts (easy:make-timeout-config #:lease 5 #:connect 5)))
   (define json (easy:response-json res))
 
   ;; build result objects
diff --git a/src/whole-utils.rkt b/src/whole-utils.rkt
deleted file mode 100644
index 7118866..0000000
--- a/src/whole-utils.rkt
+++ /dev/null
@@ -1,11 +0,0 @@
-#lang typed/racket/base
-(require "config.rkt")
-
-(provide
- ; prints "out: "
- log-outgoing)
-
-(: log-outgoing (String -> Void))
-(define (log-outgoing url-string)
-  (when (config-true? 'log_outgoing)
-    (printf "out: ~a~n" url-string)))