Fix running out of file descriptors

This commit is contained in:
Cadence Ember 2024-05-01 00:53:09 +12:00
parent a52d131b93
commit 6260ba809b
12 changed files with 134 additions and 100 deletions

View file

@ -1,6 +1,5 @@
#lang racket/base
(require (prefix-in easy: net/http-easy)
"../src/data.rkt"
(require "../src/data.rkt"
"xexpr-utils.rkt")
(provide

View file

@ -22,8 +22,6 @@
(provide
; headers to always send on all http responses
always-headers
; timeout durations for http-easy requests
timeouts
; generates a consistent footer
application-footer
; generates a consistent template for wiki page content to sit in
@ -39,7 +37,6 @@
(define always-headers
(list (header #"Referrer-Policy" #"same-origin") ; header to not send referers to fandom
(header #"Link" (string->bytes/latin-1 link-header))))
(define timeouts (easy:make-timeout-config #:lease 5 #:connect 5))
(define-runtime-path path-static "../static")
(define theme-icons

View file

@ -7,8 +7,8 @@
(prefix-in easy: net/http-easy)
db
memo
"fandom-request.rkt"
"static-data.rkt"
"whole-utils.rkt"
"../lib/url-utils.rkt"
"../lib/xexpr-utils.rkt"
"../archiver/archiver-database.rkt"
@ -54,16 +54,14 @@
(vector-ref row 3)))
siteinfo-default)]
[else
(define dest-url
(format "https://~a.fandom.com/api.php?~a"
wikiname
(params->query '(("action" . "query")
("meta" . "siteinfo")
("siprop" . "general|rightsinfo")
("format" . "json")
("formatversion" . "2")))))
(log-outgoing dest-url)
(define res (easy:get dest-url))
; fandom-get-api accepts the query parameters as an association list and
; encodes them with params->query itself, so hand it the raw alist rather than
; an already-encoded query string (which would be encoded a second time and
; does not match the function's declared parameter type)
(define res
  (fandom-get-api
   wikiname
   '(("action" . "query")
     ("meta" . "siteinfo")
     ("siprop" . "general|rightsinfo")
     ("format" . "json")
     ("formatversion" . "2"))))
(define data (easy:response-json res))
(siteinfo^ (jp "/query/general/sitename" data)
(second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data)))

View file

@ -33,12 +33,29 @@
; don't forget that I'm returning *code* - return a call to the function
(datum->syntax stx `(make-dispatcher-tree ,ds)))
; (page ds name) expands to a request handler that looks up the dispatcher
; stored under the symbol `name` in the hash `ds` and calls it with the request.
; if the dispatcher's result satisfies response?, it is returned untouched.
; otherwise a 500 text/plain response is produced whose body describes the
; unexpected value; the same message is also written to the current error port.
(define-syntax-rule (page ds name)
  (λ (req)
    (let ([result ((hash-ref ds 'name) req)])
      (cond
        [(response? result) result]
        [else
         (response/output
          #:code 500
          #:mime-type #"text/plain"
          (λ (out)
            ; report to the error port first, then to the response body
            (for-each
             (λ (port)
               (fprintf port
                        "error in ~a:~n expected page to return a response~n actually returned: ~v~n"
                        'name
                        result))
             (list (current-error-port) out))))]))))
(define (make-dispatcher-tree ds)
(define subdomain-dispatcher (hash-ref ds 'subdomain-dispatcher))
(define tree
(sequencer:make
subdomain-dispatcher
(pathprocedure:make "/" (hash-ref ds 'page-home))
(pathprocedure:make "/" (page ds page-home))
(pathprocedure:make "/proxy" (hash-ref ds 'page-proxy))
(pathprocedure:make "/search" (hash-ref ds 'page-global-search))
(pathprocedure:make "/set-user-settings" (hash-ref ds 'page-set-user-settings))
@ -48,7 +65,7 @@
(if (config-true? 'feature_offline::enabled)
(filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (hash-ref ds 'page-wiki-offline)))
(λ (_conn _req) (next-dispatcher)))
(filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (hash-ref ds 'page-wiki)))
(filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (page ds page-wiki)))
(filter:make (pregexp (format "^/~a/search$" px-wikiname)) (lift:make (hash-ref ds 'page-search)))
(filter:make (pregexp (format "^/~a(/(wiki(/)?)?)?$" px-wikiname)) (lift:make (hash-ref ds 'redirect-wiki-home)))
(if (config-true? 'feature_offline::enabled)

48
src/fandom-request.rkt Normal file
View file

@ -0,0 +1,48 @@
#lang typed/racket/base
(require "config.rkt"
"../lib/url-utils.rkt")
(define-type Headers (HashTable Symbol (U Bytes String)))
(require/typed net/http-easy
[#:opaque Timeout-Config timeout-config?]
[#:opaque Response response?]
[#:opaque Session session?]
[current-session (Parameter Session)]
[make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)]
[get ((U Bytes String)
[#:close? Boolean]
[#:headers Headers]
[#:timeouts Timeout-Config]
[#:max-attempts Exact-Positive-Integer]
[#:max-redirects Exact-Nonnegative-Integer]
[#:user-agent (U Bytes String)]
-> Response)])
(provide
fandom-get
fandom-get-api
timeouts)
; timeout configuration passed to every request made through fandom-get;
; also exported so other modules can reuse the same values.
; NOTE(review): the 5s are presumably seconds - confirm against the http-easy docs
(define timeouts (make-timeout-config #:lease 5 #:connect 5))
; empty header table, used as the starting point when a caller passes no #:headers
(: no-headers Headers)
(define no-headers '#hasheq())
; GET `path` from the wiki called `wikiname` and return the http-easy response.
; the request URL always points at www.fandom.com; the wiki itself is selected
; by a Host header of "<wikiname>.fandom.com" added on top of any caller-supplied
; #:headers (a caller-supplied 'Host entry is overwritten).
; NOTE(review): presumably the fixed URL host is what lets requests for all
; wikis share connections (see the commit title) - confirm.
(: fandom-get (String String [#:headers (Option Headers)] -> Response))
(define (fandom-get wikiname path #:headers [headers #f])
  (log-outgoing wikiname path)
  (let* ([base-headers (if headers headers no-headers)]
         [request-headers (hash-set base-headers
                                    'Host
                                    (string-append wikiname ".fandom.com"))])
    (get (string-append "https://www.fandom.com" path)
         #:timeouts timeouts
         #:headers request-headers)))
; GET /api.php on the wiki called `wikiname`.
; `params` is an association list of string pairs; it is encoded into the query
; string with params->query here, so callers must pass the raw alist.
; `headers`, when given, is forwarded unchanged to fandom-get.
(: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response))
(define (fandom-get-api wikiname params #:headers [headers #f])
  (define query (params->query params))
  (define api-path (string-append "/api.php?" query))
  (fandom-get wikiname api-path #:headers headers))
; prints "out: <wikiname> <path>" to the current output port,
; but only when the log_outgoing config option is true
(: log-outgoing (String String -> Void))
(define (log-outgoing wikiname path)
  (cond
    [(config-true? 'log_outgoing)
     (printf "out: ~a ~a~n" wikiname path)]
    [else (void)]))

View file

@ -15,11 +15,11 @@
"application-globals.rkt"
"config.rkt"
"data.rkt"
"fandom-request.rkt"
"page-wiki.rkt"
"../lib/syntax.rkt"
"../lib/thread-utils.rkt"
"../lib/url-utils.rkt"
"whole-utils.rkt"
"../lib/xexpr-utils.rkt")
(provide
@ -73,30 +73,24 @@
(define-values (members-data page-data siteinfo)
(thread-values
(λ ()
(define dest-url
(format "~a/api.php?~a"
origin
(params->query `(("action" . "query")
("list" . "categorymembers")
("cmtitle" . ,prefixed-category)
("cmlimit" . "max")
("formatversion" . "2")
("format" . "json")))))
(log-outgoing dest-url)
(define dest-res (easy:get dest-url #:timeouts timeouts))
(easy:response-json dest-res))
(easy:response-json
(fandom-get-api
wikiname
`(("action" . "query")
("list" . "categorymembers")
("cmtitle" . ,prefixed-category)
("cmlimit" . "max")
("formatversion" . "2")
("format" . "json")))))
(λ ()
(define dest-url
(format "~a/api.php?~a"
origin
(params->query `(("action" . "parse")
("page" . ,prefixed-category)
("prop" . "text|headhtml|langlinks")
("formatversion" . "2")
("format" . "json")))))
(log-outgoing dest-url)
(define dest-res (easy:get dest-url #:timeouts timeouts))
(easy:response-json dest-res))
(easy:response-json
(fandom-get-api
wikiname
`(("action" . "parse")
("page" . ,prefixed-category)
("prop" . "text|headhtml|langlinks")
("formatversion" . "2")
("format" . "json")))))
(λ ()
(siteinfo-fetch wikiname))))

View file

@ -15,11 +15,11 @@
"application-globals.rkt"
"config.rkt"
"data.rkt"
"fandom-request.rkt"
"page-wiki.rkt"
"../lib/syntax.rkt"
"../lib/thread-utils.rkt"
"../lib/url-utils.rkt"
"whole-utils.rkt"
"../lib/xexpr-utils.rkt")
(provide page-file)
@ -40,8 +40,7 @@
(imageDescription . #f))))
(define (url-content-type url)
(log-outgoing url)
(define dest-res (easy:head url #:timeouts timeouts))
(define dest-res (easy:head url))
(easy:response-headers-ref dest-res 'content-type))
(define (get-media-html url content-type)
@ -106,20 +105,18 @@
(response-handler
(define wikiname (path/param-path (first (url-path (request-uri req)))))
(define prefixed-title (path/param-path (caddr (url-path (request-uri req)))))
(define origin (format "https://~a.fandom.com" wikiname))
(define source-url (format "~a/wiki/~a" origin prefixed-title))
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname prefixed-title))
(define-values (media-detail siteinfo)
(thread-values
(λ ()
(define dest-url
(format "~a/wikia.php?~a"
origin
(params->query `(("format" . "json") ("controller" . "Lightbox")
("method" . "getMediaDetail")
("fileTitle" . ,prefixed-title)))))
(log-outgoing dest-url)
(define dest-res (easy:get dest-url #:timeouts timeouts))
(define dest-res
(fandom-get
wikiname
(format "/wikia.php?~a"
(params->query `(("format" . "json") ("controller" . "Lightbox")
("method" . "getMediaDetail")
("fileTitle" . ,prefixed-title))))))
(easy:response-json dest-res))
(λ ()
(siteinfo-fetch wikiname))))

View file

@ -2,7 +2,6 @@
(require racket/dict
racket/list
racket/string
(prefix-in easy: net/http-easy)
; html libs
html-writing
; web server libs
@ -18,7 +17,6 @@
"../lib/syntax.rkt"
"../lib/thread-utils.rkt"
"../lib/url-utils.rkt"
"whole-utils.rkt"
"../lib/xexpr-utils.rkt")
(provide

View file

@ -17,12 +17,12 @@
"application-globals.rkt"
"config.rkt"
"data.rkt"
"fandom-request.rkt"
"../lib/pure-utils.rkt"
"../lib/syntax.rkt"
"../lib/thread-utils.rkt"
"../lib/tree-updater.rkt"
"../lib/url-utils.rkt"
"whole-utils.rkt"
"../lib/xexpr-utils.rkt")
(provide
@ -38,25 +38,20 @@
(define (page-wiki req)
(define wikiname (path/param-path (first (url-path (request-uri req)))))
(define user-cookies (user-cookies-getter req))
(define origin (format "https://~a.fandom.com" wikiname))
(define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/"))
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path))
(define-values (dest-res siteinfo)
(thread-values
(λ ()
(define dest-url
(format "~a/api.php?~a"
origin
(params->query `(("action" . "parse")
("page" . ,path)
("prop" . "text|headhtml|langlinks")
("formatversion" . "2")
("format" . "json")))))
(log-outgoing dest-url)
(easy:get dest-url
#:timeouts timeouts
#:headers `#hasheq((cookie . ,(format "theme=~a" (user-cookies^-theme user-cookies))))))
(fandom-get-api
wikiname
`(("action" . "parse")
("page" . ,path)
("prop" . "text|headhtml|langlinks")
("formatversion" . "2")
("format" . "json"))
#:headers `#hasheq((cookie . ,(format "theme=~a" (user-cookies^-theme user-cookies))))))
(λ ()
(siteinfo-fetch wikiname))))
@ -103,4 +98,13 @@
#:code 200
#:headers headers
(λ (out)
(write-html body out))))))]))
(write-html body out))))))]
[(eq? 404 (easy:response-status-code dest-res))
(next-dispatcher)]
[else
(response-handler
(error 'page-wiki "Tried to load page ~a/~v~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a"
wikiname
path
(easy:response-status-code dest-res)
(easy:response-body dest-res)))]))

View file

@ -3,8 +3,8 @@
(prefix-in easy: net/http-easy)
"application-globals.rkt"
"config.rkt"
"fandom-request.rkt"
"../lib/url-utils.rkt"
"whole-utils.rkt"
"../lib/xexpr-utils.rkt")
(provide
@ -17,20 +17,14 @@
'(#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))
(define (search-fandom wikiname query params)
;; constructing the URL where I want to get fandom data from...
(define origin (format "https://~a.fandom.com" wikiname))
;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
(define dest-url
(format "~a/api.php?~a"
origin
(params->query `(("action" . "query")
("list" . "search")
("srsearch" . ,query)
("formatversion" . "2")
("format" . "json")))))
;; HTTP request to dest-url for search results
(log-outgoing dest-url)
(define res (easy:get dest-url #:timeouts timeouts))
(define res
(fandom-get-api
wikiname
`(("action" . "query")
("list" . "search")
("srsearch" . ,query)
("formatversion" . "2")
("format" . "json"))))
(define json (easy:response-json res))
(define search-results (jp "/query/search" json))
(generate-results-content-fandom wikiname query search-results))

View file

@ -5,7 +5,6 @@
"application-globals.rkt"
"../lib/html-parsing/main.rkt"
"../lib/url-utils.rkt"
"whole-utils.rkt"
"../lib/xexpr-utils.rkt")
(provide
@ -37,7 +36,7 @@
("sort" . ,(cdr sort))))))
;; HTTP request to dest-url for search results
(log-outgoing dest-url)
(define res (easy:get dest-url #:timeouts timeouts))
(define res (easy:get dest-url #:timeouts (easy:make-timeout-config #:lease 5 #:connect 5)))
(define json (easy:response-json res))
;; build result objects

View file

@ -1,11 +0,0 @@
#lang typed/racket/base
(require "config.rkt")
(provide
; prints "out: <url>"
log-outgoing)
; writes "out: <url>" to the current output port when the log_outgoing
; config option is true; otherwise does nothing
(: log-outgoing (String -> Void))
(define (log-outgoing url-string)
  (if (config-true? 'log_outgoing)
      (printf "out: ~a~n" url-string)
      (void)))