forked from cadence/breezewiki
Add user agent and detect blocked pages
This commit is contained in:
parent
97c4e54f38
commit
443f1eecbc
2 changed files with 59 additions and 9 deletions
|
@ -1,12 +1,16 @@
|
||||||
#lang typed/racket/base
|
#lang typed/racket/base
|
||||||
(require "config.rkt"
|
(require racket/format
|
||||||
|
racket/string
|
||||||
|
"config.rkt"
|
||||||
"../lib/url-utils.rkt")
|
"../lib/url-utils.rkt")
|
||||||
(define-type Headers (HashTable Symbol (U Bytes String)))
|
(define-type Headers (HashTable Symbol (U Bytes String)))
|
||||||
(require/typed net/http-easy
|
(require/typed net/http-easy
|
||||||
[#:opaque Timeout-Config timeout-config?]
|
[#:opaque Timeout-Config timeout-config?]
|
||||||
[#:opaque Response response?]
|
[#:opaque Response response?]
|
||||||
[#:opaque Session session?]
|
[#:opaque Session session?]
|
||||||
|
[response-status-code (Response -> Natural)]
|
||||||
[current-session (Parameter Session)]
|
[current-session (Parameter Session)]
|
||||||
|
[current-user-agent (Parameter (U Bytes String))]
|
||||||
[make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)]
|
[make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)]
|
||||||
[get ((U Bytes String)
|
[get ((U Bytes String)
|
||||||
[#:close? Boolean]
|
[#:close? Boolean]
|
||||||
|
@ -22,19 +26,41 @@
|
||||||
fandom-get-api
|
fandom-get-api
|
||||||
timeouts)
|
timeouts)
|
||||||
|
|
||||||
|
(unless (string-contains? (~a (current-user-agent)) "BreezeWiki")
|
||||||
|
(current-user-agent
|
||||||
|
(format "BreezeWiki/1.0 (~a) ~a"
|
||||||
|
(if (config-true? 'canonical_origin)
|
||||||
|
(config-get 'canonical_origin)
|
||||||
|
"local")
|
||||||
|
(current-user-agent))))
|
||||||
|
|
||||||
(define timeouts (make-timeout-config #:lease 5 #:connect 5))
|
(define timeouts (make-timeout-config #:lease 5 #:connect 5))
|
||||||
|
|
||||||
|
(: last-failure Flonum)
|
||||||
|
(define last-failure 0.0)
|
||||||
|
(: stored-failure (Option Response))
|
||||||
|
(define stored-failure #f)
|
||||||
|
(define failure-persist-time 30000)
|
||||||
|
|
||||||
(: no-headers Headers)
|
(: no-headers Headers)
|
||||||
(define no-headers '#hasheq())
|
(define no-headers '#hasheq())
|
||||||
|
|
||||||
(: fandom-get (String String [#:headers (Option Headers)] -> Response))
|
(: fandom-get (String String [#:headers (Option Headers)] -> Response))
|
||||||
(define (fandom-get wikiname path #:headers [headers #f])
|
(define (fandom-get wikiname path #:headers [headers #f])
|
||||||
(define dest-url (string-append "https://www.fandom.com" path))
|
(or
|
||||||
(define host (string-append wikiname ".fandom.com"))
|
(and ((current-inexact-milliseconds) . < . (+ last-failure failure-persist-time)) stored-failure)
|
||||||
(log-outgoing wikiname path)
|
(let ()
|
||||||
(get dest-url
|
(define dest-url (string-append "https://www.fandom.com" path))
|
||||||
#:timeouts timeouts
|
(define host (string-append wikiname ".fandom.com"))
|
||||||
#:headers (hash-set (or headers no-headers) 'Host host)))
|
(log-outgoing wikiname path)
|
||||||
|
(define res
|
||||||
|
(get dest-url
|
||||||
|
#:timeouts timeouts
|
||||||
|
#:headers (hash-set (or headers no-headers) 'Host host)))
|
||||||
|
(when (memq (response-status-code res) '(403 406))
|
||||||
|
(set! last-failure (current-inexact-milliseconds))
|
||||||
|
(set! stored-failure res))
|
||||||
|
res)))
|
||||||
|
|
||||||
(: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response))
|
(: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response))
|
||||||
(define (fandom-get-api wikiname params #:headers [headers #f])
|
(define (fandom-get-api wikiname params #:headers [headers #f])
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
"config.rkt"
|
"config.rkt"
|
||||||
"data.rkt"
|
"data.rkt"
|
||||||
"fandom-request.rkt"
|
"fandom-request.rkt"
|
||||||
|
"../lib/archive-file-mappings.rkt"
|
||||||
"../lib/pure-utils.rkt"
|
"../lib/pure-utils.rkt"
|
||||||
"../lib/syntax.rkt"
|
"../lib/syntax.rkt"
|
||||||
"../lib/thread-utils.rkt"
|
"../lib/thread-utils.rkt"
|
||||||
|
@ -37,8 +38,9 @@
|
||||||
|
|
||||||
(define (page-wiki req)
|
(define (page-wiki req)
|
||||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||||
|
(define segments (map path/param-path (cdr (url-path (request-uri req)))))
|
||||||
(define user-cookies (user-cookies-getter req))
|
(define user-cookies (user-cookies-getter req))
|
||||||
(define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/"))
|
(define path (string-join (cdr segments) "/"))
|
||||||
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path))
|
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path))
|
||||||
|
|
||||||
(define-values (dest-res siteinfo)
|
(define-values (dest-res siteinfo)
|
||||||
|
@ -101,9 +103,31 @@
|
||||||
(write-html body out))))))]
|
(write-html body out))))))]
|
||||||
[(eq? 404 (easy:response-status-code dest-res))
|
[(eq? 404 (easy:response-status-code dest-res))
|
||||||
(next-dispatcher)]
|
(next-dispatcher)]
|
||||||
|
[(memq (easy:response-status-code dest-res) '(403 406))
|
||||||
|
(response-handler
|
||||||
|
(define body
|
||||||
|
(generate-wiki-page
|
||||||
|
`(div
|
||||||
|
(p "Sorry! Fandom isn't allowing BreezeWiki to show pages right now.")
|
||||||
|
(p "We'll automatically try again in 30 seconds, so please stay on this page and be patient.")
|
||||||
|
(p (small "In a hurry? " (a (@ (href ,source-url)) "Click here to read the page on Fandom."))))
|
||||||
|
#:req req
|
||||||
|
#:source-url source-url
|
||||||
|
#:wikiname wikiname
|
||||||
|
#:title (url-segments->guess-title segments)
|
||||||
|
#:siteinfo siteinfo))
|
||||||
|
(response/output
|
||||||
|
#:code 503
|
||||||
|
#:headers (build-headers
|
||||||
|
always-headers
|
||||||
|
(header #"Retry-After" #"30")
|
||||||
|
(header #"Cache-Control" #"max-age=30, public")
|
||||||
|
(header #"Refresh" #"35"))
|
||||||
|
(λ (out)
|
||||||
|
(write-html body out))))]
|
||||||
[else
|
[else
|
||||||
(response-handler
|
(response-handler
|
||||||
(error 'page-wiki "Tried to load page ~a/~v~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a"
|
(error 'page-wiki "Tried to load page ~a/~a~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a"
|
||||||
wikiname
|
wikiname
|
||||||
path
|
path
|
||||||
(easy:response-status-code dest-res)
|
(easy:response-status-code dest-res)
|
||||||
|
|
Loading…
Reference in a new issue