Add user agent and detect blocked pages
This commit is contained in:
parent
97c4e54f38
commit
443f1eecbc
2 changed files with 59 additions and 9 deletions
|
@ -1,12 +1,16 @@
|
|||
#lang typed/racket/base
|
||||
(require "config.rkt"
|
||||
(require racket/format
|
||||
racket/string
|
||||
"config.rkt"
|
||||
"../lib/url-utils.rkt")
|
||||
(define-type Headers (HashTable Symbol (U Bytes String)))
|
||||
(require/typed net/http-easy
|
||||
[#:opaque Timeout-Config timeout-config?]
|
||||
[#:opaque Response response?]
|
||||
[#:opaque Session session?]
|
||||
[response-status-code (Response -> Natural)]
|
||||
[current-session (Parameter Session)]
|
||||
[current-user-agent (Parameter (U Bytes String))]
|
||||
[make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)]
|
||||
[get ((U Bytes String)
|
||||
[#:close? Boolean]
|
||||
|
@ -22,19 +26,41 @@
|
|||
fandom-get-api
|
||||
timeouts)
|
||||
|
||||
; Module-level setup: prefix the http-easy user agent with a BreezeWiki identifier.
; The new value is "BreezeWiki/1.0 (<origin>) <previous user agent>", where <origin>
; is the configured 'canonical_origin when (config-true? 'canonical_origin) holds,
; and the literal "local" otherwise.
; The string-contains? guard skips the update when the current user agent already
; mentions "BreezeWiki" — presumably so the prefix is not added twice if this code
; runs more than once; TODO confirm.
(unless (string-contains? (~a (current-user-agent)) "BreezeWiki")
|
||||
(current-user-agent
|
||||
(format "BreezeWiki/1.0 (~a) ~a"
|
||||
(if (config-true? 'canonical_origin)
|
||||
(config-get 'canonical_origin)
|
||||
"local")
|
||||
(current-user-agent))))
|
||||
|
||||
; Shared timeout configuration passed as #:timeouts to the `get` calls in fandom-get.
; NOTE(review): the unit of the 5s here is not visible in this file — presumably
; seconds per net/http-easy; confirm against its documentation.
(define timeouts (make-timeout-config #:lease 5 #:connect 5))
|
||||
|
||||
; Time of the most recent 403/406 response seen by fandom-get, as returned by
; (current-inexact-milliseconds). Starts at 0.0, i.e. no failure recorded yet.
(: last-failure Flonum)
|
||||
(define last-failure 0.0)
|
||||
; The most recent 403/406 response itself, or #f if none has been recorded.
; fandom-get returns this instead of making a new request while the failure is
; still considered fresh.
(: stored-failure (Option Response))
|
||||
(define stored-failure #f)
|
||||
; How long a recorded failure stays fresh. It is added to last-failure and compared
; against (current-inexact-milliseconds), so the value is in milliseconds (30000 = 30 s).
(define failure-persist-time 30000)
|
||||
|
||||
; Empty immutable header table, used as the base for hash-set in fandom-get when the
; caller passes no #:headers (i.e. headers is #f).
(: no-headers Headers)
|
||||
(define no-headers '#hasheq())
|
||||
|
||||
; fandom-get: fetch `path` for the wiki `wikiname` and return the Response.
;   wikiname   - wiki subdomain name; used to build the Host header "<wikiname>.fandom.com"
;   path       - request path, appended to "https://www.fandom.com"
;   #:headers  - optional extra request headers; #f means none. 'Host is always set/overridden.
; NOTE(review): this text is a diff rendering, so two bodies appear below. The first
; `get` call (directly after the signature) appears to be the pre-commit body and the
; `(or ...)` form the replacement — confirm against the actual file.
(: fandom-get (String String [#:headers (Option Headers)] -> Response))
|
||||
(define (fandom-get wikiname path #:headers [headers #f])
|
||||
(define dest-url (string-append "https://www.fandom.com" path))
|
||||
(define host (string-append wikiname ".fandom.com"))
|
||||
(log-outgoing wikiname path)
|
||||
(get dest-url
|
||||
#:timeouts timeouts
|
||||
#:headers (hash-set (or headers no-headers) 'Host host)))
|
||||
; Either reuse a recent stored failure, or perform a real request.
(or
|
||||
; If the last recorded failure is less than failure-persist-time ms old, yield the
; stored failure response without contacting Fandom. When stored-failure is #f this
; `and` is #f and the `or` falls through to the request below.
(and ((current-inexact-milliseconds) . < . (+ last-failure failure-persist-time)) stored-failure)
|
||||
(let ()
|
||||
; The request always targets www.fandom.com; the wiki is selected via the Host header.
(define dest-url (string-append "https://www.fandom.com" path))
|
||||
(define host (string-append wikiname ".fandom.com"))
|
||||
(log-outgoing wikiname path)
|
||||
(define res
|
||||
(get dest-url
|
||||
#:timeouts timeouts
|
||||
#:headers (hash-set (or headers no-headers) 'Host host)))
|
||||
; On status 403 or 406, record the time and the response so later calls within the
; persist window return it directly (see the `and` branch above).
(when (memq (response-status-code res) '(403 406))
|
||||
(set! last-failure (current-inexact-milliseconds))
|
||||
(set! stored-failure res))
|
||||
res)))
|
||||
|
||||
(: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response))
|
||||
(define (fandom-get-api wikiname params #:headers [headers #f])
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
"config.rkt"
|
||||
"data.rkt"
|
||||
"fandom-request.rkt"
|
||||
"../lib/archive-file-mappings.rkt"
|
||||
"../lib/pure-utils.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/thread-utils.rkt"
|
||||
|
@ -37,8 +38,9 @@
|
|||
|
||||
(define (page-wiki req)
|
||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||
(define segments (map path/param-path (cdr (url-path (request-uri req)))))
|
||||
(define user-cookies (user-cookies-getter req))
|
||||
(define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/"))
|
||||
(define path (string-join (cdr segments) "/"))
|
||||
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path))
|
||||
|
||||
(define-values (dest-res siteinfo)
|
||||
|
@ -101,9 +103,31 @@
|
|||
(write-html body out))))))]
|
||||
[(eq? 404 (easy:response-status-code dest-res))
|
||||
(next-dispatcher)]
|
||||
[(memq (easy:response-status-code dest-res) '(403 406))
|
||||
(response-handler
|
||||
(define body
|
||||
(generate-wiki-page
|
||||
`(div
|
||||
(p "Sorry! Fandom isn't allowing BreezeWiki to show pages right now.")
|
||||
(p "We'll automatically try again in 30 seconds, so please stay on this page and be patient.")
|
||||
(p (small "In a hurry? " (a (@ (href ,source-url)) "Click here to read the page on Fandom."))))
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title (url-segments->guess-title segments)
|
||||
#:siteinfo siteinfo))
|
||||
(response/output
|
||||
#:code 503
|
||||
#:headers (build-headers
|
||||
always-headers
|
||||
(header #"Retry-After" #"30")
|
||||
(header #"Cache-Control" #"max-age=30, public")
|
||||
(header #"Refresh" #"35"))
|
||||
(λ (out)
|
||||
(write-html body out))))]
|
||||
[else
|
||||
(response-handler
|
||||
(error 'page-wiki "Tried to load page ~a/~v~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a"
|
||||
(error 'page-wiki "Tried to load page ~a/~a~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a"
|
||||
wikiname
|
||||
path
|
||||
(easy:response-status-code dest-res)
|
||||
|
|
Loading…
Reference in a new issue