forked from cadence/breezewiki
		
	Add user agent and detect blocked pages
This commit is contained in:
		
							parent
							
								
									97c4e54f38
								
							
						
					
					
						commit
						443f1eecbc
					
				
					 2 changed files with 59 additions and 9 deletions
				
			
		|  | @ -1,12 +1,16 @@ | ||||||
| #lang typed/racket/base | #lang typed/racket/base | ||||||
| (require "config.rkt" | (require racket/format | ||||||
|  |          racket/string | ||||||
|  |          "config.rkt" | ||||||
|          "../lib/url-utils.rkt") |          "../lib/url-utils.rkt") | ||||||
| (define-type Headers (HashTable Symbol (U Bytes String))) | (define-type Headers (HashTable Symbol (U Bytes String))) | ||||||
| (require/typed net/http-easy | (require/typed net/http-easy | ||||||
|   [#:opaque Timeout-Config timeout-config?] |   [#:opaque Timeout-Config timeout-config?] | ||||||
|   [#:opaque Response response?] |   [#:opaque Response response?] | ||||||
|   [#:opaque Session session?] |   [#:opaque Session session?] | ||||||
|  |   [response-status-code (Response -> Natural)] | ||||||
|   [current-session (Parameter Session)] |   [current-session (Parameter Session)] | ||||||
|  |   [current-user-agent (Parameter (U Bytes String))] | ||||||
|   [make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)] |   [make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)] | ||||||
|   [get ((U Bytes String) |   [get ((U Bytes String) | ||||||
|         [#:close? Boolean] |         [#:close? Boolean] | ||||||
|  | @ -22,19 +26,41 @@ | ||||||
|  fandom-get-api |  fandom-get-api | ||||||
|  timeouts) |  timeouts) | ||||||
| 
 | 
 | ||||||
|  | (unless (string-contains? (~a (current-user-agent)) "BreezeWiki") | ||||||
|  |   (current-user-agent | ||||||
|  |    (format "BreezeWiki/1.0 (~a) ~a" | ||||||
|  |            (if (config-true? 'canonical_origin) | ||||||
|  |                (config-get 'canonical_origin) | ||||||
|  |                "local") | ||||||
|  |            (current-user-agent)))) | ||||||
|  | 
 | ||||||
| (define timeouts (make-timeout-config #:lease 5 #:connect 5)) | (define timeouts (make-timeout-config #:lease 5 #:connect 5)) | ||||||
| 
 | 
 | ||||||
|  | (: last-failure Flonum) | ||||||
|  | (define last-failure 0.0) | ||||||
|  | (: stored-failure (Option Response)) | ||||||
|  | (define stored-failure #f) | ||||||
|  | (define failure-persist-time 30000) | ||||||
|  | 
 | ||||||
| (: no-headers Headers) | (: no-headers Headers) | ||||||
| (define no-headers '#hasheq()) | (define no-headers '#hasheq()) | ||||||
| 
 | 
 | ||||||
| (: fandom-get (String String [#:headers (Option Headers)] -> Response)) | (: fandom-get (String String [#:headers (Option Headers)] -> Response)) | ||||||
| (define (fandom-get wikiname path #:headers [headers #f]) | (define (fandom-get wikiname path #:headers [headers #f]) | ||||||
|  |   (or | ||||||
|  |    (and ((current-inexact-milliseconds) . < . (+ last-failure failure-persist-time)) stored-failure) | ||||||
|  |    (let () | ||||||
|      (define dest-url (string-append "https://www.fandom.com" path)) |      (define dest-url (string-append "https://www.fandom.com" path)) | ||||||
|      (define host (string-append wikiname ".fandom.com")) |      (define host (string-append wikiname ".fandom.com")) | ||||||
|      (log-outgoing wikiname path) |      (log-outgoing wikiname path) | ||||||
|  |      (define res | ||||||
|        (get dest-url |        (get dest-url | ||||||
|             #:timeouts timeouts |             #:timeouts timeouts | ||||||
|             #:headers (hash-set (or headers no-headers) 'Host host))) |             #:headers (hash-set (or headers no-headers) 'Host host))) | ||||||
|  |      (when (memq (response-status-code res) '(403 406)) | ||||||
|  |        (set! last-failure (current-inexact-milliseconds)) | ||||||
|  |        (set! stored-failure res)) | ||||||
|  |      res))) | ||||||
| 
 | 
 | ||||||
| (: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response)) | (: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response)) | ||||||
| (define (fandom-get-api wikiname params #:headers [headers #f]) | (define (fandom-get-api wikiname params #:headers [headers #f]) | ||||||
|  |  | ||||||
|  | @ -18,6 +18,7 @@ | ||||||
|          "config.rkt" |          "config.rkt" | ||||||
|          "data.rkt" |          "data.rkt" | ||||||
|          "fandom-request.rkt" |          "fandom-request.rkt" | ||||||
|  |          "../lib/archive-file-mappings.rkt" | ||||||
|          "../lib/pure-utils.rkt" |          "../lib/pure-utils.rkt" | ||||||
|          "../lib/syntax.rkt" |          "../lib/syntax.rkt" | ||||||
|          "../lib/thread-utils.rkt" |          "../lib/thread-utils.rkt" | ||||||
|  | @ -37,8 +38,9 @@ | ||||||
| 
 | 
 | ||||||
| (define (page-wiki req) | (define (page-wiki req) | ||||||
|   (define wikiname (path/param-path (first (url-path (request-uri req))))) |   (define wikiname (path/param-path (first (url-path (request-uri req))))) | ||||||
|  |   (define segments (map path/param-path (cdr (url-path (request-uri req))))) | ||||||
|   (define user-cookies (user-cookies-getter req)) |   (define user-cookies (user-cookies-getter req)) | ||||||
|   (define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/")) |   (define path (string-join (cdr segments) "/")) | ||||||
|   (define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path)) |   (define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path)) | ||||||
| 
 | 
 | ||||||
|   (define-values (dest-res siteinfo) |   (define-values (dest-res siteinfo) | ||||||
|  | @ -101,9 +103,31 @@ | ||||||
|                (write-html body out))))))] |                (write-html body out))))))] | ||||||
|     [(eq? 404 (easy:response-status-code dest-res)) |     [(eq? 404 (easy:response-status-code dest-res)) | ||||||
|      (next-dispatcher)] |      (next-dispatcher)] | ||||||
|  |     [(memq (easy:response-status-code dest-res) '(403 406)) | ||||||
|  |      (response-handler | ||||||
|  |       (define body | ||||||
|  |         (generate-wiki-page | ||||||
|  |          `(div | ||||||
|  |            (p "Sorry! Fandom isn't allowing BreezeWiki to show pages right now.") | ||||||
|  |            (p "We'll automatically try again in 30 seconds, so please stay on this page and be patient.") | ||||||
|  |            (p (small "In a hurry? " (a (@ (href ,source-url)) "Click here to read the page on Fandom.")))) | ||||||
|  |          #:req req | ||||||
|  |          #:source-url source-url | ||||||
|  |          #:wikiname wikiname | ||||||
|  |          #:title (url-segments->guess-title segments) | ||||||
|  |          #:siteinfo siteinfo)) | ||||||
|  |       (response/output | ||||||
|  |        #:code 503 | ||||||
|  |        #:headers (build-headers | ||||||
|  |                   always-headers | ||||||
|  |                   (header #"Retry-After" #"30") | ||||||
|  |                   (header #"Cache-Control" #"max-age=30, public") | ||||||
|  |                   (header #"Refresh" #"35")) | ||||||
|  |        (λ (out) | ||||||
|  |          (write-html body out))))] | ||||||
|     [else |     [else | ||||||
|      (response-handler |      (response-handler | ||||||
|       (error 'page-wiki "Tried to load page ~a/~v~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a" |       (error 'page-wiki "Tried to load page ~a/~a~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a" | ||||||
|              wikiname |              wikiname | ||||||
|              path |              path | ||||||
|              (easy:response-status-code dest-res) |              (easy:response-status-code dest-res) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue