forked from cadence/breezewiki
		
	Add user agent and detect blocked pages
This commit is contained in:
		
							parent
							
								
									97c4e54f38
								
							
						
					
					
						commit
						443f1eecbc
					
				
					 2 changed files with 59 additions and 9 deletions
				
			
		| 
						 | 
				
			
			@ -1,12 +1,16 @@
 | 
			
		|||
#lang typed/racket/base
 | 
			
		||||
(require "config.rkt"
 | 
			
		||||
(require racket/format
 | 
			
		||||
         racket/string
 | 
			
		||||
         "config.rkt"
 | 
			
		||||
         "../lib/url-utils.rkt")
 | 
			
		||||
(define-type Headers (HashTable Symbol (U Bytes String)))
 | 
			
		||||
(require/typed net/http-easy
 | 
			
		||||
  [#:opaque Timeout-Config timeout-config?]
 | 
			
		||||
  [#:opaque Response response?]
 | 
			
		||||
  [#:opaque Session session?]
 | 
			
		||||
  [response-status-code (Response -> Natural)]
 | 
			
		||||
  [current-session (Parameter Session)]
 | 
			
		||||
  [current-user-agent (Parameter (U Bytes String))]
 | 
			
		||||
  [make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)]
 | 
			
		||||
  [get ((U Bytes String)
 | 
			
		||||
        [#:close? Boolean]
 | 
			
		||||
| 
						 | 
				
			
			@ -22,19 +26,41 @@
 | 
			
		|||
 fandom-get-api
 | 
			
		||||
 timeouts)
 | 
			
		||||
 | 
			
		||||
;; Prefix the outgoing user agent with a BreezeWiki identifier.
;; Nothing is changed when the current user agent already contains "BreezeWiki".
;; Otherwise the new value is "BreezeWiki/1.0 (<origin>) <previous user agent>",
;; where <origin> is the configured canonical_origin, or "local" when that
;; setting is not true.
(let ([previous-agent (current-user-agent)])
  (when (not (string-contains? (~a previous-agent) "BreezeWiki"))
    (define instance-origin
      (cond
        [(config-true? 'canonical_origin) (config-get 'canonical_origin)]
        [else "local"]))
    (current-user-agent
     (format "BreezeWiki/1.0 (~a) ~a" instance-origin previous-agent))))
 | 
			
		||||
 | 
			
		||||
;; Timeout configuration handed to every outgoing `get` call in this module.
(define timeouts : Timeout-Config
  (make-timeout-config #:connect 5 #:lease 5))

;; --- Remembered "blocked" response -------------------------------------
;; `fandom-get` records a response here when its status code is 403 or 406,
;; and serves it again instead of making a new request until it expires.

;; (current-inexact-milliseconds) reading taken when the failure was recorded.
(define last-failure : Flonum 0.0)

;; The recorded failure response, or #f when none has been recorded yet.
(define stored-failure : (Option Response) #f)

;; How long a recorded failure keeps being served, in milliseconds
;; (it is added to `last-failure` and compared against the current time).
(define failure-persist-time 30000)

;; Empty header table used when the caller supplies no #:headers.
(define no-headers : Headers '#hasheq())
 | 
			
		||||
 | 
			
		||||
;; Perform a GET request for `path` on behalf of the wiki `wikiname`.
;;
;; The request URL is "https://www.fandom.com" followed by `path`, and the
;; Host header is set to "<wikiname>.fandom.com". Any `headers` given by the
;; caller are kept, with 'Host added or overwritten.
;;
;; If a 403 or 406 response was recorded less than `failure-persist-time`
;; milliseconds ago, that recorded response is returned straight away and no
;; request is made. A fresh response with status 403 or 406 is recorded
;; (together with the current time) before being returned.
(: fandom-get (String String [#:headers (Option Headers)] -> Response))
(define (fandom-get wikiname path #:headers [headers #f])
  (define still-blocked?
    (< (current-inexact-milliseconds)
       (+ last-failure failure-persist-time)))
  (define remembered stored-failure)
  (if (and still-blocked? remembered)
      ;; a recent block is still in effect: reuse it instead of asking again
      remembered
      (let* ([request-url (string-append "https://www.fandom.com" path)]
             [host-header (string-append wikiname ".fandom.com")])
        (log-outgoing wikiname path)
        (define result
          (get request-url
               #:headers (hash-set (or headers no-headers) 'Host host-header)
               #:timeouts timeouts))
        ;; remember blocked responses so later calls can short-circuit
        (when (memq (response-status-code result) '(403 406))
          (set! last-failure (current-inexact-milliseconds))
          (set! stored-failure result))
        result)))
 | 
			
		||||
 | 
			
		||||
(: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response))
 | 
			
		||||
(define (fandom-get-api wikiname params #:headers [headers #f])
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,6 +18,7 @@
 | 
			
		|||
         "config.rkt"
 | 
			
		||||
         "data.rkt"
 | 
			
		||||
         "fandom-request.rkt"
 | 
			
		||||
         "../lib/archive-file-mappings.rkt"
 | 
			
		||||
         "../lib/pure-utils.rkt"
 | 
			
		||||
         "../lib/syntax.rkt"
 | 
			
		||||
         "../lib/thread-utils.rkt"
 | 
			
		||||
| 
						 | 
				
			
			@ -37,8 +38,9 @@
 | 
			
		|||
 | 
			
		||||
(define (page-wiki req)
 | 
			
		||||
  (define wikiname (path/param-path (first (url-path (request-uri req)))))
 | 
			
		||||
  (define segments (map path/param-path (cdr (url-path (request-uri req)))))
 | 
			
		||||
  (define user-cookies (user-cookies-getter req))
 | 
			
		||||
  (define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/"))
 | 
			
		||||
  (define path (string-join (cdr segments) "/"))
 | 
			
		||||
  (define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path))
 | 
			
		||||
 | 
			
		||||
  (define-values (dest-res siteinfo)
 | 
			
		||||
| 
						 | 
				
			
			@ -101,9 +103,31 @@
 | 
			
		|||
               (write-html body out))))))]
 | 
			
		||||
    [(eq? 404 (easy:response-status-code dest-res))
 | 
			
		||||
     (next-dispatcher)]
 | 
			
		||||
    [(memq (easy:response-status-code dest-res) '(403 406))
 | 
			
		||||
     (response-handler
 | 
			
		||||
      (define body
 | 
			
		||||
        (generate-wiki-page
 | 
			
		||||
         `(div
 | 
			
		||||
           (p "Sorry! Fandom isn't allowing BreezeWiki to show pages right now.")
 | 
			
		||||
           (p "We'll automatically try again in 30 seconds, so please stay on this page and be patient.")
 | 
			
		||||
           (p (small "In a hurry? " (a (@ (href ,source-url)) "Click here to read the page on Fandom."))))
 | 
			
		||||
         #:req req
 | 
			
		||||
         #:source-url source-url
 | 
			
		||||
         #:wikiname wikiname
 | 
			
		||||
         #:title (url-segments->guess-title segments)
 | 
			
		||||
         #:siteinfo siteinfo))
 | 
			
		||||
      (response/output
 | 
			
		||||
       #:code 503
 | 
			
		||||
       #:headers (build-headers
 | 
			
		||||
                  always-headers
 | 
			
		||||
                  (header #"Retry-After" #"30")
 | 
			
		||||
                  (header #"Cache-Control" #"max-age=30, public")
 | 
			
		||||
                  (header #"Refresh" #"35"))
 | 
			
		||||
       (λ (out)
 | 
			
		||||
         (write-html body out))))]
 | 
			
		||||
    [else
 | 
			
		||||
     (response-handler
 | 
			
		||||
      (error 'page-wiki "Tried to load page ~a/~v~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a"
 | 
			
		||||
      (error 'page-wiki "Tried to load page ~a/~a~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a"
 | 
			
		||||
             wikiname
 | 
			
		||||
             path
 | 
			
		||||
             (easy:response-status-code dest-res)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue