forked from cadence/breezewiki
		
	Create archiver and offline code handlers
Somewhat messy. Will clean up gradually in future commits.
This commit is contained in:
		
							parent
							
								
									b8e5fb8dc5
								
							
						
					
					
						commit
						c7cce5479d
					
				
					 46 changed files with 4274 additions and 407 deletions
				
			
		
							
								
								
									
										28
									
								
								lib/archive-file-mappings.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								lib/archive-file-mappings.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,28 @@
 | 
			
		|||
#lang racket/base
 | 
			
		||||
(require racket/string
 | 
			
		||||
         net/url
 | 
			
		||||
         (only-in net/uri-codec uri-decode)
 | 
			
		||||
         "url-utils.rkt")
 | 
			
		||||
(provide
 | 
			
		||||
 local-encoded-url->segments
 | 
			
		||||
 url-segments->basename
 | 
			
		||||
 local-encoded-url->basename
 | 
			
		||||
 basename->name-for-query
 | 
			
		||||
 url-segments->guess-title)
 | 
			
		||||
 | 
			
		||||
(define (local-encoded-url->segments str) ; '("wiki" "Page_title")
 | 
			
		||||
  (map path/param-path (url-path (string->url str))))
 | 
			
		||||
 | 
			
		||||
(define (url-segments->basename segments) ; "Page_title" filename encoded, no extension or dir prefix
 | 
			
		||||
  (define extra-encoded (map (λ (s) (bytes->string/latin-1 (percent-encode s filename-set #f))) (cdr segments)))
 | 
			
		||||
  (define basic-filename (string-join extra-encoded "#"))
 | 
			
		||||
  basic-filename)
 | 
			
		||||
 | 
			
		||||
(define (local-encoded-url->basename str) ; '("wiki" "Page_title"), no extension or dir prefix
 | 
			
		||||
  (url-segments->basename (local-encoded-url->segments str)))
 | 
			
		||||
 | 
			
		||||
(define (basename->name-for-query str)
 | 
			
		||||
  (uri-decode (regexp-replace* #rx"#" str "/")))
 | 
			
		||||
 | 
			
		||||
(define (url-segments->guess-title segments)
 | 
			
		||||
  (regexp-replace* #rx"_" (cadr segments) " "))
 | 
			
		||||
							
								
								
									
										1887
									
								
								lib/html-parsing/main.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										1887
									
								
								lib/html-parsing/main.rkt
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										34
									
								
								lib/mime-types.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								lib/mime-types.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,34 @@
 | 
			
		|||
#lang racket/base
 | 
			
		||||
(require racket/contract
 | 
			
		||||
         racket/match
 | 
			
		||||
         racket/path
 | 
			
		||||
         racket/runtime-path
 | 
			
		||||
         racket/string)
 | 
			
		||||
 | 
			
		||||
(provide
 | 
			
		||||
 (contract-out
 | 
			
		||||
  [ext->mime-type (-> bytes? bytes?)]
 | 
			
		||||
  [mime-type->ext (-> bytes? bytes?)]))
 | 
			
		||||
 | 
			
		||||
(define-runtime-path mime.types-path "mime.types")
 | 
			
		||||
 | 
			
		||||
(define ls
 | 
			
		||||
  (call-with-input-file mime.types-path
 | 
			
		||||
    (λ (in) (for/list ([line (in-lines in)]
 | 
			
		||||
                       #:when (not (regexp-match? #rx"^ *($|#)" line)))
 | 
			
		||||
              (match line
 | 
			
		||||
                [(regexp #rx"^([^ ]+) +(.+)$" (list _ mime ext))
 | 
			
		||||
                 (cons (string->bytes/utf-8 ext) (string->bytes/utf-8 mime))]
 | 
			
		||||
                [(regexp #rx"^ *#") (void)]
 | 
			
		||||
                [_ (log-warning "mime-types: failed to parse line ~s" line)])))))
 | 
			
		||||
 | 
			
		||||
(define forward-hash (make-immutable-hash ls))
 | 
			
		||||
(define reverse-hash (make-immutable-hash (map (λ (x) (cons (cdr x) (car x))) ls)))
 | 
			
		||||
 | 
			
		||||
(define (ext->mime-type ext-in)
 | 
			
		||||
  (define ext (regexp-replace #rx"^\\." ext-in #""))
 | 
			
		||||
  (hash-ref forward-hash ext))
 | 
			
		||||
 | 
			
		||||
(define (mime-type->ext m-in)
 | 
			
		||||
  (define m (regexp-replace #rx";.*" m-in #""))
 | 
			
		||||
  (hash-ref reverse-hash m))
 | 
			
		||||
							
								
								
									
										85
									
								
								lib/mime.types
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								lib/mime.types
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,85 @@
 | 
			
		|||
text/html                             html
 | 
			
		||||
text/css                              css
 | 
			
		||||
text/xml                              xml
 | 
			
		||||
image/gif                             gif
 | 
			
		||||
image/jpeg                            jpeg
 | 
			
		||||
application/javascript                js
 | 
			
		||||
text/javascript                       js
 | 
			
		||||
application/atom+xml                  atom
 | 
			
		||||
application/rss+xml                   rss
 | 
			
		||||
 | 
			
		||||
text/mathml                           mml
 | 
			
		||||
text/plain                            txt
 | 
			
		||||
text/x-component                      htc
 | 
			
		||||
 | 
			
		||||
image/png                             png
 | 
			
		||||
image/tiff                            tiff
 | 
			
		||||
image/vnd.wap.wbmp                    wbmp
 | 
			
		||||
image/x-icon                          ico
 | 
			
		||||
image/x-jng                           jng
 | 
			
		||||
image/x-ms-bmp                        bmp
 | 
			
		||||
image/svg+xml                         svg
 | 
			
		||||
image/webp                            webp
 | 
			
		||||
 | 
			
		||||
application/font-woff2                woff2
 | 
			
		||||
application/acad                      woff2
 | 
			
		||||
font/woff2                            woff2
 | 
			
		||||
application/font-woff                 woff
 | 
			
		||||
application/x-font-ttf                ttf
 | 
			
		||||
application/x-font-truetype           ttf
 | 
			
		||||
application/x-truetype-font           ttf
 | 
			
		||||
application/font-sfnt                 ttf
 | 
			
		||||
font/sfnt                             ttf
 | 
			
		||||
application/vnd.oasis.opendocument.formula-template otf
 | 
			
		||||
application/x-font-opentype           otf
 | 
			
		||||
application/vnd.ms-opentype           otf
 | 
			
		||||
font/otf                              otf
 | 
			
		||||
application/java-archive              jar
 | 
			
		||||
application/json                      json
 | 
			
		||||
application/mac-binhex40              hqx
 | 
			
		||||
application/msword                    doc
 | 
			
		||||
application/pdf                       pdf
 | 
			
		||||
application/postscript                ps
 | 
			
		||||
application/rtf                       rtf
 | 
			
		||||
application/vnd.apple.mpegurl         m3u8
 | 
			
		||||
application/vnd.ms-excel              xls
 | 
			
		||||
application/vnd.ms-fontobject         eot
 | 
			
		||||
application/vnd.ms-powerpoint         ppt
 | 
			
		||||
application/vnd.wap.wmlc              wmlc
 | 
			
		||||
application/vnd.google-earth.kml+xml  kml
 | 
			
		||||
application/vnd.google-earth.kmz      kmz
 | 
			
		||||
application/x-7z-compressed           7z
 | 
			
		||||
application/x-cocoa                   cco
 | 
			
		||||
application/x-java-archive-diff       jardiff
 | 
			
		||||
application/x-java-jnlp-file          jnlp
 | 
			
		||||
application/x-makeself                run
 | 
			
		||||
application/x-perl                    pl
 | 
			
		||||
application/x-rar-compressed          rar
 | 
			
		||||
application/x-redhat-package-manager  rpm
 | 
			
		||||
application/x-sea                     sea
 | 
			
		||||
application/x-shockwave-flash         swf
 | 
			
		||||
application/x-stuffit                 sit
 | 
			
		||||
application/x-tcl                     tcl
 | 
			
		||||
application/x-x509-ca-cert            pem
 | 
			
		||||
application/x-xpinstall               xpi
 | 
			
		||||
application/xhtml+xml                 xhtml
 | 
			
		||||
application/xspf+xml                  xspf
 | 
			
		||||
application/zip                       zip
 | 
			
		||||
application/gzip                      gz
 | 
			
		||||
 | 
			
		||||
audio/midi                            mid midi kar
 | 
			
		||||
audio/mpeg                            mp3
 | 
			
		||||
audio/ogg                             ogg
 | 
			
		||||
audio/x-m4a                           m4a
 | 
			
		||||
audio/x-realaudio                     ra
 | 
			
		||||
 | 
			
		||||
video/mp2t                            ts
 | 
			
		||||
video/mp4                             mp4
 | 
			
		||||
video/mpeg                            mpeg
 | 
			
		||||
video/quicktime                       mov
 | 
			
		||||
video/webm                            webm
 | 
			
		||||
video/x-flv                           flv
 | 
			
		||||
video/x-m4v                           m4v
 | 
			
		||||
video/x-mng                           mng
 | 
			
		||||
video/x-ms-wmv                        wmv
 | 
			
		||||
video/x-msvideo                       avi
 | 
			
		||||
							
								
								
									
										45
									
								
								lib/pure-utils.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								lib/pure-utils.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,45 @@
 | 
			
		|||
#lang typed/racket/base
 | 
			
		||||
 | 
			
		||||
(provide
 | 
			
		||||
 ; call the updater on the dictionary key only if it has that key
 | 
			
		||||
 alist-maybe-update
 | 
			
		||||
 ; update a value only if a condition succeeds on it
 | 
			
		||||
 u
 | 
			
		||||
 ; like string-join, but for lists
 | 
			
		||||
 list-join
 | 
			
		||||
 u-counter)
 | 
			
		||||
 | 
			
		||||
(module+ test
 | 
			
		||||
  (require "typed-rackunit.rkt"))
 | 
			
		||||
 | 
			
		||||
(define u-counter (box 0))
 | 
			
		||||
 | 
			
		||||
(: alist-maybe-update (∀ (A B) ((Listof (Pairof A B)) A (B -> B) -> (Listof (Pairof A B)))))
 | 
			
		||||
(define (alist-maybe-update alist key updater)
 | 
			
		||||
  (set-box! u-counter (add1 (unbox u-counter)))
 | 
			
		||||
  (map (λ ([p : (Pairof A B)])
 | 
			
		||||
         (if (eq? (car p) key)
 | 
			
		||||
             (cons (car p) (updater (cdr p)))
 | 
			
		||||
             p))
 | 
			
		||||
       alist))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-equal? (alist-maybe-update '((a . 5) (b . 6)) 'a (λ ([x : Number]) (+ x 10)))
 | 
			
		||||
                '((a . 15) (b . 6)))
 | 
			
		||||
  (check-equal? (alist-maybe-update '((b . 6)) 'a (λ ([x : Number]) (+ x 10)))
 | 
			
		||||
                '((b . 6))))
 | 
			
		||||
 | 
			
		||||
(: u (∀ (A) ((A -> Any) (A -> A) A -> A)))
 | 
			
		||||
(define (u condition updater value)
 | 
			
		||||
  (set-box! u-counter (add1 (unbox u-counter)))
 | 
			
		||||
  (if (condition value) (updater value) value))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-equal? (u (λ ([x : Integer]) (< x 5)) (λ ([x : Integer]) (* x -1)) 4) -4)
 | 
			
		||||
  (check-equal? (u (λ ([x : Integer]) (< x 5)) (λ ([x : Integer]) (* x -1)) 8) 8))
 | 
			
		||||
 | 
			
		||||
(: list-join (∀ (A B) (A (Listof B) -> (Listof (U A B)))))
 | 
			
		||||
(define (list-join element ls)
 | 
			
		||||
  (if (pair? (cdr ls))
 | 
			
		||||
      (list* (car ls) element (list-join element (cdr ls)))
 | 
			
		||||
      (list (car ls))))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-equal? (list-join "h" '(2 3 4 5)) '(2 "h" 3 "h" 4 "h" 5)))
 | 
			
		||||
							
								
								
									
										161
									
								
								lib/syntax.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										161
									
								
								lib/syntax.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,161 @@
 | 
			
		|||
#lang racket/base
 | 
			
		||||
(require (for-syntax racket/base))
 | 
			
		||||
 | 
			
		||||
(provide
 | 
			
		||||
 ; help make a nested if. if/in will gain the same false form of its containing if/out.
 | 
			
		||||
 if/out
 | 
			
		||||
 ; let, but the value for each variable is evaluated within a thread
 | 
			
		||||
 thread-let
 | 
			
		||||
 ; cond, but values can be defined between conditions
 | 
			
		||||
 cond/var
 | 
			
		||||
 ; wrap sql statements into lambdas so they can be executed during migration
 | 
			
		||||
 wrap-sql)
 | 
			
		||||
 | 
			
		||||
(module+ test
 | 
			
		||||
  (require rackunit)
 | 
			
		||||
  (define (check-syntax-equal? s1 s2)
 | 
			
		||||
    (check-equal? (syntax->datum s1)
 | 
			
		||||
                  (syntax->datum s2))))
 | 
			
		||||
 | 
			
		||||
;; actual transforming goes on in here.
 | 
			
		||||
;; it's in a submodule so that it can be required in both levels, for testing
 | 
			
		||||
 | 
			
		||||
(module transform racket/base
 | 
			
		||||
  (require racket/list)
 | 
			
		||||
 | 
			
		||||
  (provide
 | 
			
		||||
   transform-if/out
 | 
			
		||||
   transform-thread-let
 | 
			
		||||
   transform/out-cond/var)
 | 
			
		||||
 | 
			
		||||
  (define (transform-if/out stx)
 | 
			
		||||
    (define tree (cdr (syntax->datum stx))) ; condition true false
 | 
			
		||||
    (define else (cddr tree)) ; the else branch cons cell
 | 
			
		||||
    (define result
 | 
			
		||||
      (let walk ([node tree])
 | 
			
		||||
        (cond
 | 
			
		||||
          ; normally, node should be a full cons cell (a pair) but it might be something else.
 | 
			
		||||
          ; situation: reached the end of a list, empty cons cell
 | 
			
		||||
          [(null? node) node]
 | 
			
		||||
          ; situation: reached the end of a list, cons cdr was non-list
 | 
			
		||||
          [(symbol? node) node]
 | 
			
		||||
          ; normal situation, full cons cell
 | 
			
		||||
          ; -- don't go replacing through nested if/out
 | 
			
		||||
          [(and (pair? node) (eq? 'if/out (car node))) node]
 | 
			
		||||
          ; -- replace if/in
 | 
			
		||||
          [(and (pair? node) (eq? 'if/in (car node)))
 | 
			
		||||
           (append '(if) (walk (cdr node)) else)]
 | 
			
		||||
          ; recurse down pair head and tail
 | 
			
		||||
          [(pair? node) (cons (walk (car node)) (walk (cdr node)))]
 | 
			
		||||
          ; something else that can't be recursed into, so pass it through
 | 
			
		||||
          [#t node])))
 | 
			
		||||
    (datum->syntax stx (cons 'if result)))
 | 
			
		||||
 | 
			
		||||
  (define (transform-thread-let stx)
 | 
			
		||||
    (define tree (cdr (syntax->datum stx)))
 | 
			
		||||
    (define defs (car tree))
 | 
			
		||||
    (define forms (cdr tree))
 | 
			
		||||
    (when (eq? (length forms) 0)
 | 
			
		||||
      (error (format "thread-let: bad syntax (need some forms to execute after the threads)~n  forms: ~a" forms)))
 | 
			
		||||
    (define counter (build-list (length defs) values))
 | 
			
		||||
    (datum->syntax
 | 
			
		||||
     stx
 | 
			
		||||
     `(let ([chv (build-vector ,(length defs) (λ (_) (make-channel)))])
 | 
			
		||||
        ,@(map (λ (n)
 | 
			
		||||
                 (define def (list-ref defs n))
 | 
			
		||||
                 `(thread (λ () (channel-put (vector-ref chv ,n) (let _ () ,@(cdr def))))))
 | 
			
		||||
               counter)
 | 
			
		||||
        (let ,(map (λ (n)
 | 
			
		||||
                     (define def (list-ref defs n))
 | 
			
		||||
                     `(,(car def) (channel-get (vector-ref chv ,n))))
 | 
			
		||||
                   counter)
 | 
			
		||||
          ,@forms))))
 | 
			
		||||
 | 
			
		||||
  (define (transform/out-cond/var stx)
 | 
			
		||||
    (define tree (transform-cond/var (cdr (syntax->datum stx))))
 | 
			
		||||
    (datum->syntax
 | 
			
		||||
     stx
 | 
			
		||||
     tree))
 | 
			
		||||
 | 
			
		||||
  (define (transform-cond/var tree)
 | 
			
		||||
    (define-values (els temp) (splitf-at tree (λ (el) (and (pair? el) (not (eq? (car el) 'var))))))
 | 
			
		||||
    (define-values (vars rest) (splitf-at temp (λ (el) (and (pair? el) (eq? (car el) 'var)))))
 | 
			
		||||
    (if (null? rest)
 | 
			
		||||
        `(cond ,@els)
 | 
			
		||||
        `(cond
 | 
			
		||||
          ,@els
 | 
			
		||||
          [#t
 | 
			
		||||
           (let ,(for/list ([var vars])
 | 
			
		||||
                   (cdr var))
 | 
			
		||||
             ,(transform-cond/var rest))]))))
 | 
			
		||||
 | 
			
		||||
;; the syntax definitions and their tests go below here
 | 
			
		||||
 | 
			
		||||
(require 'transform (for-syntax 'transform))
 | 
			
		||||
 | 
			
		||||
(define-syntax (wrap-sql stx)
 | 
			
		||||
  ; the arguments
 | 
			
		||||
  (define xs (cdr (syntax->list stx)))
 | 
			
		||||
  ; wrap each argument
 | 
			
		||||
  (define wrapped (map (λ (xe) ; xe is the syntax of an argument
 | 
			
		||||
                         (if (list? (car (syntax->datum xe)))
 | 
			
		||||
                             ; it's a list of lists (a list of sql migration steps)
 | 
			
		||||
                             ; return instead syntax of a lambda that will call everything in xe
 | 
			
		||||
                             (datum->syntax stx `(λ () ,@xe))
 | 
			
		||||
                             ; it's just a single sql migration step
 | 
			
		||||
                             ; return instead syntax of a lambda that will call xe
 | 
			
		||||
                             (datum->syntax stx `(λ () ,xe))))
 | 
			
		||||
                       xs))
 | 
			
		||||
  ; since I'm returning *code*, I need to return the form (list ...) so that runtime makes a list
 | 
			
		||||
  (datum->syntax stx `(list ,@wrapped)))
 | 
			
		||||
 | 
			
		||||
(define-syntax (if/out stx)
 | 
			
		||||
  (transform-if/out stx))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-syntax-equal? (transform-if/out #'(if/out (condition 1) (if/in (condition 2) (do-yes)) (do-no)))
 | 
			
		||||
                       #'(if (condition 1) (if (condition 2) (do-yes) (do-no)) (do-no)))
 | 
			
		||||
  (check-equal? (if/out #t (if/in #t 'yes) 'no) 'yes)
 | 
			
		||||
  (check-equal? (if/out #f (if/in #t 'yes) 'no) 'no)
 | 
			
		||||
  (check-equal? (if/out #t (if/in #f 'yes) 'no) 'no)
 | 
			
		||||
  (check-equal? (if/out #f (if/in #f 'yes) 'no) 'no))
 | 
			
		||||
 | 
			
		||||
(define-syntax (thread-let stx)
 | 
			
		||||
  (transform-thread-let stx))
 | 
			
		||||
(module+ test
 | 
			
		||||
  ; check that it is transformed as expected
 | 
			
		||||
  (check-syntax-equal?
 | 
			
		||||
   (transform-thread-let
 | 
			
		||||
    #'(thread-let ([a (hey "this is a")]
 | 
			
		||||
                   [b (hey "this is b")])
 | 
			
		||||
                  (list a b)))
 | 
			
		||||
   #'(let ([chv (build-vector 2 (λ (_) (make-channel)))])
 | 
			
		||||
       (thread (λ () (channel-put (vector-ref chv 0) (let _ () (hey "this is a")))))
 | 
			
		||||
       (thread (λ () (channel-put (vector-ref chv 1) (let _ () (hey "this is b")))))
 | 
			
		||||
       (let ([a (channel-get (vector-ref chv 0))]
 | 
			
		||||
             [b (channel-get (vector-ref chv 1))])
 | 
			
		||||
         (list a b))))
 | 
			
		||||
  ; check that they actually execute concurrently
 | 
			
		||||
  (define ch (make-channel))
 | 
			
		||||
  (check-equal? (thread-let ([a (begin
 | 
			
		||||
                                  (channel-put ch 'a)
 | 
			
		||||
                                  (channel-get ch))]
 | 
			
		||||
                             [b (begin0
 | 
			
		||||
                                    (channel-get ch)
 | 
			
		||||
                                  (channel-put ch 'b))])
 | 
			
		||||
                            (list a b))
 | 
			
		||||
                '(b a))
 | 
			
		||||
  ; check that it assigns the correct value to the correct variable
 | 
			
		||||
  (check-equal? (thread-let ([a (sleep 0) 'a] [b 'b]) (list a b))
 | 
			
		||||
                '(a b)))
 | 
			
		||||
 | 
			
		||||
(define-syntax (cond/var stx)
 | 
			
		||||
  (transform/out-cond/var stx))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-syntax-equal? (transform/out-cond/var #'(cond/def [#f 0] (var d (* a 2)) [(eq? d 8) d] [#t "not 4"]))
 | 
			
		||||
                       #'(cond
 | 
			
		||||
                           [#f 0]
 | 
			
		||||
                           [#t
 | 
			
		||||
                            (let ([d (* a 2)])
 | 
			
		||||
                              (cond
 | 
			
		||||
                                [(eq? d 8) d]
 | 
			
		||||
                                [#t "not 4"]))])))
 | 
			
		||||
							
								
								
									
										284
									
								
								lib/tree-updater.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										284
									
								
								lib/tree-updater.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,284 @@
 | 
			
		|||
#lang racket/base
 | 
			
		||||
(require racket/dict
 | 
			
		||||
         racket/function
 | 
			
		||||
         racket/match
 | 
			
		||||
         racket/string
 | 
			
		||||
         "pure-utils.rkt"
 | 
			
		||||
         "url-utils.rkt"
 | 
			
		||||
         "xexpr-utils.rkt")
 | 
			
		||||
 | 
			
		||||
(provide
 | 
			
		||||
 preprocess-html-wiki
 | 
			
		||||
 update-tree-wiki)
 | 
			
		||||
 | 
			
		||||
(define (preprocess-html-wiki html)
 | 
			
		||||
  (define ((rr* find replace) contents)
 | 
			
		||||
    (regexp-replace* find contents replace))
 | 
			
		||||
  ((compose1
 | 
			
		||||
    ; fix navbox list nesting
 | 
			
		||||
    ; navbox on right of page has incorrect html "<td ...><li>" and the xexpr parser puts the <li> much further up the tree
 | 
			
		||||
    ; add a <ul> to make the parser happy
 | 
			
		||||
    ; usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies
 | 
			
		||||
    (rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2")
 | 
			
		||||
    ; change <figcaption><p> to <figcaption><span> to make the parser happy
 | 
			
		||||
    (rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>"))
 | 
			
		||||
   html))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>")
 | 
			
		||||
                "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>")
 | 
			
		||||
  (check-equal? (preprocess-html-wiki "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"> 	<p class=\"caption\">Caption text.</p></figcaption></figure>")
 | 
			
		||||
                "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"><span class=\"caption\">Caption text.</span></figcaption></figure>"))
 | 
			
		||||
 | 
			
		||||
(module+ test
 | 
			
		||||
  (require rackunit
 | 
			
		||||
           "html-parsing/main.rkt")
 | 
			
		||||
  (define wiki-document
 | 
			
		||||
    '(*TOP*
 | 
			
		||||
      (div (@ (class "mw-parser-output"))
 | 
			
		||||
           (aside (@ (role "region") (class "portable-infobox pi-theme-wikia pi-layout-default"))
 | 
			
		||||
                  (h2 (@ (class "pi-item pi-title") (data-source "title"))
 | 
			
		||||
                      "Infobox Title")
 | 
			
		||||
                  (figure (@ (class "pi-item pi-image") (data-source "image"))
 | 
			
		||||
                          (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image image-thumbnail") (title ""))
 | 
			
		||||
                             (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") (class "pi-image-thumbnail")))))
 | 
			
		||||
                  (div (@ (class "pi-item pi-data") (data-source "description"))
 | 
			
		||||
                       (h3 (@ (class "pi-data-label"))
 | 
			
		||||
                           "Description")
 | 
			
		||||
                       (div (@ (class "pi-data-value"))
 | 
			
		||||
                            "Mystery infobox!")))
 | 
			
		||||
           (div (@ (data-test-collapsesection) (class "collapsible collapsetoggle-inline collapsed"))
 | 
			
		||||
                (i (b "This section is hidden for dramatic effect."))
 | 
			
		||||
                (div (@ (class "collapsible-content"))
 | 
			
		||||
                     (p "Another page link: "
 | 
			
		||||
                        (a (@ (data-test-wikilink) (href "https://test.fandom.com/wiki/Another_Page") (title "Another Page"))
 | 
			
		||||
                           "Another Page"))))
 | 
			
		||||
           (figure (@ (class "thumb tnone"))
 | 
			
		||||
                   (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image") (data-test-figure-a))
 | 
			
		||||
                      (img (@ (src "data:image/gif;base64,R0lGODlhAQABAIABAAAAAP///yH5BAEAAAEALAAAAAABAAEAQAICTAEAOw%3D%3D")
 | 
			
		||||
                              (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png")
 | 
			
		||||
                              (class "thumbimage lazyload"))))
 | 
			
		||||
                   (noscript
 | 
			
		||||
                    (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image"))
 | 
			
		||||
                       (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png")
 | 
			
		||||
                               (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png")
 | 
			
		||||
                               (class "thumbimage")))))
 | 
			
		||||
                   (figcaption "Test figure!"))
 | 
			
		||||
           (iframe (@ (src "https://example.com/iframe-src")))))))
 | 
			
		||||
 | 
			
		||||
(define (updater wikiname #:strict-proxy? [strict-proxy? #f])
 | 
			
		||||
  (define classlist-updater
 | 
			
		||||
    (compose1
 | 
			
		||||
     ; uncollapse all navbox items (bottom of page mass navigation)
 | 
			
		||||
     (curry u
 | 
			
		||||
            (λ (classlist) (and ; removed due to scoping, would improve peformance (eq? element-type 'table)
 | 
			
		||||
                            (member "navbox" classlist)
 | 
			
		||||
                            (member "collapsed" classlist)))
 | 
			
		||||
            (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist)))
 | 
			
		||||
     ; uncollapse portable-infobox sections
 | 
			
		||||
     (curry u
 | 
			
		||||
            (λ (classlist) (and ; removed due to scoping, would improve performance (eq? element-type 'section)
 | 
			
		||||
                            (member "pi-collapse" classlist)))
 | 
			
		||||
            (λ (classlist) (filter (λ (v)
 | 
			
		||||
                                     (and (not (equal? v "pi-collapse-closed"))
 | 
			
		||||
                                          (not (equal? v "pi-collapse"))))
 | 
			
		||||
                                   classlist)))
 | 
			
		||||
     ; generic: includes article sections and tables, probably more
 | 
			
		||||
     (curry u
 | 
			
		||||
            (λ (classlist) (and (member "collapsible" classlist)
 | 
			
		||||
                                (member "collapsed" classlist)))
 | 
			
		||||
            (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist)))))
 | 
			
		||||
 | 
			
		||||
  (define ((string-replace-curried from to) str)
 | 
			
		||||
    (string-replace str from to))
 | 
			
		||||
 | 
			
		||||
  (define class-updater
 | 
			
		||||
    (compose1
 | 
			
		||||
     (string-replace-curried " collapsed" "")
 | 
			
		||||
     (string-replace-curried "pi-collapse-closed" "")
 | 
			
		||||
     (string-replace-curried "pi-collapse" "")))
 | 
			
		||||
 | 
			
		||||
  (define attributes-updater
 | 
			
		||||
    (compose1
 | 
			
		||||
     ; uncollapsing
 | 
			
		||||
     #;(curry attribute-maybe-update 'class
 | 
			
		||||
              (λ (class) (string-join (classlist-updater (string-split class " ")) " ")))
 | 
			
		||||
     (curry attribute-maybe-update 'class class-updater)
 | 
			
		||||
     ; change links to stay on the same wiki
 | 
			
		||||
     (curry attribute-maybe-update 'href
 | 
			
		||||
            (λ (href)
 | 
			
		||||
              ((compose1
 | 
			
		||||
                (λ (href) (regexp-replace #rx"^(/wiki/.*)" href (format "/~a\\1" wikiname)))
 | 
			
		||||
                (λ (href) (regexp-replace (pregexp (format "^https://(~a)\\.fandom\\.com(/wiki/.*)" px-wikiname)) href "/\\1\\2")))
 | 
			
		||||
               href)))
 | 
			
		||||
     ; add noreferrer to a.image
 | 
			
		||||
     (curry u
 | 
			
		||||
            (λ (v) (and #;(eq? element-type 'a)
 | 
			
		||||
                        (has-class? "image" v)))
 | 
			
		||||
            (λ (v) (dict-update v 'rel (λ (s)
 | 
			
		||||
                                         (list (string-append (car s) " noreferrer")))
 | 
			
		||||
                                '(""))))
 | 
			
		||||
     ; proxy images from inline styles, if strict_proxy is set
 | 
			
		||||
     (curry u
 | 
			
		||||
            (λ (v) strict-proxy?)
 | 
			
		||||
            (λ (v) (attribute-maybe-update
 | 
			
		||||
                    'style
 | 
			
		||||
                    (λ (style)
 | 
			
		||||
                      (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style
 | 
			
		||||
                                      (λ (whole url)
 | 
			
		||||
                                        (string-append
 | 
			
		||||
                                         "url("
 | 
			
		||||
                                         (u-proxy-url url)
 | 
			
		||||
                                         ")")))) v)))
 | 
			
		||||
     ; and also their links, if strict_proxy is set
 | 
			
		||||
     (curry u
 | 
			
		||||
            (λ (v)
 | 
			
		||||
              (and strict-proxy?
 | 
			
		||||
                   #;(eq? element-type 'a)
 | 
			
		||||
                   (or (has-class? "image-thumbnail" v)
 | 
			
		||||
                       (has-class? "image" v))))
 | 
			
		||||
            (λ (v) (attribute-maybe-update 'href u-proxy-url v)))
 | 
			
		||||
     ; proxy images from src attributes, if strict_proxy is set
 | 
			
		||||
     (curry u
 | 
			
		||||
            (λ (v) strict-proxy?)
 | 
			
		||||
            (λ (v) (attribute-maybe-update 'src u-proxy-url v)))
 | 
			
		||||
     ; don't lazyload images
 | 
			
		||||
     (curry u
 | 
			
		||||
            (λ (v) (dict-has-key? v 'data-src))
 | 
			
		||||
            (λ (v) (attribute-maybe-update 'src (λ (_) (car (dict-ref v 'data-src))) v)))
 | 
			
		||||
     ; don't use srcset - TODO: use srcset?
 | 
			
		||||
     (λ (v) (dict-remove v 'srcset))))
 | 
			
		||||
 | 
			
		||||
  (define (children-updater attributes children)
 | 
			
		||||
    ; more uncollapsing - sample: bandori/wiki/BanG_Dream!_Wikia
 | 
			
		||||
    ((λ (children)
 | 
			
		||||
       (u
 | 
			
		||||
        (λ (v) (has-class? "mw-collapsible-content" attributes))
 | 
			
		||||
        (λ (v) (for/list ([element v])
 | 
			
		||||
                 (u (λ (element) (pair? element))
 | 
			
		||||
                    (λ (element)
 | 
			
		||||
                      `(,(car element)
 | 
			
		||||
                        (@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element)))
 | 
			
		||||
                        ,@(filter element-is-content? (cdr element))))
 | 
			
		||||
                    element)))
 | 
			
		||||
        children))
 | 
			
		||||
     ; wrap blinking animated images in a slot so they can be animated with CSS
 | 
			
		||||
     ((λ (children)
 | 
			
		||||
        (u
 | 
			
		||||
         (λ (v) (and (has-class? "animated" attributes)
 | 
			
		||||
                     ((length v) . > . 1)))
 | 
			
		||||
         (λ (v)
 | 
			
		||||
           `((span (@ (class "animated-slot__outer") (style ,(format "--steps: ~a" (length v))))
 | 
			
		||||
                   (span (@ (class "animated-slot__inner"))
 | 
			
		||||
                         ,@v))))
 | 
			
		||||
         children))
 | 
			
		||||
      children)))
 | 
			
		||||
 | 
			
		||||
  (define (updater element element-type attributes children)
 | 
			
		||||
    ;; replace whole element?
 | 
			
		||||
    (cond
 | 
			
		||||
      ; wrap tables in a div.table-scroller
 | 
			
		||||
      [(and (eq? element-type 'table)
 | 
			
		||||
            (has-class? "wikitable" attributes)
 | 
			
		||||
            (not (dict-has-key? attributes 'data-scrolling)))
 | 
			
		||||
       `(div
 | 
			
		||||
         ((class "table-scroller"))
 | 
			
		||||
         ((,element-type (@ (data-scrolling) ,@attributes)
 | 
			
		||||
                         ,@children)))]
 | 
			
		||||
      ; exclude empty figcaptions
 | 
			
		||||
      [(and (eq? element-type 'figcaption)
 | 
			
		||||
            (or (eq? (length (filter element-is-element? children)) 0)
 | 
			
		||||
                ((query-selector (λ (element-type attributes children)
 | 
			
		||||
                                   (eq? element-type 'use))
 | 
			
		||||
                                 element))))
 | 
			
		||||
       return-no-element]
 | 
			
		||||
      ; exclude infobox items that are videos, and gallery items that are videos
 | 
			
		||||
      [(and (or (has-class? "pi-item" attributes)
 | 
			
		||||
                (has-class? "wikia-gallery-item" attributes))
 | 
			
		||||
            ((query-selector (λ (element-type attributes children)
 | 
			
		||||
                               (has-class? "video-thumbnail" attributes))
 | 
			
		||||
                             element)))
 | 
			
		||||
       return-no-element]
 | 
			
		||||
      ; exclude the invisible brackets after headings
 | 
			
		||||
      [(and (eq? element-type 'span)
 | 
			
		||||
            (has-class? "mw-editsection" attributes))
 | 
			
		||||
       return-no-element]
 | 
			
		||||
      ; display a link instead of an iframe
 | 
			
		||||
      [(eq? element-type 'iframe)
 | 
			
		||||
       (define src (car (dict-ref attributes 'src null)))
 | 
			
		||||
       `(a
 | 
			
		||||
         ((class "iframe-alternative") (href ,src))
 | 
			
		||||
         (,(format "Embedded media: ~a" src)))]
 | 
			
		||||
      ; remove noscript versions of images because they are likely lower quality than the script versions
 | 
			
		||||
      [(and (eq? element-type 'noscript)
 | 
			
		||||
            (match children
 | 
			
		||||
              ; either the noscript has a.image as a first child...
 | 
			
		||||
              [(list (list 'a (list '@ a-att ...) _)) (has-class? "image" a-att)]
 | 
			
		||||
              ; or the noscript has img as a first child
 | 
			
		||||
              [(list (list 'img _)) #t]
 | 
			
		||||
              [_ #f]))
 | 
			
		||||
       return-no-element]
 | 
			
		||||
      [#t
 | 
			
		||||
       (list element-type
 | 
			
		||||
             ;; attributes
 | 
			
		||||
             (attributes-updater #; element-type attributes)
 | 
			
		||||
             ;; children
 | 
			
		||||
             (children-updater attributes children))]))
 | 
			
		||||
 | 
			
		||||
  updater)
 | 
			
		||||
 | 
			
		||||
(define (update-tree-wiki tree wikiname #:strict-proxy? [strict-proxy? #f])
 | 
			
		||||
  (update-tree (updater wikiname #:strict-proxy? strict-proxy?) tree))
 | 
			
		||||
 | 
			
		||||
(module+ test
 | 
			
		||||
  (define transformed
 | 
			
		||||
    (update-tree-wiki wiki-document "test" #:strict-proxy? #t))
 | 
			
		||||
  ; check that wikilinks are changed to be local
 | 
			
		||||
  (check-equal? (get-attribute 'href (bits->attributes
 | 
			
		||||
                                      ((query-selector
 | 
			
		||||
                                        (λ (t a c) (dict-has-key? a 'data-test-wikilink))
 | 
			
		||||
                                        transformed))))
 | 
			
		||||
                "/test/wiki/Another_Page")
 | 
			
		||||
  ; check that a.image has noreferrer
 | 
			
		||||
  (check-equal? (get-attribute 'rel (bits->attributes
 | 
			
		||||
                                     ((query-selector
 | 
			
		||||
                                       (λ (t a c) (and (eq? t 'a)
 | 
			
		||||
                                                       (has-class? "image" a)))
 | 
			
		||||
                                       transformed))))
 | 
			
		||||
                " noreferrer")
 | 
			
		||||
  ; check that article collapse sections become uncollapsed
 | 
			
		||||
  (check-equal? (get-attribute 'class (bits->attributes
 | 
			
		||||
                                       ((query-selector
 | 
			
		||||
                                         (λ (t a c) (dict-has-key? a 'data-test-collapsesection))
 | 
			
		||||
                                         transformed))))
 | 
			
		||||
                "collapsible collapsetoggle-inline")
 | 
			
		||||
  ; check that iframes are gone
 | 
			
		||||
  (check-false ((query-selector (λ (t a c) (eq? t 'iframe)) transformed)))
 | 
			
		||||
  (check-equal? (let* ([alternative ((query-selector (λ (t a c) (has-class? "iframe-alternative" a)) transformed))]
 | 
			
		||||
                       [link ((query-selector (λ (t a c) (eq? t 'a)) alternative))])
 | 
			
		||||
                  (get-attribute 'href (bits->attributes link)))
 | 
			
		||||
                "https://example.com/iframe-src")
 | 
			
		||||
  ; check that images are proxied
 | 
			
		||||
  (check-equal? (get-attribute 'src (bits->attributes
 | 
			
		||||
                                     ((query-selector
 | 
			
		||||
                                       (λ (t a c) (eq? t 'img))
 | 
			
		||||
                                       transformed))))
 | 
			
		||||
                "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image-thumbnail.png")
 | 
			
		||||
  ; check that links to images are proxied
 | 
			
		||||
  (check-equal? (get-attribute 'href (bits->attributes
 | 
			
		||||
                                      ((query-selector
 | 
			
		||||
                                        (λ (t a c) (and (eq? t 'a) (has-class? "image-thumbnail" a)))
 | 
			
		||||
                                        transformed))))
 | 
			
		||||
                "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png")
 | 
			
		||||
  (check-equal? (get-attribute 'href (bits->attributes
 | 
			
		||||
                                      ((query-selector
 | 
			
		||||
                                        (λ (t a c) (member '(data-test-figure-a) a))
 | 
			
		||||
                                        transformed))))
 | 
			
		||||
                "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png")
 | 
			
		||||
  ; check that noscript images are removed
 | 
			
		||||
  (check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f)
 | 
			
		||||
  ; benchmark
 | 
			
		||||
  (when (file-exists? "../misc/Frog.html")
 | 
			
		||||
    (with-input-from-file "../misc/Frog.html"
 | 
			
		||||
      (λ ()
 | 
			
		||||
        (define tree (html->xexp (current-input-port)))
 | 
			
		||||
        (time (length (update-tree-wiki tree "minecraft")))))))
 | 
			
		||||
							
								
								
									
										11
									
								
								lib/typed-rackunit.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								lib/typed-rackunit.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,11 @@
 | 
			
		|||
#lang typed/racket/base
 | 
			
		||||
 | 
			
		||||
(provide
 | 
			
		||||
 check-equal?
 | 
			
		||||
 check-true
 | 
			
		||||
 check-false)
 | 
			
		||||
 | 
			
		||||
(require/typed rackunit
 | 
			
		||||
  [check-equal? (Any Any -> Void)]
 | 
			
		||||
  [check-true (Any -> Void)]
 | 
			
		||||
  [check-false (Any -> Void)])
 | 
			
		||||
							
								
								
									
										108
									
								
								lib/url-utils.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								lib/url-utils.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,108 @@
 | 
			
		|||
#lang typed/racket/base
 | 
			
		||||
(require racket/string
 | 
			
		||||
         "pure-utils.rkt")
 | 
			
		||||
(require/typed web-server/http/request-structs
 | 
			
		||||
               [#:opaque Header header?])
 | 
			
		||||
 | 
			
		||||
(provide
 | 
			
		||||
 ; regex to match wiki names
 | 
			
		||||
 px-wikiname
 | 
			
		||||
 ; make a query string from an association list of strings
 | 
			
		||||
 params->query
 | 
			
		||||
 ; custom percent encoding (you probably want params->query instead)
 | 
			
		||||
 percent-encode
 | 
			
		||||
 ; sets for custom percent encoding
 | 
			
		||||
 path-set urlencoded-set filename-set
 | 
			
		||||
 ; make a proxied version of a fandom url
 | 
			
		||||
 u-proxy-url
 | 
			
		||||
 ; check whether a url is on a domain controlled by fandom
 | 
			
		||||
 is-fandom-url?
 | 
			
		||||
 ; pass in a header, headers, or something useless. they'll all combine into a list
 | 
			
		||||
 build-headers
 | 
			
		||||
 ; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url
 | 
			
		||||
 page-title->path)
 | 
			
		||||
 | 
			
		||||
(module+ test
 | 
			
		||||
  (require "typed-rackunit.rkt"))
 | 
			
		||||
 | 
			
		||||
(define px-wikiname "[a-zA-Z0-9-]{1,50}")
 | 
			
		||||
 | 
			
		||||
;; https://url.spec.whatwg.org/#urlencoded-serializing
 | 
			
		||||
 | 
			
		||||
(define path-set '(#\; ; semicolon is part of the userinfo set in the URL standard, but I'm putting it here
 | 
			
		||||
                   #\? #\` #\{ #\} ; path set
 | 
			
		||||
                   #\  #\" #\# #\< #\> ; query set
 | 
			
		||||
                   ; c0 controls included elsewhere
 | 
			
		||||
                   ; higher ranges included elsewhere
 | 
			
		||||
                   ))
 | 
			
		||||
(define urlencoded-set (append
 | 
			
		||||
                        '(#\! #\' #\( #\) #\~ ; urlencoded set
 | 
			
		||||
                          #\$ #\% #\& #\+ #\, ; component set
 | 
			
		||||
                          #\/ #\: #\= #\@ #\[ #\\ #\] #\^ #\| ; userinfo set
 | 
			
		||||
                          )
 | 
			
		||||
                        path-set))
 | 
			
		||||
 | 
			
		||||
(define filename-set '(#\< #\> #\: #\" #\/ #\\ #\| #\? #\* #\# #\~ #\&))
 | 
			
		||||
 | 
			
		||||
(: percent-encode (String (Listof Char) Boolean -> Bytes))
 | 
			
		||||
(define (percent-encode value set space-as-plus)
 | 
			
		||||
  (define b (string->bytes/utf-8 value))
 | 
			
		||||
  (apply bytes-append
 | 
			
		||||
         (for/list ([char b]) : (Listof Bytes)
 | 
			
		||||
                   (cond
 | 
			
		||||
                     [(and space-as-plus (eq? char 32))
 | 
			
		||||
                      #"+"]
 | 
			
		||||
                     [(or (member (integer->char char) set)
 | 
			
		||||
                          (char . > . #x7E)
 | 
			
		||||
                          (char . <= . #x1F))
 | 
			
		||||
                      (bytes-append #"%" (string->bytes/latin-1
 | 
			
		||||
                                          (string-upcase (number->string char 16))))]
 | 
			
		||||
                     [#t
 | 
			
		||||
                      (bytes char)]))))
 | 
			
		||||
 | 
			
		||||
(: params->query ((Listof (Pair String String)) -> String))
 | 
			
		||||
(define (params->query params)
 | 
			
		||||
  (string-join
 | 
			
		||||
   (map (λ ([p : (Pair String String)])
 | 
			
		||||
          (format "~a=~a"
 | 
			
		||||
                  (percent-encode (car p) urlencoded-set #t)
 | 
			
		||||
                  (percent-encode (cdr p) urlencoded-set #t)))
 | 
			
		||||
        params)
 | 
			
		||||
   "&"))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-equal? (params->query '(("hello" . "world")))
 | 
			
		||||
                "hello=world")
 | 
			
		||||
  (check-equal? (params->query '(("a" . "hello world''") ("utf8" . "✓")))
 | 
			
		||||
                "a=hello+world%27%27&utf8=%E2%9C%93"))
 | 
			
		||||
 | 
			
		||||
(: is-fandom-url? (String -> Boolean))
 | 
			
		||||
(define (is-fandom-url? url)
 | 
			
		||||
  (regexp-match? (pregexp (format "^https://static\\.wikia\\.nocookie\\.net/|^https://~a\\.fandom\\.com/" px-wikiname)) url))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-true (is-fandom-url? "https://static.wikia.nocookie.net/wikiname/images/2/2f/SomeImage.jpg/revision/latest?cb=20110210094136"))
 | 
			
		||||
  (check-true (is-fandom-url? "https://test.fandom.com/wiki/Some_Page"))
 | 
			
		||||
  (check-false (is-fandom-url? "https://cadence.moe")))
 | 
			
		||||
 | 
			
		||||
(: u-proxy-url (String -> String))
 | 
			
		||||
(define (u-proxy-url url)
 | 
			
		||||
  (u
 | 
			
		||||
   is-fandom-url?
 | 
			
		||||
   (λ ([v : String]) (string-append "/proxy?" (params->query `(("dest" . ,url)))))
 | 
			
		||||
   url))
 | 
			
		||||
 | 
			
		||||
(: build-headers ((U Header (Listof Header) False Void) * -> (Listof Header)))
 | 
			
		||||
(define (build-headers . fs)
 | 
			
		||||
  (apply
 | 
			
		||||
   append
 | 
			
		||||
   (map (λ ([f : (U Header (Listof Header) False Void)])
 | 
			
		||||
         (cond
 | 
			
		||||
           [(not f) null]
 | 
			
		||||
           [(void? f) null]
 | 
			
		||||
           [(null? f) null]
 | 
			
		||||
           [(header? f) (list f)]
 | 
			
		||||
           [(pair? f) f]))
 | 
			
		||||
       fs)))
 | 
			
		||||
 | 
			
		||||
(: page-title->path (String -> Bytes))
 | 
			
		||||
(define (page-title->path title)
 | 
			
		||||
  (percent-encode (regexp-replace* " " title "_") path-set #f))
 | 
			
		||||
							
								
								
									
										217
									
								
								lib/xexpr-utils.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										217
									
								
								lib/xexpr-utils.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,217 @@
 | 
			
		|||
#lang racket/base
 | 
			
		||||
(require racket/dict
 | 
			
		||||
         racket/function
 | 
			
		||||
         racket/generator
 | 
			
		||||
         racket/match
 | 
			
		||||
         racket/string
 | 
			
		||||
         (only-in json-pointer json-pointer-value)
 | 
			
		||||
         (only-in web-server/http response/output)
 | 
			
		||||
         "pure-utils.rkt")
 | 
			
		||||
 | 
			
		||||
(provide
 | 
			
		||||
 ;; with whole xexprs
 | 
			
		||||
 ; xexpr for an "empty" element, which in reality uses <template>
 | 
			
		||||
 return-no-element
 | 
			
		||||
 ; query a tree for elements matching a condition
 | 
			
		||||
 query-selector
 | 
			
		||||
 ; update a tree with a function called on each element
 | 
			
		||||
 update-tree
 | 
			
		||||
 | 
			
		||||
 ;; with bits of xexprs
 | 
			
		||||
 ; predicates
 | 
			
		||||
 element-is-bits?
 | 
			
		||||
 element-is-xattributes?
 | 
			
		||||
 element-is-element?
 | 
			
		||||
 element-is-content?
 | 
			
		||||
 | 
			
		||||
 ;; with attributes
 | 
			
		||||
 ; find the attributes in some bits of an element
 | 
			
		||||
 bits->attributes
 | 
			
		||||
 ; get attribute value from some attributes
 | 
			
		||||
 get-attribute
 | 
			
		||||
 ; update an attribute if it is present (otherwise no change)
 | 
			
		||||
 attribute-maybe-update
 | 
			
		||||
 ; make an attribute selector for use in query-selector
 | 
			
		||||
 attribute-selector
 | 
			
		||||
 ; do these attributes have a certain value in their class?
 | 
			
		||||
 has-class?
 | 
			
		||||
 | 
			
		||||
 ;; with json
 | 
			
		||||
 ; get value in json structure using a *j*son *p*ointer, optionally with default value for not present
 | 
			
		||||
 jp
 | 
			
		||||
 | 
			
		||||
 ; error catching for http responses
 | 
			
		||||
 response-handler)
 | 
			
		||||
 | 
			
		||||
(module+ test
 | 
			
		||||
  (require rackunit)
 | 
			
		||||
  (define demo-attributes
 | 
			
		||||
    '(span (@ (title "Inside joke."))
 | 
			
		||||
           "To get to the other side."
 | 
			
		||||
           (@ (style "color: blue"))))
 | 
			
		||||
  (define demo-document
 | 
			
		||||
    '(html
 | 
			
		||||
      (@ (lang "en"))
 | 
			
		||||
      (head
 | 
			
		||||
       (title "Hello world!"))
 | 
			
		||||
      (body
 | 
			
		||||
       (h1 "Hello world!")
 | 
			
		||||
       (p "Welcome to my "
 | 
			
		||||
          (span (@ (style "color: yellow")
 | 
			
		||||
                   (title "Really."))
 | 
			
		||||
                (em "cool"))
 | 
			
		||||
          "website.")))))
 | 
			
		||||
 | 
			
		||||
; replacing with a template element removes it from the rendered document
 | 
			
		||||
(define return-no-element '(template
 | 
			
		||||
                            ()
 | 
			
		||||
                            ()))
 | 
			
		||||
 | 
			
		||||
; "bits" is attributes or real elements (non-string)
 | 
			
		||||
(define (element-is-bits? element)
 | 
			
		||||
  (pair? element))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-true (element-is-bits? '(span "hi")))
 | 
			
		||||
  (check-true (element-is-bits? '(@ (alt "Cute cat."))))
 | 
			
		||||
  (check-false (element-is-bits? "hi")))
 | 
			
		||||
 | 
			
		||||
; "xattributes" is attributes hugged by @
 | 
			
		||||
(define (element-is-xattributes? element)
 | 
			
		||||
  (and (element-is-bits? element) (eq? '@ (car element))))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-false (element-is-xattributes? '(span "hi")))
 | 
			
		||||
  (check-true (element-is-xattributes? '(@ (alt "Cute cat."))))
 | 
			
		||||
  (check-false (element-is-xattributes? '((alt "Cute cat."))))
 | 
			
		||||
  (check-false (element-is-xattributes? "hi")))
 | 
			
		||||
 | 
			
		||||
; "element" is a real element with a type and everything (non-string, non-attributes)
 | 
			
		||||
(define (element-is-element? element)
 | 
			
		||||
  (and (element-is-bits? element) (not (element-is-xattributes? element))))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-true (element-is-element? '(span "hi")))
 | 
			
		||||
  (check-false (element-is-element? '(@ (alt "Cute cat."))))
 | 
			
		||||
  (check-false (element-is-element? "hi")))
 | 
			
		||||
 | 
			
		||||
; "element content" is a real element or a string
 | 
			
		||||
(define (element-is-content? element)
 | 
			
		||||
  (or (string? element) (element-is-element? element)))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-true (element-is-content? '(span "hi")))
 | 
			
		||||
  (check-false (element-is-content? '(@ (alt "Cute cat."))))
 | 
			
		||||
  (check-true (element-is-content? "hi")))
 | 
			
		||||
 | 
			
		||||
; get the actual attributes, leaving out the @ signs
 | 
			
		||||
(define (xattributes->attributes xattrs)
 | 
			
		||||
  (filter pair? xattrs))
 | 
			
		||||
 | 
			
		||||
(define (bits->attributes bits)
 | 
			
		||||
 ; (append) is a clean and general approach to finding and combining any attributes
 | 
			
		||||
  (xattributes->attributes (apply append (filter element-is-xattributes? bits))))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-equal? (bits->attributes demo-attributes)
 | 
			
		||||
                '((title "Inside joke.") (style "color: blue"))))
 | 
			
		||||
 | 
			
		||||
(define (get-attribute name attributes)
 | 
			
		||||
  (define a (assq name attributes))
 | 
			
		||||
  (if (pair? a)
 | 
			
		||||
      (cadr a)
 | 
			
		||||
      #f))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-equal? (get-attribute 'title (bits->attributes demo-attributes)) "Inside joke."))
 | 
			
		||||
 | 
			
		||||
(define (attribute-maybe-update key updater attributes)
 | 
			
		||||
  (alist-maybe-update attributes key (λ (v) (map updater v))))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-equal? (attribute-maybe-update 'a (λ (x) (+ x 10)) '((a 5) (b 6)))
 | 
			
		||||
                '((a 15) (b 6))))
 | 
			
		||||
 | 
			
		||||
(define (attribute-selector name value)
 | 
			
		||||
  (λ (element-type attributes children)
 | 
			
		||||
    (equal? (get-attribute name attributes) value)))
 | 
			
		||||
 | 
			
		||||
(define (query-selector selector element #:include-text? [include-text? #f])
 | 
			
		||||
  (generator
 | 
			
		||||
   ()
 | 
			
		||||
   (let loop ([element element])
 | 
			
		||||
     (define element-type (car element))
 | 
			
		||||
     (define attributes (bits->attributes (cdr element)))
 | 
			
		||||
     (define children (filter element-is-element? (cdr element))) ; only recurse through real children
 | 
			
		||||
     (cond
 | 
			
		||||
       [(equal? element-type '*DECL*) #f]
 | 
			
		||||
       [(equal? element-type '@) #f]
 | 
			
		||||
       [#t
 | 
			
		||||
        (when (if include-text?
 | 
			
		||||
                   (selector element-type attributes children (filter string? (cdr element)))
 | 
			
		||||
                   (selector element-type attributes children))
 | 
			
		||||
          (yield element))
 | 
			
		||||
        (for ([child children]) (loop child))]))
 | 
			
		||||
  #f))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (let ([result (query-selector (attribute-selector 'title "Really.")
 | 
			
		||||
                                demo-document)])
 | 
			
		||||
    (check-equal? (result) '(span (@ (style "color: yellow")
 | 
			
		||||
                                     (title "Really."))
 | 
			
		||||
                                  (em "cool")))
 | 
			
		||||
    (check-equal? (result) #f)))
 | 
			
		||||
 | 
			
		||||
(define (update-tree transformer element)
 | 
			
		||||
  (let loop ([element element])
 | 
			
		||||
    (define element-type (car element))
 | 
			
		||||
    (define attributes (bits->attributes (cdr element)))
 | 
			
		||||
    (define contents (filter element-is-content? (cdr element))) ; provide elements and strings
 | 
			
		||||
    (cond
 | 
			
		||||
      [(equal? element-type '*DECL*)
 | 
			
		||||
       ; declarations like <!DOCTYPE html> get mapped as attributes as if the element were (*DECL* (@ (DOCTYPE) (html)))
 | 
			
		||||
       (match (transformer element element-type (map list (cdr element)) null)
 | 
			
		||||
         [(list element-type attributes contents)
 | 
			
		||||
          `(*DECL* ,@(map car attributes))]
 | 
			
		||||
         [#f ""])]
 | 
			
		||||
      [(member element-type '(@ &))
 | 
			
		||||
       ; special element, do nothing
 | 
			
		||||
       element]
 | 
			
		||||
      [#t
 | 
			
		||||
       ; regular element, transform it
 | 
			
		||||
       (match (transformer element element-type attributes contents)
 | 
			
		||||
         [(list element-type attributes contents)
 | 
			
		||||
          (append (list element-type)
 | 
			
		||||
                  (if (pair? attributes) (list (append '(@) attributes)) (list))
 | 
			
		||||
                  (map (λ (content)
 | 
			
		||||
                         (if (element-is-element? content) (loop content) content))
 | 
			
		||||
                       contents))])])))
 | 
			
		||||
(module+ test
 | 
			
		||||
  ; check doctype is preserved when present
 | 
			
		||||
  (check-equal? (update-tree (λ (e t a c) (list t a c)) '(*TOP* (*DECL* DOCTYPE html) (html (body "Hey"))))
 | 
			
		||||
                '(*TOP* (*DECL* DOCTYPE html) (html (body "Hey"))))
 | 
			
		||||
  ; check doctype can be removed if desirable
 | 
			
		||||
  (check-equal? (update-tree (λ (e t a c) (if (eq? t '*DECL*) #f (list t a c))) '(*TOP* (*DECL* DOCTYPE html) (html (body "Hey"))))
 | 
			
		||||
                '(*TOP* "" (html (body "Hey"))))
 | 
			
		||||
  ; check (& x) sequences are preserved
 | 
			
		||||
  (check-equal? (update-tree (λ (e t a c) (list t a c)) '(body "Hey" (& nbsp) (a (@ (href "/")))))
 | 
			
		||||
                '(body "Hey" (& nbsp) (a (@ (href "/"))))))
 | 
			
		||||
 | 
			
		||||
(define (has-class? name attributes)
 | 
			
		||||
  (and (member name (string-split (or (get-attribute 'class attributes) "") " ")) #t))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (check-true (has-class? "red" '((class "yellow red blue"))))
 | 
			
		||||
  (check-false (has-class? "red" '((class "yellow blue"))))
 | 
			
		||||
  (check-false (has-class? "red" '((title "Inside joke.")))))
 | 
			
		||||
 | 
			
		||||
(define (jp pointer document [else null])
 | 
			
		||||
  (with-handlers ([exn:fail:contract? (λ (exn) (cond
 | 
			
		||||
                                                 [(null? else) (raise exn)]
 | 
			
		||||
                                                 [(procedure? else) (else)]
 | 
			
		||||
                                                 [#t else]))])
 | 
			
		||||
    (json-pointer-value pointer document)))
 | 
			
		||||
 | 
			
		||||
(define-syntax-rule (response-handler body ...)
 | 
			
		||||
  (with-handlers ([exn:fail? (λ (e)
 | 
			
		||||
                               (response/output
 | 
			
		||||
                                #:code 500
 | 
			
		||||
                                #:mime-type #"text/plain"
 | 
			
		||||
                                (λ (out)
 | 
			
		||||
                                  (for ([port (list (current-error-port) out)])
 | 
			
		||||
                                    (parameterize ([current-error-port port])
 | 
			
		||||
                                      (with-handlers ([exn:fail? (λ (e) (void))])
 | 
			
		||||
                                        (displayln "Exception raised in Racket code at response generation time:" (current-error-port))
 | 
			
		||||
                                        ((error-display-handler) (exn-message e) e)))))))])
 | 
			
		||||
    body ...))
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue