Optimise update-tree-wiki somewhat
This commit is contained in:
		
							parent
							
								
									ba3b39242c
								
							
						
					
					
						commit
						9fd2b4699d
					
				
					 2 changed files with 272 additions and 228 deletions
				
			
		|  | @ -19,6 +19,7 @@ | |||
|          "data.rkt" | ||||
|          "pure-utils.rkt" | ||||
|          "syntax.rkt" | ||||
|          "tree-updater.rkt" | ||||
|          "xexpr-utils.rkt" | ||||
|          "url-utils.rkt") | ||||
| 
 | ||||
|  | @ -30,51 +31,19 @@ | |||
|  preprocess-html-wiki) | ||||
| 
 | ||||
| (module+ test | ||||
|   (require rackunit) | ||||
|   (define wiki-document | ||||
|     '(*TOP* | ||||
|       (div (@ (class "mw-parser-output")) | ||||
|            (aside (@ (role "region") (class "portable-infobox pi-theme-wikia pi-layout-default")) | ||||
|                   (h2 (@ (class "pi-item pi-title") (data-source "title")) | ||||
|                       "Infobox Title") | ||||
|                   (figure (@ (class "pi-item pi-image") (data-source "image")) | ||||
|                           (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image image-thumbnail") (title "")) | ||||
|                              (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") (class "pi-image-thumbnail"))))) | ||||
|                   (div (@ (class "pi-item pi-data") (data-source "description")) | ||||
|                        (h3 (@ (class "pi-data-label")) | ||||
|                            "Description") | ||||
|                        (div (@ (class "pi-data-value")) | ||||
|                             "Mystery infobox!"))) | ||||
|            (div (@ (data-test-collapsesection) (class "collapsible collapsetoggle-inline collapsed")) | ||||
|                 (i (b "This section is hidden for dramatic effect.")) | ||||
|                 (div (@ (class "collapsible-content")) | ||||
|                      (p "Another page link: " | ||||
|                         (a (@ (data-test-wikilink) (href "https://test.fandom.com/wiki/Another_Page") (title "Another Page")) | ||||
|                            "Another Page")))) | ||||
|            (figure (@ (class "thumb tnone")) | ||||
|                    (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image") (data-test-figure-a)) | ||||
|                       (img (@ (src "data:image/gif;base64,R0lGODlhAQABAIABAAAAAP///yH5BAEAAAEALAAAAAABAAEAQAICTAEAOw%3D%3D") | ||||
|                               (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") | ||||
|                               (class "thumbimage lazyload")))) | ||||
|                    (noscript | ||||
|                     (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image")) | ||||
|                        (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") | ||||
|                                (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") | ||||
|                                (class "thumbimage"))))) | ||||
|                    (figcaption "Test figure!")) | ||||
|            (iframe (@ (src "https://example.com/iframe-src"))))))) | ||||
|   (require rackunit)) | ||||
| 
 | ||||
| (define (preprocess-html-wiki html) | ||||
|   (define (rr* find replace contents) | ||||
|   (define ((rr* find replace) contents) | ||||
|     (regexp-replace* find contents replace)) | ||||
|   ((compose1 | ||||
|     ; fix navbox list nesting | ||||
|     ; navbox on right of page has incorrect html "<td ...><li>" and the xexpr parser puts the <li> much further up the tree | ||||
|     ; add a <ul> to make the parser happy | ||||
|     ; usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies | ||||
|     (curry rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2") | ||||
|     (rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2") | ||||
|     ; change <figcaption><p> to <figcaption><span> to make the parser happy | ||||
|     (curry rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>")) | ||||
|     (rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>")) | ||||
|    html)) | ||||
| (module+ test | ||||
|   (check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>") | ||||
|  | @ -82,198 +51,6 @@ | |||
|   (check-equal? (preprocess-html-wiki "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"> 	<p class=\"caption\">Caption text.</p></figcaption></figure>") | ||||
|                 "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"><span class=\"caption\">Caption text.</span></figcaption></figure>")) | ||||
| 
 | ||||
| (define (update-tree-wiki tree wikiname) | ||||
|   (update-tree | ||||
|    (λ (element element-type attributes children) | ||||
|      ;; replace whole element? | ||||
|      (cond | ||||
|        ; wrap tables in a div.table-scroller | ||||
|        [(and (eq? element-type 'table) | ||||
|              (has-class? "wikitable" attributes) | ||||
|              (not (dict-has-key? attributes 'data-scrolling))) | ||||
|         `(div | ||||
|           ((class "table-scroller")) | ||||
|           ((,element-type (@ (data-scrolling) ,@attributes) | ||||
|                           ,@children)))] | ||||
|        ; exclude empty figcaptions | ||||
|        [(and (eq? element-type 'figcaption) | ||||
|              (or (eq? (length (filter element-is-element? children)) 0) | ||||
|                  ((query-selector (λ (element-type attributes children) | ||||
|                                     (eq? element-type 'use)) | ||||
|                                   element)))) | ||||
|         return-no-element] | ||||
|        ; exclude infobox items that are videos, and gallery items that are videos | ||||
|        [(and (or (has-class? "pi-item" attributes) | ||||
|                  (has-class? "wikia-gallery-item" attributes)) | ||||
|              ((query-selector (λ (element-type attributes children) | ||||
|                                 (has-class? "video-thumbnail" attributes)) | ||||
|                               element))) | ||||
|         return-no-element] | ||||
|        ; exclude the invisible brackets after headings | ||||
|        [(and (eq? element-type 'span) | ||||
|              (has-class? "mw-editsection" attributes)) | ||||
|         return-no-element] | ||||
|        ; display a link instead of an iframe | ||||
|        [(eq? element-type 'iframe) | ||||
|         (define src (car (dict-ref attributes 'src null))) | ||||
|         `(a | ||||
|           ((class "iframe-alternative") (href ,src)) | ||||
|           (,(format "Embedded media: ~a" src)))] | ||||
|        ; remove noscript versions of images because they are likely lower quality than the script versions | ||||
|        [(and (eq? element-type 'noscript) | ||||
|              (match children | ||||
|                ; either the noscript has a.image as a first child... | ||||
|                [(list (list 'a (list '@ a-att ...) _)) (has-class? "image" a-att)] | ||||
|                ; or the noscript has img as a first child | ||||
|                [(list (list 'img _)) #t] | ||||
|                [_ #f])) | ||||
|         return-no-element] | ||||
|        [#t | ||||
|         (list element-type | ||||
|               ;; attributes | ||||
|               ((compose1 | ||||
|                 ; uncollapsing | ||||
|                 (curry attribute-maybe-update 'class | ||||
|                        (λ (class) | ||||
|                          (string-join | ||||
|                           ((compose1 | ||||
|                             ; uncollapse all navbox items (bottom of page mass navigation) | ||||
|                             (curry u | ||||
|                                    (λ (classlist) (and (eq? element-type 'table) | ||||
|                                                        (member "navbox" classlist) | ||||
|                                                        (member "collapsed" classlist))) | ||||
|                                    (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist))) | ||||
|                             ; uncollapse portable-infobox sections | ||||
|                             (curry u | ||||
|                                    (λ (classlist) (and (eq? element-type 'section) | ||||
|                                                        (member "pi-collapse" classlist))) | ||||
|                                    (λ (classlist) (filter (λ (v) | ||||
|                                                             (and (not (equal? v "pi-collapse-closed")) | ||||
|                                                                  (not (equal? v "pi-collapse")))) | ||||
|                                                           classlist))) | ||||
|                             ; generic: includes article sections and tables, probably more | ||||
|                             (curry u | ||||
|                                    (λ (classlist) (and (member "collapsible" classlist) | ||||
|                                                        (member "collapsed" classlist))) | ||||
|                                    (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist)))) | ||||
|                            (string-split class " ")) | ||||
|                           " "))) | ||||
|                 ; change links to stay on the same wiki | ||||
|                 (curry attribute-maybe-update 'href | ||||
|                        (λ (href) | ||||
|                          ((compose1 | ||||
|                            (λ (href) (regexp-replace #rx"^(/wiki/.*)" href (format "/~a\\1" wikiname))) | ||||
|                            (λ (href) (regexp-replace (pregexp (format "^https://(~a)\\.fandom\\.com(/wiki/.*)" px-wikiname)) href "/\\1\\2"))) | ||||
|                           href))) | ||||
|                 ; add noreferrer to a.image | ||||
|                 (curry u | ||||
|                        (λ (v) (and (eq? element-type 'a) | ||||
|                                    (has-class? "image" v))) | ||||
|                        (λ (v) (dict-update v 'rel (λ (s) | ||||
|                                                     (list (string-append (car s) " noreferrer"))) | ||||
|                                            '("")))) | ||||
|                 ; proxy images from inline styles, if strict_proxy is set | ||||
|                 (curry u | ||||
|                        (λ (v) (config-true? 'strict_proxy)) | ||||
|                        (λ (v) (attribute-maybe-update | ||||
|                                'style | ||||
|                                (λ (style) | ||||
|                                  (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style | ||||
|                                                  (λ (whole url) | ||||
|                                                    (string-append | ||||
|                                                     "url(" | ||||
|                                                     (u-proxy-url url) | ||||
|                                                     ")")))) v))) | ||||
|                 ; and also their links, if strict_proxy is set | ||||
|                 (curry u | ||||
|                        (λ (v) | ||||
|                          (and (config-true? 'strict_proxy) | ||||
|                               (eq? element-type 'a) | ||||
|                               (or (has-class? "image-thumbnail" v) | ||||
|                                   (has-class? "image" v)))) | ||||
|                        (λ (v) (attribute-maybe-update 'href u-proxy-url v))) | ||||
|                 ; proxy images from src attributes, if strict_proxy is set | ||||
|                 (curry u | ||||
|                        (λ (v) (config-true? 'strict_proxy)) | ||||
|                        (λ (v) (attribute-maybe-update 'src u-proxy-url v))) | ||||
|                 ; don't lazyload images | ||||
|                 (curry u | ||||
|                        (λ (v) (dict-has-key? v 'data-src)) | ||||
|                        (λ (v) (attribute-maybe-update 'src (λ (_) (car (dict-ref v 'data-src))) v))) | ||||
|                 ; don't use srcset - TODO: use srcset? | ||||
|                 (λ (v) (dict-remove v 'srcset))) | ||||
|                attributes) | ||||
|               ;; children | ||||
|               ((compose1 | ||||
|                 ; more uncollapsing - sample: bandori/wiki/BanG_Dream!_Wikia | ||||
|                 (curry u | ||||
|                        (λ (v) (has-class? "mw-collapsible-content" attributes)) | ||||
|                        (λ (v) (for/list ([element v]) | ||||
|                                 (u (λ (element) (pair? element)) | ||||
|                                    (λ (element) | ||||
|                                      `(,(car element) | ||||
|                                        (@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element))) | ||||
|                                        ,@(filter element-is-content? (cdr element)))) | ||||
|                                    element)))) | ||||
|                 ; wrap blinking animated images in a slot so they can be animated with CSS | ||||
|                 (curry u | ||||
|                        (λ (v) (and (has-class? "animated" attributes) | ||||
|                                    ((length v) . > . 1))) | ||||
|                        (λ (v) | ||||
|                          `((span (@ (class "animated-slot__outer") (style ,(format "--steps: ~a" (length v)))) | ||||
|                                  (span (@ (class "animated-slot__inner")) | ||||
|                                        ,@v)))))) | ||||
|                children))])) | ||||
|    tree)) | ||||
| (module+ test | ||||
|   (define transformed | ||||
|     (parameterize ([(config-parameter 'strict_proxy) "true"]) | ||||
|       (update-tree-wiki wiki-document "test"))) | ||||
|   ; check that wikilinks are changed to be local | ||||
|   (check-equal? (get-attribute 'href (bits->attributes | ||||
|                                       ((query-selector | ||||
|                                         (λ (t a c) (dict-has-key? a 'data-test-wikilink)) | ||||
|                                         transformed)))) | ||||
|                 "/test/wiki/Another_Page") | ||||
|   ; check that a.image has noreferrer | ||||
|   (check-equal? (get-attribute 'rel (bits->attributes | ||||
|                                      ((query-selector | ||||
|                                        (λ (t a c) (and (eq? t 'a) | ||||
|                                                        (has-class? "image" a))) | ||||
|                                        transformed)))) | ||||
|                 " noreferrer") | ||||
|   ; check that article collapse sections become uncollapsed | ||||
|   (check-equal? (get-attribute 'class (bits->attributes | ||||
|                                        ((query-selector | ||||
|                                          (λ (t a c) (dict-has-key? a 'data-test-collapsesection)) | ||||
|                                          transformed)))) | ||||
|                 "collapsible collapsetoggle-inline") | ||||
|   ; check that iframes are gone | ||||
|   (check-false ((query-selector (λ (t a c) (eq? t 'iframe)) transformed))) | ||||
|   (check-equal? (let* ([alternative ((query-selector (λ (t a c) (has-class? "iframe-alternative" a)) transformed))] | ||||
|                        [link ((query-selector (λ (t a c) (eq? t 'a)) alternative))]) | ||||
|                   (get-attribute 'href (bits->attributes link))) | ||||
|                 "https://example.com/iframe-src") | ||||
|   ; check that images are proxied | ||||
|   (check-equal? (get-attribute 'src (bits->attributes | ||||
|                                      ((query-selector | ||||
|                                        (λ (t a c) (eq? t 'img)) | ||||
|                                        transformed)))) | ||||
|                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image-thumbnail.png") | ||||
|   ; check that links to images are proxied | ||||
|   (check-equal? (get-attribute 'href (bits->attributes | ||||
|                                       ((query-selector | ||||
|                                         (λ (t a c) (and (eq? t 'a) (has-class? "image-thumbnail" a))) | ||||
|                                         transformed)))) | ||||
|                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png") | ||||
|   (check-equal? (get-attribute 'href (bits->attributes | ||||
|                                       ((query-selector | ||||
|                                         (λ (t a c) (member '(data-test-figure-a) a)) | ||||
|                                         transformed)))) | ||||
|                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png") | ||||
|   ; check that noscript images are removed | ||||
|   (check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f)) | ||||
| 
 | ||||
| (define (page-wiki req) | ||||
|   (define wikiname (path/param-path (first (url-path (request-uri req))))) | ||||
|   (define user-cookies (user-cookies-getter req)) | ||||
|  |  | |||
							
								
								
									
										267
									
								
								src/tree-updater.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										267
									
								
								src/tree-updater.rkt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,267 @@ | |||
| #lang racket/base | ||||
| (require racket/dict | ||||
|          racket/function | ||||
|          racket/match | ||||
|          racket/string | ||||
|          "config.rkt" | ||||
|          "pure-utils.rkt" | ||||
|          "url-utils.rkt" | ||||
|          "xexpr-utils.rkt") | ||||
| 
 | ||||
| (provide | ||||
|  update-tree-wiki) | ||||
| 
 | ||||
| (module+ test | ||||
|   (require rackunit | ||||
|            html-parsing) | ||||
|   (define wiki-document | ||||
|     '(*TOP* | ||||
|       (div (@ (class "mw-parser-output")) | ||||
|            (aside (@ (role "region") (class "portable-infobox pi-theme-wikia pi-layout-default")) | ||||
|                   (h2 (@ (class "pi-item pi-title") (data-source "title")) | ||||
|                       "Infobox Title") | ||||
|                   (figure (@ (class "pi-item pi-image") (data-source "image")) | ||||
|                           (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image image-thumbnail") (title "")) | ||||
|                              (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") (class "pi-image-thumbnail"))))) | ||||
|                   (div (@ (class "pi-item pi-data") (data-source "description")) | ||||
|                        (h3 (@ (class "pi-data-label")) | ||||
|                            "Description") | ||||
|                        (div (@ (class "pi-data-value")) | ||||
|                             "Mystery infobox!"))) | ||||
|            (div (@ (data-test-collapsesection) (class "collapsible collapsetoggle-inline collapsed")) | ||||
|                 (i (b "This section is hidden for dramatic effect.")) | ||||
|                 (div (@ (class "collapsible-content")) | ||||
|                      (p "Another page link: " | ||||
|                         (a (@ (data-test-wikilink) (href "https://test.fandom.com/wiki/Another_Page") (title "Another Page")) | ||||
|                            "Another Page")))) | ||||
|            (figure (@ (class "thumb tnone")) | ||||
|                    (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image") (data-test-figure-a)) | ||||
|                       (img (@ (src "data:image/gif;base64,R0lGODlhAQABAIABAAAAAP///yH5BAEAAAEALAAAAAABAAEAQAICTAEAOw%3D%3D") | ||||
|                               (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") | ||||
|                               (class "thumbimage lazyload")))) | ||||
|                    (noscript | ||||
|                     (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image")) | ||||
|                        (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") | ||||
|                                (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") | ||||
|                                (class "thumbimage"))))) | ||||
|                    (figcaption "Test figure!")) | ||||
|            (iframe (@ (src "https://example.com/iframe-src"))))))) | ||||
| 
 | ||||
| (define (updater wikiname) | ||||
|   (define classlist-updater | ||||
|     (compose1 | ||||
|      ; uncollapse all navbox items (bottom of page mass navigation) | ||||
|      (curry u | ||||
|             (λ (classlist) (and ; removed due to scoping, would improve peformance (eq? element-type 'table) | ||||
|                             (member "navbox" classlist) | ||||
|                             (member "collapsed" classlist))) | ||||
|             (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist))) | ||||
|      ; uncollapse portable-infobox sections | ||||
|      (curry u | ||||
|             (λ (classlist) (and ; removed due to scoping, would improve performance (eq? element-type 'section) | ||||
|                             (member "pi-collapse" classlist))) | ||||
|             (λ (classlist) (filter (λ (v) | ||||
|                                      (and (not (equal? v "pi-collapse-closed")) | ||||
|                                           (not (equal? v "pi-collapse")))) | ||||
|                                    classlist))) | ||||
|      ; generic: includes article sections and tables, probably more | ||||
|      (curry u | ||||
|             (λ (classlist) (and (member "collapsible" classlist) | ||||
|                                 (member "collapsed" classlist))) | ||||
|             (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist))))) | ||||
| 
 | ||||
|   (define ((string-replace-curried from to) str) | ||||
|     (string-replace str from to)) | ||||
| 
 | ||||
|   (define class-updater | ||||
|     (compose1 | ||||
|      (string-replace-curried " collapsed" "") | ||||
|      (string-replace-curried "pi-collapse-closed" "") | ||||
|      (string-replace-curried "pi-collapse" ""))) | ||||
| 
 | ||||
|   (define attributes-updater | ||||
|     (compose1 | ||||
|      ; uncollapsing | ||||
|      #;(curry attribute-maybe-update 'class | ||||
|               (λ (class) (string-join (classlist-updater (string-split class " ")) " "))) | ||||
|      (curry attribute-maybe-update 'class class-updater) | ||||
|      ; change links to stay on the same wiki | ||||
|      (curry attribute-maybe-update 'href | ||||
|             (λ (href) | ||||
|               ((compose1 | ||||
|                 (λ (href) (regexp-replace #rx"^(/wiki/.*)" href (format "/~a\\1" wikiname))) | ||||
|                 (λ (href) (regexp-replace (pregexp (format "^https://(~a)\\.fandom\\.com(/wiki/.*)" px-wikiname)) href "/\\1\\2"))) | ||||
|                href))) | ||||
|      ; add noreferrer to a.image | ||||
|      (curry u | ||||
|             (λ (v) (and #;(eq? element-type 'a) | ||||
|                         (has-class? "image" v))) | ||||
|             (λ (v) (dict-update v 'rel (λ (s) | ||||
|                                          (list (string-append (car s) " noreferrer"))) | ||||
|                                 '("")))) | ||||
|      ; proxy images from inline styles, if strict_proxy is set | ||||
|      (curry u | ||||
|             (λ (v) (config-true? 'strict_proxy)) | ||||
|             (λ (v) (attribute-maybe-update | ||||
|                     'style | ||||
|                     (λ (style) | ||||
|                       (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style | ||||
|                                       (λ (whole url) | ||||
|                                         (string-append | ||||
|                                          "url(" | ||||
|                                          (u-proxy-url url) | ||||
|                                          ")")))) v))) | ||||
|      ; and also their links, if strict_proxy is set | ||||
|      (curry u | ||||
|             (λ (v) | ||||
|               (and (config-true? 'strict_proxy) | ||||
|                    #;(eq? element-type 'a) | ||||
|                    (or (has-class? "image-thumbnail" v) | ||||
|                        (has-class? "image" v)))) | ||||
|             (λ (v) (attribute-maybe-update 'href u-proxy-url v))) | ||||
|      ; proxy images from src attributes, if strict_proxy is set | ||||
|      (curry u | ||||
|             (λ (v) (config-true? 'strict_proxy)) | ||||
|             (λ (v) (attribute-maybe-update 'src u-proxy-url v))) | ||||
|      ; don't lazyload images | ||||
|      (curry u | ||||
|             (λ (v) (dict-has-key? v 'data-src)) | ||||
|             (λ (v) (attribute-maybe-update 'src (λ (_) (car (dict-ref v 'data-src))) v))) | ||||
|      ; don't use srcset - TODO: use srcset? | ||||
|      (λ (v) (dict-remove v 'srcset)))) | ||||
| 
 | ||||
|   (define (children-updater attributes children) | ||||
|     ; more uncollapsing - sample: bandori/wiki/BanG_Dream!_Wikia | ||||
|     ((λ (children) | ||||
|        (u | ||||
|         (λ (v) (has-class? "mw-collapsible-content" attributes)) | ||||
|         (λ (v) (for/list ([element v]) | ||||
|                  (u (λ (element) (pair? element)) | ||||
|                     (λ (element) | ||||
|                       `(,(car element) | ||||
|                         (@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element))) | ||||
|                         ,@(filter element-is-content? (cdr element)))) | ||||
|                     element))) | ||||
|         children)) | ||||
|      ; wrap blinking animated images in a slot so they can be animated with CSS | ||||
|      ((λ (children) | ||||
|         (u | ||||
|          (λ (v) (and (has-class? "animated" attributes) | ||||
|                      ((length v) . > . 1))) | ||||
|          (λ (v) | ||||
|            `((span (@ (class "animated-slot__outer") (style ,(format "--steps: ~a" (length v)))) | ||||
|                    (span (@ (class "animated-slot__inner")) | ||||
|                          ,@v)))) | ||||
|          children)) | ||||
|       children))) | ||||
| 
 | ||||
|   (define (updater element element-type attributes children) | ||||
|     ;; replace whole element? | ||||
|     (cond | ||||
|       ; wrap tables in a div.table-scroller | ||||
|       [(and (eq? element-type 'table) | ||||
|             (has-class? "wikitable" attributes) | ||||
|             (not (dict-has-key? attributes 'data-scrolling))) | ||||
|        `(div | ||||
|          ((class "table-scroller")) | ||||
|          ((,element-type (@ (data-scrolling) ,@attributes) | ||||
|                          ,@children)))] | ||||
|       ; exclude empty figcaptions | ||||
|       [(and (eq? element-type 'figcaption) | ||||
|             (or (eq? (length (filter element-is-element? children)) 0) | ||||
|                 ((query-selector (λ (element-type attributes children) | ||||
|                                    (eq? element-type 'use)) | ||||
|                                  element)))) | ||||
|        return-no-element] | ||||
|       ; exclude infobox items that are videos, and gallery items that are videos | ||||
|       [(and (or (has-class? "pi-item" attributes) | ||||
|                 (has-class? "wikia-gallery-item" attributes)) | ||||
|             ((query-selector (λ (element-type attributes children) | ||||
|                                (has-class? "video-thumbnail" attributes)) | ||||
|                              element))) | ||||
|        return-no-element] | ||||
|       ; exclude the invisible brackets after headings | ||||
|       [(and (eq? element-type 'span) | ||||
|             (has-class? "mw-editsection" attributes)) | ||||
|        return-no-element] | ||||
|       ; display a link instead of an iframe | ||||
|       [(eq? element-type 'iframe) | ||||
|        (define src (car (dict-ref attributes 'src null))) | ||||
|        `(a | ||||
|          ((class "iframe-alternative") (href ,src)) | ||||
|          (,(format "Embedded media: ~a" src)))] | ||||
|       ; remove noscript versions of images because they are likely lower quality than the script versions | ||||
|       [(and (eq? element-type 'noscript) | ||||
|             (match children | ||||
|               ; either the noscript has a.image as a first child... | ||||
|               [(list (list 'a (list '@ a-att ...) _)) (has-class? "image" a-att)] | ||||
|               ; or the noscript has img as a first child | ||||
|               [(list (list 'img _)) #t] | ||||
|               [_ #f])) | ||||
|        return-no-element] | ||||
|       [#t | ||||
|        (list element-type | ||||
|              ;; attributes | ||||
|              (attributes-updater #; element-type attributes) | ||||
|              ;; children | ||||
|              (children-updater attributes children))])) | ||||
| 
 | ||||
|   updater) | ||||
| 
 | ||||
| (define (update-tree-wiki tree wikiname) | ||||
|   (update-tree (updater wikiname) tree)) | ||||
| 
 | ||||
| (module+ test | ||||
|   (define transformed | ||||
|     (parameterize ([(config-parameter 'strict_proxy) "true"]) | ||||
|       (update-tree-wiki wiki-document "test"))) | ||||
|   ; check that wikilinks are changed to be local | ||||
|   (check-equal? (get-attribute 'href (bits->attributes | ||||
|                                       ((query-selector | ||||
|                                         (λ (t a c) (dict-has-key? a 'data-test-wikilink)) | ||||
|                                         transformed)))) | ||||
|                 "/test/wiki/Another_Page") | ||||
|   ; check that a.image has noreferrer | ||||
|   (check-equal? (get-attribute 'rel (bits->attributes | ||||
|                                      ((query-selector | ||||
|                                        (λ (t a c) (and (eq? t 'a) | ||||
|                                                        (has-class? "image" a))) | ||||
|                                        transformed)))) | ||||
|                 " noreferrer") | ||||
|   ; check that article collapse sections become uncollapsed | ||||
|   (check-equal? (get-attribute 'class (bits->attributes | ||||
|                                        ((query-selector | ||||
|                                          (λ (t a c) (dict-has-key? a 'data-test-collapsesection)) | ||||
|                                          transformed)))) | ||||
|                 "collapsible collapsetoggle-inline") | ||||
|   ; check that iframes are gone | ||||
|   (check-false ((query-selector (λ (t a c) (eq? t 'iframe)) transformed))) | ||||
|   (check-equal? (let* ([alternative ((query-selector (λ (t a c) (has-class? "iframe-alternative" a)) transformed))] | ||||
|                        [link ((query-selector (λ (t a c) (eq? t 'a)) alternative))]) | ||||
|                   (get-attribute 'href (bits->attributes link))) | ||||
|                 "https://example.com/iframe-src") | ||||
|   ; check that images are proxied | ||||
|   (check-equal? (get-attribute 'src (bits->attributes | ||||
|                                      ((query-selector | ||||
|                                        (λ (t a c) (eq? t 'img)) | ||||
|                                        transformed)))) | ||||
|                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image-thumbnail.png") | ||||
|   ; check that links to images are proxied | ||||
|   (check-equal? (get-attribute 'href (bits->attributes | ||||
|                                       ((query-selector | ||||
|                                         (λ (t a c) (and (eq? t 'a) (has-class? "image-thumbnail" a))) | ||||
|                                         transformed)))) | ||||
|                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png") | ||||
|   (check-equal? (get-attribute 'href (bits->attributes | ||||
|                                       ((query-selector | ||||
|                                         (λ (t a c) (member '(data-test-figure-a) a)) | ||||
|                                         transformed)))) | ||||
|                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png") | ||||
|   ; check that noscript images are removed | ||||
|   (check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f) | ||||
|   ; benchmark | ||||
|   (when (file-exists? "Frog.html2") | ||||
|     (with-input-from-file "Frog.html2" | ||||
|       (λ () | ||||
|         (define tree (html->xexp (current-input-port))) | ||||
|         (time (length (update-tree-wiki tree "minecraft"))))))) | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue