forked from cadence/breezewiki
		
	Optimise update-tree-wiki somewhat
This commit is contained in:
		
							parent
							
								
									ba3b39242c
								
							
						
					
					
						commit
						9fd2b4699d
					
				
					 2 changed files with 272 additions and 228 deletions
				
			
		|  | @ -19,6 +19,7 @@ | ||||||
|          "data.rkt" |          "data.rkt" | ||||||
|          "pure-utils.rkt" |          "pure-utils.rkt" | ||||||
|          "syntax.rkt" |          "syntax.rkt" | ||||||
|  |          "tree-updater.rkt" | ||||||
|          "xexpr-utils.rkt" |          "xexpr-utils.rkt" | ||||||
|          "url-utils.rkt") |          "url-utils.rkt") | ||||||
| 
 | 
 | ||||||
|  | @ -30,51 +31,19 @@ | ||||||
|  preprocess-html-wiki) |  preprocess-html-wiki) | ||||||
| 
 | 
 | ||||||
| (module+ test | (module+ test | ||||||
|   (require rackunit) |   (require rackunit)) | ||||||
|   (define wiki-document |  | ||||||
|     '(*TOP* |  | ||||||
|       (div (@ (class "mw-parser-output")) |  | ||||||
|            (aside (@ (role "region") (class "portable-infobox pi-theme-wikia pi-layout-default")) |  | ||||||
|                   (h2 (@ (class "pi-item pi-title") (data-source "title")) |  | ||||||
|                       "Infobox Title") |  | ||||||
|                   (figure (@ (class "pi-item pi-image") (data-source "image")) |  | ||||||
|                           (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image image-thumbnail") (title "")) |  | ||||||
|                              (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") (class "pi-image-thumbnail"))))) |  | ||||||
|                   (div (@ (class "pi-item pi-data") (data-source "description")) |  | ||||||
|                        (h3 (@ (class "pi-data-label")) |  | ||||||
|                            "Description") |  | ||||||
|                        (div (@ (class "pi-data-value")) |  | ||||||
|                             "Mystery infobox!"))) |  | ||||||
|            (div (@ (data-test-collapsesection) (class "collapsible collapsetoggle-inline collapsed")) |  | ||||||
|                 (i (b "This section is hidden for dramatic effect.")) |  | ||||||
|                 (div (@ (class "collapsible-content")) |  | ||||||
|                      (p "Another page link: " |  | ||||||
|                         (a (@ (data-test-wikilink) (href "https://test.fandom.com/wiki/Another_Page") (title "Another Page")) |  | ||||||
|                            "Another Page")))) |  | ||||||
|            (figure (@ (class "thumb tnone")) |  | ||||||
|                    (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image") (data-test-figure-a)) |  | ||||||
|                       (img (@ (src "data:image/gif;base64,R0lGODlhAQABAIABAAAAAP///yH5BAEAAAEALAAAAAABAAEAQAICTAEAOw%3D%3D") |  | ||||||
|                               (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") |  | ||||||
|                               (class "thumbimage lazyload")))) |  | ||||||
|                    (noscript |  | ||||||
|                     (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image")) |  | ||||||
|                        (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") |  | ||||||
|                                (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") |  | ||||||
|                                (class "thumbimage"))))) |  | ||||||
|                    (figcaption "Test figure!")) |  | ||||||
|            (iframe (@ (src "https://example.com/iframe-src"))))))) |  | ||||||
| 
 | 
 | ||||||
| (define (preprocess-html-wiki html) | (define (preprocess-html-wiki html) | ||||||
|   (define (rr* find replace contents) |   (define ((rr* find replace) contents) | ||||||
|     (regexp-replace* find contents replace)) |     (regexp-replace* find contents replace)) | ||||||
|   ((compose1 |   ((compose1 | ||||||
|     ; fix navbox list nesting |     ; fix navbox list nesting | ||||||
|     ; navbox on right of page has incorrect html "<td ...><li>" and the xexpr parser puts the <li> much further up the tree |     ; navbox on right of page has incorrect html "<td ...><li>" and the xexpr parser puts the <li> much further up the tree | ||||||
|     ; add a <ul> to make the parser happy |     ; add a <ul> to make the parser happy | ||||||
|     ; usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies |     ; usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies | ||||||
|     (curry rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2") |     (rr* #rx"(<td[^>]*>\n?)(<li>)" "\\1<ul>\\2") | ||||||
|     ; change <figcaption><p> to <figcaption><span> to make the parser happy |     ; change <figcaption><p> to <figcaption><span> to make the parser happy | ||||||
|     (curry rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>")) |     (rr* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>" "\\1<span class=\"caption\">\\2</span>")) | ||||||
|    html)) |    html)) | ||||||
| (module+ test | (module+ test | ||||||
|   (check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>") |   (check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>") | ||||||
|  | @ -82,198 +51,6 @@ | ||||||
|   (check-equal? (preprocess-html-wiki "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"> 	<p class=\"caption\">Caption text.</p></figcaption></figure>") |   (check-equal? (preprocess-html-wiki "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"> 	<p class=\"caption\">Caption text.</p></figcaption></figure>") | ||||||
|                 "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"><span class=\"caption\">Caption text.</span></figcaption></figure>")) |                 "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"><span class=\"caption\">Caption text.</span></figcaption></figure>")) | ||||||
| 
 | 
 | ||||||
| (define (update-tree-wiki tree wikiname) |  | ||||||
|   (update-tree |  | ||||||
|    (λ (element element-type attributes children) |  | ||||||
|      ;; replace whole element? |  | ||||||
|      (cond |  | ||||||
|        ; wrap tables in a div.table-scroller |  | ||||||
|        [(and (eq? element-type 'table) |  | ||||||
|              (has-class? "wikitable" attributes) |  | ||||||
|              (not (dict-has-key? attributes 'data-scrolling))) |  | ||||||
|         `(div |  | ||||||
|           ((class "table-scroller")) |  | ||||||
|           ((,element-type (@ (data-scrolling) ,@attributes) |  | ||||||
|                           ,@children)))] |  | ||||||
|        ; exclude empty figcaptions |  | ||||||
|        [(and (eq? element-type 'figcaption) |  | ||||||
|              (or (eq? (length (filter element-is-element? children)) 0) |  | ||||||
|                  ((query-selector (λ (element-type attributes children) |  | ||||||
|                                     (eq? element-type 'use)) |  | ||||||
|                                   element)))) |  | ||||||
|         return-no-element] |  | ||||||
|        ; exclude infobox items that are videos, and gallery items that are videos |  | ||||||
|        [(and (or (has-class? "pi-item" attributes) |  | ||||||
|                  (has-class? "wikia-gallery-item" attributes)) |  | ||||||
|              ((query-selector (λ (element-type attributes children) |  | ||||||
|                                 (has-class? "video-thumbnail" attributes)) |  | ||||||
|                               element))) |  | ||||||
|         return-no-element] |  | ||||||
|        ; exclude the invisible brackets after headings |  | ||||||
|        [(and (eq? element-type 'span) |  | ||||||
|              (has-class? "mw-editsection" attributes)) |  | ||||||
|         return-no-element] |  | ||||||
|        ; display a link instead of an iframe |  | ||||||
|        [(eq? element-type 'iframe) |  | ||||||
|         (define src (car (dict-ref attributes 'src null))) |  | ||||||
|         `(a |  | ||||||
|           ((class "iframe-alternative") (href ,src)) |  | ||||||
|           (,(format "Embedded media: ~a" src)))] |  | ||||||
|        ; remove noscript versions of images because they are likely lower quality than the script versions |  | ||||||
|        [(and (eq? element-type 'noscript) |  | ||||||
|              (match children |  | ||||||
|                ; either the noscript has a.image as a first child... |  | ||||||
|                [(list (list 'a (list '@ a-att ...) _)) (has-class? "image" a-att)] |  | ||||||
|                ; or the noscript has img as a first child |  | ||||||
|                [(list (list 'img _)) #t] |  | ||||||
|                [_ #f])) |  | ||||||
|         return-no-element] |  | ||||||
|        [#t |  | ||||||
|         (list element-type |  | ||||||
|               ;; attributes |  | ||||||
|               ((compose1 |  | ||||||
|                 ; uncollapsing |  | ||||||
|                 (curry attribute-maybe-update 'class |  | ||||||
|                        (λ (class) |  | ||||||
|                          (string-join |  | ||||||
|                           ((compose1 |  | ||||||
|                             ; uncollapse all navbox items (bottom of page mass navigation) |  | ||||||
|                             (curry u |  | ||||||
|                                    (λ (classlist) (and (eq? element-type 'table) |  | ||||||
|                                                        (member "navbox" classlist) |  | ||||||
|                                                        (member "collapsed" classlist))) |  | ||||||
|                                    (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist))) |  | ||||||
|                             ; uncollapse portable-infobox sections |  | ||||||
|                             (curry u |  | ||||||
|                                    (λ (classlist) (and (eq? element-type 'section) |  | ||||||
|                                                        (member "pi-collapse" classlist))) |  | ||||||
|                                    (λ (classlist) (filter (λ (v) |  | ||||||
|                                                             (and (not (equal? v "pi-collapse-closed")) |  | ||||||
|                                                                  (not (equal? v "pi-collapse")))) |  | ||||||
|                                                           classlist))) |  | ||||||
|                             ; generic: includes article sections and tables, probably more |  | ||||||
|                             (curry u |  | ||||||
|                                    (λ (classlist) (and (member "collapsible" classlist) |  | ||||||
|                                                        (member "collapsed" classlist))) |  | ||||||
|                                    (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist)))) |  | ||||||
|                            (string-split class " ")) |  | ||||||
|                           " "))) |  | ||||||
|                 ; change links to stay on the same wiki |  | ||||||
|                 (curry attribute-maybe-update 'href |  | ||||||
|                        (λ (href) |  | ||||||
|                          ((compose1 |  | ||||||
|                            (λ (href) (regexp-replace #rx"^(/wiki/.*)" href (format "/~a\\1" wikiname))) |  | ||||||
|                            (λ (href) (regexp-replace (pregexp (format "^https://(~a)\\.fandom\\.com(/wiki/.*)" px-wikiname)) href "/\\1\\2"))) |  | ||||||
|                           href))) |  | ||||||
|                 ; add noreferrer to a.image |  | ||||||
|                 (curry u |  | ||||||
|                        (λ (v) (and (eq? element-type 'a) |  | ||||||
|                                    (has-class? "image" v))) |  | ||||||
|                        (λ (v) (dict-update v 'rel (λ (s) |  | ||||||
|                                                     (list (string-append (car s) " noreferrer"))) |  | ||||||
|                                            '("")))) |  | ||||||
|                 ; proxy images from inline styles, if strict_proxy is set |  | ||||||
|                 (curry u |  | ||||||
|                        (λ (v) (config-true? 'strict_proxy)) |  | ||||||
|                        (λ (v) (attribute-maybe-update |  | ||||||
|                                'style |  | ||||||
|                                (λ (style) |  | ||||||
|                                  (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style |  | ||||||
|                                                  (λ (whole url) |  | ||||||
|                                                    (string-append |  | ||||||
|                                                     "url(" |  | ||||||
|                                                     (u-proxy-url url) |  | ||||||
|                                                     ")")))) v))) |  | ||||||
|                 ; and also their links, if strict_proxy is set |  | ||||||
|                 (curry u |  | ||||||
|                        (λ (v) |  | ||||||
|                          (and (config-true? 'strict_proxy) |  | ||||||
|                               (eq? element-type 'a) |  | ||||||
|                               (or (has-class? "image-thumbnail" v) |  | ||||||
|                                   (has-class? "image" v)))) |  | ||||||
|                        (λ (v) (attribute-maybe-update 'href u-proxy-url v))) |  | ||||||
|                 ; proxy images from src attributes, if strict_proxy is set |  | ||||||
|                 (curry u |  | ||||||
|                        (λ (v) (config-true? 'strict_proxy)) |  | ||||||
|                        (λ (v) (attribute-maybe-update 'src u-proxy-url v))) |  | ||||||
|                 ; don't lazyload images |  | ||||||
|                 (curry u |  | ||||||
|                        (λ (v) (dict-has-key? v 'data-src)) |  | ||||||
|                        (λ (v) (attribute-maybe-update 'src (λ (_) (car (dict-ref v 'data-src))) v))) |  | ||||||
|                 ; don't use srcset - TODO: use srcset? |  | ||||||
|                 (λ (v) (dict-remove v 'srcset))) |  | ||||||
|                attributes) |  | ||||||
|               ;; children |  | ||||||
|               ((compose1 |  | ||||||
|                 ; more uncollapsing - sample: bandori/wiki/BanG_Dream!_Wikia |  | ||||||
|                 (curry u |  | ||||||
|                        (λ (v) (has-class? "mw-collapsible-content" attributes)) |  | ||||||
|                        (λ (v) (for/list ([element v]) |  | ||||||
|                                 (u (λ (element) (pair? element)) |  | ||||||
|                                    (λ (element) |  | ||||||
|                                      `(,(car element) |  | ||||||
|                                        (@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element))) |  | ||||||
|                                        ,@(filter element-is-content? (cdr element)))) |  | ||||||
|                                    element)))) |  | ||||||
|                 ; wrap blinking animated images in a slot so they can be animated with CSS |  | ||||||
|                 (curry u |  | ||||||
|                        (λ (v) (and (has-class? "animated" attributes) |  | ||||||
|                                    ((length v) . > . 1))) |  | ||||||
|                        (λ (v) |  | ||||||
|                          `((span (@ (class "animated-slot__outer") (style ,(format "--steps: ~a" (length v)))) |  | ||||||
|                                  (span (@ (class "animated-slot__inner")) |  | ||||||
|                                        ,@v)))))) |  | ||||||
|                children))])) |  | ||||||
|    tree)) |  | ||||||
| (module+ test |  | ||||||
|   (define transformed |  | ||||||
|     (parameterize ([(config-parameter 'strict_proxy) "true"]) |  | ||||||
|       (update-tree-wiki wiki-document "test"))) |  | ||||||
|   ; check that wikilinks are changed to be local |  | ||||||
|   (check-equal? (get-attribute 'href (bits->attributes |  | ||||||
|                                       ((query-selector |  | ||||||
|                                         (λ (t a c) (dict-has-key? a 'data-test-wikilink)) |  | ||||||
|                                         transformed)))) |  | ||||||
|                 "/test/wiki/Another_Page") |  | ||||||
|   ; check that a.image has noreferrer |  | ||||||
|   (check-equal? (get-attribute 'rel (bits->attributes |  | ||||||
|                                      ((query-selector |  | ||||||
|                                        (λ (t a c) (and (eq? t 'a) |  | ||||||
|                                                        (has-class? "image" a))) |  | ||||||
|                                        transformed)))) |  | ||||||
|                 " noreferrer") |  | ||||||
|   ; check that article collapse sections become uncollapsed |  | ||||||
|   (check-equal? (get-attribute 'class (bits->attributes |  | ||||||
|                                        ((query-selector |  | ||||||
|                                          (λ (t a c) (dict-has-key? a 'data-test-collapsesection)) |  | ||||||
|                                          transformed)))) |  | ||||||
|                 "collapsible collapsetoggle-inline") |  | ||||||
|   ; check that iframes are gone |  | ||||||
|   (check-false ((query-selector (λ (t a c) (eq? t 'iframe)) transformed))) |  | ||||||
|   (check-equal? (let* ([alternative ((query-selector (λ (t a c) (has-class? "iframe-alternative" a)) transformed))] |  | ||||||
|                        [link ((query-selector (λ (t a c) (eq? t 'a)) alternative))]) |  | ||||||
|                   (get-attribute 'href (bits->attributes link))) |  | ||||||
|                 "https://example.com/iframe-src") |  | ||||||
|   ; check that images are proxied |  | ||||||
|   (check-equal? (get-attribute 'src (bits->attributes |  | ||||||
|                                      ((query-selector |  | ||||||
|                                        (λ (t a c) (eq? t 'img)) |  | ||||||
|                                        transformed)))) |  | ||||||
|                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image-thumbnail.png") |  | ||||||
|   ; check that links to images are proxied |  | ||||||
|   (check-equal? (get-attribute 'href (bits->attributes |  | ||||||
|                                       ((query-selector |  | ||||||
|                                         (λ (t a c) (and (eq? t 'a) (has-class? "image-thumbnail" a))) |  | ||||||
|                                         transformed)))) |  | ||||||
|                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png") |  | ||||||
|   (check-equal? (get-attribute 'href (bits->attributes |  | ||||||
|                                       ((query-selector |  | ||||||
|                                         (λ (t a c) (member '(data-test-figure-a) a)) |  | ||||||
|                                         transformed)))) |  | ||||||
|                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png") |  | ||||||
|   ; check that noscript images are removed |  | ||||||
|   (check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f)) |  | ||||||
| 
 |  | ||||||
| (define (page-wiki req) | (define (page-wiki req) | ||||||
|   (define wikiname (path/param-path (first (url-path (request-uri req))))) |   (define wikiname (path/param-path (first (url-path (request-uri req))))) | ||||||
|   (define user-cookies (user-cookies-getter req)) |   (define user-cookies (user-cookies-getter req)) | ||||||
|  |  | ||||||
							
								
								
									
										267
									
								
								src/tree-updater.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										267
									
								
								src/tree-updater.rkt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,267 @@ | ||||||
|  | #lang racket/base | ||||||
|  | (require racket/dict | ||||||
|  |          racket/function | ||||||
|  |          racket/match | ||||||
|  |          racket/string | ||||||
|  |          "config.rkt" | ||||||
|  |          "pure-utils.rkt" | ||||||
|  |          "url-utils.rkt" | ||||||
|  |          "xexpr-utils.rkt") | ||||||
|  | 
 | ||||||
|  | (provide | ||||||
|  |  update-tree-wiki) | ||||||
|  | 
 | ||||||
|  | (module+ test | ||||||
|  |   (require rackunit | ||||||
|  |            html-parsing) | ||||||
|  |   (define wiki-document | ||||||
|  |     '(*TOP* | ||||||
|  |       (div (@ (class "mw-parser-output")) | ||||||
|  |            (aside (@ (role "region") (class "portable-infobox pi-theme-wikia pi-layout-default")) | ||||||
|  |                   (h2 (@ (class "pi-item pi-title") (data-source "title")) | ||||||
|  |                       "Infobox Title") | ||||||
|  |                   (figure (@ (class "pi-item pi-image") (data-source "image")) | ||||||
|  |                           (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image image-thumbnail") (title "")) | ||||||
|  |                              (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") (class "pi-image-thumbnail"))))) | ||||||
|  |                   (div (@ (class "pi-item pi-data") (data-source "description")) | ||||||
|  |                        (h3 (@ (class "pi-data-label")) | ||||||
|  |                            "Description") | ||||||
|  |                        (div (@ (class "pi-data-value")) | ||||||
|  |                             "Mystery infobox!"))) | ||||||
|  |            (div (@ (data-test-collapsesection) (class "collapsible collapsetoggle-inline collapsed")) | ||||||
|  |                 (i (b "This section is hidden for dramatic effect.")) | ||||||
|  |                 (div (@ (class "collapsible-content")) | ||||||
|  |                      (p "Another page link: " | ||||||
|  |                         (a (@ (data-test-wikilink) (href "https://test.fandom.com/wiki/Another_Page") (title "Another Page")) | ||||||
|  |                            "Another Page")))) | ||||||
|  |            (figure (@ (class "thumb tnone")) | ||||||
|  |                    (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image") (data-test-figure-a)) | ||||||
|  |                       (img (@ (src "data:image/gif;base64,R0lGODlhAQABAIABAAAAAP///yH5BAEAAAEALAAAAAABAAEAQAICTAEAOw%3D%3D") | ||||||
|  |                               (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") | ||||||
|  |                               (class "thumbimage lazyload")))) | ||||||
|  |                    (noscript | ||||||
|  |                     (a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image")) | ||||||
|  |                        (img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") | ||||||
|  |                                (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") | ||||||
|  |                                (class "thumbimage"))))) | ||||||
|  |                    (figcaption "Test figure!")) | ||||||
|  |            (iframe (@ (src "https://example.com/iframe-src"))))))) | ||||||
|  | 
 | ||||||
|  | (define (updater wikiname) | ||||||
|  |   (define classlist-updater | ||||||
|  |     (compose1 | ||||||
|  |      ; uncollapse all navbox items (bottom of page mass navigation) | ||||||
|  |      (curry u | ||||||
|  |             (λ (classlist) (and ; removed due to scoping, would improve peformance (eq? element-type 'table) | ||||||
|  |                             (member "navbox" classlist) | ||||||
|  |                             (member "collapsed" classlist))) | ||||||
|  |             (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist))) | ||||||
|  |      ; uncollapse portable-infobox sections | ||||||
|  |      (curry u | ||||||
|  |             (λ (classlist) (and ; removed due to scoping, would improve performance (eq? element-type 'section) | ||||||
|  |                             (member "pi-collapse" classlist))) | ||||||
|  |             (λ (classlist) (filter (λ (v) | ||||||
|  |                                      (and (not (equal? v "pi-collapse-closed")) | ||||||
|  |                                           (not (equal? v "pi-collapse")))) | ||||||
|  |                                    classlist))) | ||||||
|  |      ; generic: includes article sections and tables, probably more | ||||||
|  |      (curry u | ||||||
|  |             (λ (classlist) (and (member "collapsible" classlist) | ||||||
|  |                                 (member "collapsed" classlist))) | ||||||
|  |             (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist))))) | ||||||
|  | 
 | ||||||
|  |   (define ((string-replace-curried from to) str) | ||||||
|  |     (string-replace str from to)) | ||||||
|  | 
 | ||||||
|  |   (define class-updater | ||||||
|  |     (compose1 | ||||||
|  |      (string-replace-curried " collapsed" "") | ||||||
|  |      (string-replace-curried "pi-collapse-closed" "") | ||||||
|  |      (string-replace-curried "pi-collapse" ""))) | ||||||
|  | 
 | ||||||
|  |   (define attributes-updater | ||||||
|  |     (compose1 | ||||||
|  |      ; uncollapsing | ||||||
|  |      #;(curry attribute-maybe-update 'class | ||||||
|  |               (λ (class) (string-join (classlist-updater (string-split class " ")) " "))) | ||||||
|  |      (curry attribute-maybe-update 'class class-updater) | ||||||
|  |      ; change links to stay on the same wiki | ||||||
|  |      (curry attribute-maybe-update 'href | ||||||
|  |             (λ (href) | ||||||
|  |               ((compose1 | ||||||
|  |                 (λ (href) (regexp-replace #rx"^(/wiki/.*)" href (format "/~a\\1" wikiname))) | ||||||
|  |                 (λ (href) (regexp-replace (pregexp (format "^https://(~a)\\.fandom\\.com(/wiki/.*)" px-wikiname)) href "/\\1\\2"))) | ||||||
|  |                href))) | ||||||
|  |      ; add noreferrer to a.image | ||||||
|  |      (curry u | ||||||
|  |             (λ (v) (and #;(eq? element-type 'a) | ||||||
|  |                         (has-class? "image" v))) | ||||||
|  |             (λ (v) (dict-update v 'rel (λ (s) | ||||||
|  |                                          (list (string-append (car s) " noreferrer"))) | ||||||
|  |                                 '("")))) | ||||||
|  |      ; proxy images from inline styles, if strict_proxy is set | ||||||
|  |      (curry u | ||||||
|  |             (λ (v) (config-true? 'strict_proxy)) | ||||||
|  |             (λ (v) (attribute-maybe-update | ||||||
|  |                     'style | ||||||
|  |                     (λ (style) | ||||||
|  |                       (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style | ||||||
|  |                                       (λ (whole url) | ||||||
|  |                                         (string-append | ||||||
|  |                                          "url(" | ||||||
|  |                                          (u-proxy-url url) | ||||||
|  |                                          ")")))) v))) | ||||||
|  |      ; and also their links, if strict_proxy is set | ||||||
|  |      (curry u | ||||||
|  |             (λ (v) | ||||||
|  |               (and (config-true? 'strict_proxy) | ||||||
|  |                    #;(eq? element-type 'a) | ||||||
|  |                    (or (has-class? "image-thumbnail" v) | ||||||
|  |                        (has-class? "image" v)))) | ||||||
|  |             (λ (v) (attribute-maybe-update 'href u-proxy-url v))) | ||||||
|  |      ; proxy images from src attributes, if strict_proxy is set | ||||||
|  |      (curry u | ||||||
|  |             (λ (v) (config-true? 'strict_proxy)) | ||||||
|  |             (λ (v) (attribute-maybe-update 'src u-proxy-url v))) | ||||||
|  |      ; don't lazyload images | ||||||
|  |      (curry u | ||||||
|  |             (λ (v) (dict-has-key? v 'data-src)) | ||||||
|  |             (λ (v) (attribute-maybe-update 'src (λ (_) (car (dict-ref v 'data-src))) v))) | ||||||
|  |      ; don't use srcset - TODO: use srcset? | ||||||
|  |      (λ (v) (dict-remove v 'srcset)))) | ||||||
|  | 
 | ||||||
|  |   (define (children-updater attributes children) | ||||||
|  |     ; more uncollapsing - sample: bandori/wiki/BanG_Dream!_Wikia | ||||||
|  |     ((λ (children) | ||||||
|  |        (u | ||||||
|  |         (λ (v) (has-class? "mw-collapsible-content" attributes)) | ||||||
|  |         (λ (v) (for/list ([element v]) | ||||||
|  |                  (u (λ (element) (pair? element)) | ||||||
|  |                     (λ (element) | ||||||
|  |                       `(,(car element) | ||||||
|  |                         (@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element))) | ||||||
|  |                         ,@(filter element-is-content? (cdr element)))) | ||||||
|  |                     element))) | ||||||
|  |         children)) | ||||||
|  |      ; wrap blinking animated images in a slot so they can be animated with CSS | ||||||
|  |      ((λ (children) | ||||||
|  |         (u | ||||||
|  |          (λ (v) (and (has-class? "animated" attributes) | ||||||
|  |                      ((length v) . > . 1))) | ||||||
|  |          (λ (v) | ||||||
|  |            `((span (@ (class "animated-slot__outer") (style ,(format "--steps: ~a" (length v)))) | ||||||
|  |                    (span (@ (class "animated-slot__inner")) | ||||||
|  |                          ,@v)))) | ||||||
|  |          children)) | ||||||
|  |       children))) | ||||||
|  | 
 | ||||||
|  |   (define (updater element element-type attributes children) | ||||||
|  |     ;; replace whole element? | ||||||
|  |     (cond | ||||||
|  |       ; wrap tables in a div.table-scroller | ||||||
|  |       [(and (eq? element-type 'table) | ||||||
|  |             (has-class? "wikitable" attributes) | ||||||
|  |             (not (dict-has-key? attributes 'data-scrolling))) | ||||||
|  |        `(div | ||||||
|  |          ((class "table-scroller")) | ||||||
|  |          ((,element-type (@ (data-scrolling) ,@attributes) | ||||||
|  |                          ,@children)))] | ||||||
|  |       ; exclude empty figcaptions | ||||||
|  |       [(and (eq? element-type 'figcaption) | ||||||
|  |             (or (eq? (length (filter element-is-element? children)) 0) | ||||||
|  |                 ((query-selector (λ (element-type attributes children) | ||||||
|  |                                    (eq? element-type 'use)) | ||||||
|  |                                  element)))) | ||||||
|  |        return-no-element] | ||||||
|  |       ; exclude infobox items that are videos, and gallery items that are videos | ||||||
|  |       [(and (or (has-class? "pi-item" attributes) | ||||||
|  |                 (has-class? "wikia-gallery-item" attributes)) | ||||||
|  |             ((query-selector (λ (element-type attributes children) | ||||||
|  |                                (has-class? "video-thumbnail" attributes)) | ||||||
|  |                              element))) | ||||||
|  |        return-no-element] | ||||||
|  |       ; exclude the invisible brackets after headings | ||||||
|  |       [(and (eq? element-type 'span) | ||||||
|  |             (has-class? "mw-editsection" attributes)) | ||||||
|  |        return-no-element] | ||||||
|  |       ; display a link instead of an iframe | ||||||
|  |       [(eq? element-type 'iframe) | ||||||
|  |        (define src (car (dict-ref attributes 'src null))) | ||||||
|  |        `(a | ||||||
|  |          ((class "iframe-alternative") (href ,src)) | ||||||
|  |          (,(format "Embedded media: ~a" src)))] | ||||||
|  |       ; remove noscript versions of images because they are likely lower quality than the script versions | ||||||
|  |       [(and (eq? element-type 'noscript) | ||||||
|  |             (match children | ||||||
|  |               ; either the noscript has a.image as a first child... | ||||||
|  |               [(list (list 'a (list '@ a-att ...) _)) (has-class? "image" a-att)] | ||||||
|  |               ; or the noscript has img as a first child | ||||||
|  |               [(list (list 'img _)) #t] | ||||||
|  |               [_ #f])) | ||||||
|  |        return-no-element] | ||||||
|  |       [#t | ||||||
|  |        (list element-type | ||||||
|  |              ;; attributes | ||||||
|  |              (attributes-updater #; element-type attributes) | ||||||
|  |              ;; children | ||||||
|  |              (children-updater attributes children))])) | ||||||
|  | 
 | ||||||
|  |   updater) | ||||||
|  | 
 | ||||||
|  | (define (update-tree-wiki tree wikiname) | ||||||
|  |   (update-tree (updater wikiname) tree)) | ||||||
|  | 
 | ||||||
|  | (module+ test | ||||||
|  |   (define transformed | ||||||
|  |     (parameterize ([(config-parameter 'strict_proxy) "true"]) | ||||||
|  |       (update-tree-wiki wiki-document "test"))) | ||||||
|  |   ; check that wikilinks are changed to be local | ||||||
|  |   (check-equal? (get-attribute 'href (bits->attributes | ||||||
|  |                                       ((query-selector | ||||||
|  |                                         (λ (t a c) (dict-has-key? a 'data-test-wikilink)) | ||||||
|  |                                         transformed)))) | ||||||
|  |                 "/test/wiki/Another_Page") | ||||||
|  |   ; check that a.image has noreferrer | ||||||
|  |   (check-equal? (get-attribute 'rel (bits->attributes | ||||||
|  |                                      ((query-selector | ||||||
|  |                                        (λ (t a c) (and (eq? t 'a) | ||||||
|  |                                                        (has-class? "image" a))) | ||||||
|  |                                        transformed)))) | ||||||
|  |                 " noreferrer") | ||||||
|  |   ; check that article collapse sections become uncollapsed | ||||||
|  |   (check-equal? (get-attribute 'class (bits->attributes | ||||||
|  |                                        ((query-selector | ||||||
|  |                                          (λ (t a c) (dict-has-key? a 'data-test-collapsesection)) | ||||||
|  |                                          transformed)))) | ||||||
|  |                 "collapsible collapsetoggle-inline") | ||||||
|  |   ; check that iframes are gone | ||||||
|  |   (check-false ((query-selector (λ (t a c) (eq? t 'iframe)) transformed))) | ||||||
|  |   (check-equal? (let* ([alternative ((query-selector (λ (t a c) (has-class? "iframe-alternative" a)) transformed))] | ||||||
|  |                        [link ((query-selector (λ (t a c) (eq? t 'a)) alternative))]) | ||||||
|  |                   (get-attribute 'href (bits->attributes link))) | ||||||
|  |                 "https://example.com/iframe-src") | ||||||
|  |   ; check that images are proxied | ||||||
|  |   (check-equal? (get-attribute 'src (bits->attributes | ||||||
|  |                                      ((query-selector | ||||||
|  |                                        (λ (t a c) (eq? t 'img)) | ||||||
|  |                                        transformed)))) | ||||||
|  |                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image-thumbnail.png") | ||||||
|  |   ; check that links to images are proxied | ||||||
|  |   (check-equal? (get-attribute 'href (bits->attributes | ||||||
|  |                                       ((query-selector | ||||||
|  |                                         (λ (t a c) (and (eq? t 'a) (has-class? "image-thumbnail" a))) | ||||||
|  |                                         transformed)))) | ||||||
|  |                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png") | ||||||
|  |   (check-equal? (get-attribute 'href (bits->attributes | ||||||
|  |                                       ((query-selector | ||||||
|  |                                         (λ (t a c) (member '(data-test-figure-a) a)) | ||||||
|  |                                         transformed)))) | ||||||
|  |                 "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png") | ||||||
|  |   ; check that noscript images are removed | ||||||
|  |   (check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f) | ||||||
|  |   ; benchmark | ||||||
|  |   (when (file-exists? "Frog.html2") | ||||||
|  |     (with-input-from-file "Frog.html2" | ||||||
|  |       (λ () | ||||||
|  |         (define tree (html->xexp (current-input-port))) | ||||||
|  |         (time (length (update-tree-wiki tree "minecraft"))))))) | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue