Fix (& x) sequences truncating the page

This commit is contained in:
Cadence Ember 2023-05-27 23:41:20 +12:00
parent 6fef9281c3
commit a9754463b6
Signed by: cadence
GPG key ID: BC1C2C61CF521B17
2 changed files with 13 additions and 6 deletions

View file

@ -60,7 +60,9 @@
(figcaption "Test figure!"))
(iframe (@ (src "https://example.com/iframe-src")))
(div (@ (class "reviews"))
(header "GameSpot Expert Reviews"))))))
(header "GameSpot Expert Reviews"))
(div (@ (data-test-ampersand) (class "mw-collapsible-content"))
(& ndash))))))
(define (updater wikiname #:strict-proxy? [strict-proxy? #f])
;; precompute wikiurl regex for efficiency
@ -159,7 +161,7 @@
(u
(λ (v) (has-class? "mw-collapsible-content" attributes))
(λ (v) (for/list ([element v])
(u (λ (element) (pair? element))
(u (λ (element) (element-is-element? element))
(λ (element)
`(,(car element)
(@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element)))
@ -304,6 +306,10 @@
(check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f)
; check that gamespot reviews/ads are removed
(check-equal? ((query-selector (λ (t a c) (has-class? "reviews" a)) transformed)) #f)
; check that (& x) sequences are not broken
(check-equal? ((query-selector (λ (t a c) (dict-has-key? a 'data-test-ampersand)) transformed))
'(div (@ (data-test-ampersand) (class "mw-collapsible-content"))
(& ndash)))
; benchmark
(when (file-exists? "../storage/Frog.html")
(with-input-from-file "../storage/Frog.html"

View file

@ -86,15 +86,16 @@
; "element" is a real element with a type and everything (non-string, non-attributes)
; Predicate: returns true only when `element` passes element-is-bits?, and is not an
; xattributes list as judged by element-is-xattributes?.
; NOTE(review): the two `and` forms below look like the before/after pair of a diff hunk
; (old line first, new line second) - confirm before treating this as runnable code.
; The second form additionally rejects any list whose first item is the symbol &, so that
; (& x) sequences such as (& ndash) are not treated as real elements.
; `and` short-circuits, so (car element) is only evaluated after element-is-bits? succeeded;
; presumably element-is-bits? guarantees a pair, making the car safe - verify.
(define (element-is-element? element)
(and (element-is-bits? element) (not (element-is-xattributes? element))))
(and (element-is-bits? element) (not (eq? (car element) '&))(not (element-is-xattributes? element))))
; Tests for element-is-element?: a tagged list such as (span "hi") counts as an element;
; an (@ ...) attribute list, a bare string, and a (& x) sequence do not.
; NOTE(review): the two identical "hi" checks look like the before/after pair of a diff hunk
; (the first closes the module, the second continues it) - confirm.
(module+ test
(check-true (element-is-element? '(span "hi")))
(check-false (element-is-element? '(@ (alt "Cute cat."))))
(check-false (element-is-element? "hi")))
(check-false (element-is-element? "hi"))
(check-false (element-is-element? '(& ndash))))
; "element content" is a real element or a string
; "element content" is a real element or a string or a (& x) sequence
; Predicate: returns true when `element` is something that may appear as content.
; NOTE(review): the two `or` forms below look like the before/after pair of a diff hunk
; (old line first, new line second) - confirm before treating this as runnable code.
; The first form accepts strings and anything element-is-element? accepts.
; The second form also accepts a pair whose first item is the symbol &, i.e. a (& x) sequence;
; the pair? guard keeps (car element) from being applied to a non-pair.
(define (element-is-content? element)
(or (string? element) (element-is-element? element)))
(or (string? element) (element-is-element? element) (and (pair? element) (eq? (car element) '&))))
(module+ test
(check-true (element-is-content? '(span "hi")))
(check-false (element-is-content? '(@ (alt "Cute cat."))))