From a9754463b6d9d95c63516e501f39dc83aa5ef2fa Mon Sep 17 00:00:00 2001
From: Cadence Ember
Date: Sat, 27 May 2023 23:41:20 +1200
Subject: [PATCH 1/2] Fix (& x) sequences truncating the page

---
Review notes (free-text area between `---` and the first `diff --git`):
* Line structure was restored from a copy in which the whole series had been
  collapsed onto a few lines. Indentation and blank-line placement inside the
  hunks are reconstructed and may not match the target files byte-for-byte.
* `element-is-element?` now rejects `(& x)` forms, `element-is-content?`
  accepts them explicitly, and the "mw-collapsible-content" updater rewrites
  only children that satisfy `element-is-element?` instead of every pair.

 lib/tree-updater.rkt | 10 ++++++++--
 lib/xexpr-utils.rkt  |  9 +++++----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/lib/tree-updater.rkt b/lib/tree-updater.rkt
index e7d48b4..098af3d 100644
--- a/lib/tree-updater.rkt
+++ b/lib/tree-updater.rkt
@@ -60,7 +60,9 @@
                                  (figcaption "Test figure!"))
                          (iframe (@ (src "https://example.com/iframe-src")))
                          (div (@ (class "reviews"))
-                              (header "GameSpot Expert Reviews"))))))
+                              (header "GameSpot Expert Reviews"))
+                         (div (@ (data-test-ampersand) (class "mw-collapsible-content"))
+                              (& ndash))))))
 
 (define (updater wikiname #:strict-proxy? [strict-proxy? #f])
   ;; precompute wikiurl regex for efficency
@@ -159,7 +161,7 @@
            (u
             (λ (v) (has-class? "mw-collapsible-content" attributes))
             (λ (v) (for/list ([element v])
-                     (u (λ (element) (pair? element))
+                     (u (λ (element) (element-is-element? element))
                         (λ (element)
                           `(,(car element)
                             (@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element)))
@@ -304,6 +306,10 @@
   (check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f)
   ; check that gamespot reviews/ads are removed
   (check-equal? ((query-selector (λ (t a c) (has-class? "reviews" a)) transformed)) #f)
+  ; check that (& x) sequences are not broken
+  (check-equal? ((query-selector (λ (t a c) (dict-has-key? a 'data-test-ampersand)) transformed))
+                '(div (@ (data-test-ampersand) (class "mw-collapsible-content"))
+                      (& ndash)))
   ; benchmark
   (when (file-exists? "../storage/Frog.html")
     (with-input-from-file "../storage/Frog.html"
diff --git a/lib/xexpr-utils.rkt b/lib/xexpr-utils.rkt
index cb40510..e1ac957 100644
--- a/lib/xexpr-utils.rkt
+++ b/lib/xexpr-utils.rkt
@@ -86,15 +86,16 @@
 
 ; "element" is a real element with a type and everything (non-string, non-attributes)
 (define (element-is-element? element)
-  (and (element-is-bits? element) (not (element-is-xattributes? element))))
+  (and (element-is-bits? element) (not (eq? (car element) '&))(not (element-is-xattributes? element))))
 (module+ test
   (check-true (element-is-element? '(span "hi")))
   (check-false (element-is-element? '(@ (alt "Cute cat."))))
-  (check-false (element-is-element? "hi")))
+  (check-false (element-is-element? "hi"))
+  (check-false (element-is-element? '(& ndash))))
 
-; "element content" is a real element or a string
+; "element content" is a real element or a string or a (& x) sequence
 (define (element-is-content? element)
-  (or (string? element) (element-is-element? element)))
+  (or (string? element) (element-is-element? element) (and (pair? element) (eq? (car element) '&))))
 (module+ test
   (check-true (element-is-content? '(span "hi")))
   (check-false (element-is-content? '(@ (alt "Cute cat."))))

From 4b039cca5e0ed30d1ec8c76e5ec7ff47acaef507 Mon Sep 17 00:00:00 2001
From: Cadence Ember
Date: Sat, 27 May 2023 23:48:08 +1200
Subject: [PATCH 2/2] Explain how page-search works in the code

---
Review notes (free-text area between `---` and the first `diff --git`):
* NOTE(review): besides adding explanatory comments, this patch deletes the
  `(when (config-true? 'feature_offline::only) (raise-user-error ...))` guard
  from `page-search`. The subject line does not mention that behavior change;
  confirm the removal is intended.
* Indentation and the position of the blank context line in the last hunk are
  reconstructed (see the note on PATCH 1/2).

 src/page-search.rkt | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/src/page-search.rkt b/src/page-search.rkt
index ce527c0..e4960d8 100644
--- a/src/page-search.rkt
+++ b/src/page-search.rkt
@@ -28,28 +28,35 @@
   (define search-json-data
     '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))
 
+;; this takes the info we gathered from fandom and makes the big fat x-expression page
 (define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f])
   (define search-results (jp "/query/search" data))
+  ;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle
   (generate-wiki-page
+   ;; so I provide my helper function with the necessary context...
    #:req req
    #:source-url dest-url
    #:wikiname wikiname
    #:title query
    #:siteinfo siteinfo
+   ;; and here's the actual results to display in the wiki page layout
    `(div (@ (class "mw-parser-output"))
+         ;; header before the search results showing how many we found
          (p ,(format "~a results found for " (length search-results))
             (strong ,query))
+         ;; *u*nordered *l*ist of matching search results
          (ul ,@(map
-                (λ (result)
+                (λ (result) ;; for each result, run this code...
                   (let* ([title (jp "/title" result)]
                          [page-path (page-title->path title)]
                          [timestamp (jp "/timestamp" result)]
                          [wordcount (jp "/wordcount" result)]
                          [size (jp "/size" result)])
+                    ;; and make this x-expression...
                     `(li (@ (class "my-result"))
-                         (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path)))
-                            ,title)
-                         (div (@ (class "my-result__info"))
+                         (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
+                            ,title) ; using unquote to insert the result page title
+                         (div (@ (class "my-result__info")) ; constructing the line under the search result
                               "last edited "
                               (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
                               ,(format ", ~a words, ~a kb"
@@ -57,13 +64,18 @@
                                        (exact->inexact (/ (round (/ size 100)) 10)))))))
                 search-results)))))
 
+;; will be called when the web browser asks to load the page
 (define (page-search req)
+  ;; this just means, catch any errors and display them in the browser. it's a function somewhere else
   (response-handler
+   ;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner"
+   ;; grab the first part to use as the wikiname, in this case, "minecraft"
    (define wikiname (path/param-path (first (url-path (request-uri req)))))
+   ;; grab the part after ?q= which is the search terms
    (define query (dict-ref (url-query (request-uri req)) 'q #f))
+   ;; constructing the URL where I want to get fandom data from...
    (define origin (format "https://~a.fandom.com" wikiname))
-   (when (config-true? 'feature_offline::only)
-     (raise-user-error "Full search is currently not available on breezewiki.com - for now, please use the pop-up search suggestions or wait for me to fix it! Thanks <3"))
+   ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
    (define dest-url
      (format "~a/api.php?~a"
              origin
@@ -73,21 +85,26 @@
                                ("formatversion" . "2")
                                ("format" . "json")))))
 
+   ;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name)
    (define-values (dest-res siteinfo)
      (thread-values
       (λ ()
         (log-outgoing dest-url)
-        (easy:get dest-url #:timeouts timeouts))
+        (easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results
       (λ ()
-        (siteinfo-fetch wikiname))))
+        (siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki
+   ;; search results are a JSON string. parse JSON into racket data structures
    (define data (easy:response-json dest-res))
-
+   ;; calling my generate-results-page function with the information so far in order to get a big fat x-expression
+   ;; big fat x-expression goes into the body variable
    (define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo))
+   ;; error checking
    (when (config-true? 'debug)
      ; used for its side effects
      ; convert to string with error checking, error will be raised if xexp is invalid
      (xexp->html body))
+   ;; convert body to HTML and send to browser
    (response/output
     #:code 200
     #:headers (build-headers always-headers)