diff --git a/lib/archive-file-mappings.rkt b/lib/archive-file-mappings.rkt index ba013ab..4aa8a69 100644 --- a/lib/archive-file-mappings.rkt +++ b/lib/archive-file-mappings.rkt @@ -11,7 +11,7 @@ url-segments->guess-title) (define (local-encoded-url->segments str) ; '("wiki" "Page_title") - (map path/param-path (fix-semicolons-url-path (url-path (string->url str))))) + (map path/param-path (url-path (string->url str)))) (define (url-segments->basename segments) ; "Page_title" filename encoded, no extension or dir prefix (define extra-encoded (map (λ (s) (bytes->string/latin-1 (percent-encode s filename-set #f))) (cdr segments))) diff --git a/lib/tree-updater.rkt b/lib/tree-updater.rkt index 098af3d..0dbf695 100644 --- a/lib/tree-updater.rkt +++ b/lib/tree-updater.rkt @@ -58,11 +58,7 @@ (data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") (class "thumbimage"))))) (figcaption "Test figure!")) - (iframe (@ (src "https://example.com/iframe-src"))) - (div (@ (class "reviews")) - (header "GameSpot Expert Reviews")) - (div (@ (data-test-ampersand) (class "mw-collapsible-content")) - (& ndash)))))) + (iframe (@ (src "https://example.com/iframe-src"))))))) (define (updater wikiname #:strict-proxy? [strict-proxy? #f]) ;; precompute wikiurl regex for efficency @@ -161,7 +157,7 @@ (u (λ (v) (has-class? "mw-collapsible-content" attributes)) (λ (v) (for/list ([element v]) - (u (λ (element) (element-is-element? element)) + (u (λ (element) (pair? element)) (λ (element) `(,(car element) (@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element))) @@ -242,9 +238,6 @@ [(list (list 'img _)) #t] [_ #f])) return-no-element] - ; remove gamespot reviews/ads - [(has-class? "reviews" attributes) - return-no-element] [#t (list element-type ;; attributes @@ -304,12 +297,6 @@ "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png") ; check that noscript images are removed (check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f) - ; check that gamespot reviews/ads are removed - (check-equal? ((query-selector (λ (t a c) (has-class? "reviews" a)) transformed)) #f) - ; check that (& x) sequences are not broken - (check-equal? ((query-selector (λ (t a c) (dict-has-key? a 'data-test-ampersand)) transformed)) - '(div (@ (data-test-ampersand) (class "mw-collapsible-content")) - (& ndash))) ; benchmark (when (file-exists? "../storage/Frog.html") (with-input-from-file "../storage/Frog.html" diff --git a/lib/url-utils.rkt b/lib/url-utils.rkt index 3fb4310..4722d49 100644 --- a/lib/url-utils.rkt +++ b/lib/url-utils.rkt @@ -1,6 +1,5 @@ #lang typed/racket/base (require racket/string - typed/net/url-structs "pure-utils.rkt") (require/typed web-server/http/request-structs [#:opaque Header header?]) @@ -21,10 +20,7 @@ ; pass in a header, headers, or something useless. they'll all combine into a list build-headers ; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url - page-title->path - ; path/param eats semicolons into params, which need to be fixed back into semicolons - fix-semicolons-url-path - fix-semicolons-url) + page-title->path) (module+ test (require "typed-rackunit.rkt")) @@ -110,20 +106,3 @@ (: page-title->path (String -> Bytes)) (define (page-title->path title) (percent-encode (regexp-replace* " " title "_") path-set #f)) - -(: fix-semicolons-url-path ((Listof Path/Param) -> (Listof Path/Param))) -(define (fix-semicolons-url-path pps) - (for/list ([pp pps]) - (define path (path/param-path pp)) - (if (or (null? (path/param-param pp)) - (symbol? path)) - pp - ;; path/param does have params, which need to be fixed into a semicolon. - (path/param - (string-append path ";" (string-join (path/param-param pp) ";")) - null)))) - -(: fix-semicolons-url (URL -> URL)) -(define (fix-semicolons-url orig-url) - (struct-copy url orig-url [path (fix-semicolons-url-path (url-path orig-url))])) - diff --git a/lib/xexpr-utils.rkt b/lib/xexpr-utils.rkt index e1ac957..cb40510 100644 --- a/lib/xexpr-utils.rkt +++ b/lib/xexpr-utils.rkt @@ -86,16 +86,15 @@ ; "element" is a real element with a type and everything (non-string, non-attributes) (define (element-is-element? element) - (and (element-is-bits? element) (not (eq? (car element) '&))(not (element-is-xattributes? element)))) + (and (element-is-bits? element) (not (element-is-xattributes? element)))) (module+ test (check-true (element-is-element? '(span "hi"))) (check-false (element-is-element? '(@ (alt "Cute cat.")))) - (check-false (element-is-element? "hi")) - (check-false (element-is-element? '(& ndash)))) + (check-false (element-is-element? "hi"))) -; "element content" is a real element or a string or a (& x) sequence +; "element content" is a real element or a string (define (element-is-content? element) - (or (string? element) (element-is-element? element) (and (pair? element) (eq? (car element) '&)))) + (or (string? element) (element-is-element? element))) (module+ test (check-true (element-is-content? '(span "hi"))) (check-false (element-is-content? '(@ (alt "Cute cat.")))) diff --git a/src/dispatcher-tree.rkt b/src/dispatcher-tree.rkt index 48e8ebb..315638a 100644 --- a/src/dispatcher-tree.rkt +++ b/src/dispatcher-tree.rkt @@ -59,5 +59,16 @@ (make-semicolon-fixer-dispatcher tree)) (define ((make-semicolon-fixer-dispatcher orig-dispatcher) conn orig-req) - (define new-req (struct-copy request orig-req [uri (fix-semicolons-url (request-uri orig-req))])) + (define orig-uri (request-uri orig-req)) + (define pps (url-path orig-uri)) ; list of path/param structs + (define new-path + (for/list ([pp pps]) + (if (null? (path/param-param pp)) + pp + ;; path/param does have params, which need to be fixed into a semicolon. + (path/param + (string-append (path/param-path pp) ";" (string-join (path/param-param pp) ";")) + null)))) + (define new-uri (struct-copy url orig-uri [path new-path])) + (define new-req (struct-copy request orig-req [uri new-uri])) (orig-dispatcher conn new-req)) diff --git a/src/page-search.rkt b/src/page-search.rkt index e4960d8..ce527c0 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -28,35 +28,28 @@ (define search-json-data '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181))))))))) -;; this takes the info we gathered from fandom and makes the big fat x-expression page (define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f]) (define search-results (jp "/query/search" data)) - ;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle (generate-wiki-page - ;; so I provide my helper function with the necessary context... #:req req #:source-url dest-url #:wikiname wikiname #:title query #:siteinfo siteinfo - ;; and here's the actual results to display in the wiki page layout `(div (@ (class "mw-parser-output")) - ;; header before the search results showing how many we found (p ,(format "~a results found for " (length search-results)) (strong ,query)) - ;; *u*nordered *l*ist of matching search results (ul ,@(map - (λ (result) ;; for each result, run this code... + (λ (result) (let* ([title (jp "/title" result)] [page-path (page-title->path title)] [timestamp (jp "/timestamp" result)] [wordcount (jp "/wordcount" result)] [size (jp "/size" result)]) - ;; and make this x-expression... `(li (@ (class "my-result")) - (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL - ,title) ; using unquote to insert the result page title - (div (@ (class "my-result__info")) ; constructing the line under the search result + (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) + ,title) + (div (@ (class "my-result__info")) "last edited " (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0)) ,(format ", ~a words, ~a kb" @@ -64,18 +57,13 @@ (exact->inexact (/ (round (/ size 100)) 10))))))) search-results))))) -;; will be called when the web browser asks to load the page (define (page-search req) - ;; this just means, catch any errors and display them in the browser. it's a function somewhere else (response-handler - ;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner" - ;; grab the first part to use as the wikiname, in this case, "minecraft" (define wikiname (path/param-path (first (url-path (request-uri req))))) - ;; grab the part after ?q= which is the search terms (define query (dict-ref (url-query (request-uri req)) 'q #f)) - ;; constructing the URL where I want to get fandom data from... (define origin (format "https://~a.fandom.com" wikiname)) - ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json + (when (config-true? 'feature_offline::only) + (raise-user-error "Full search is currently not available on breezewiki.com - for now, please use the pop-up search suggestions or wait for me to fix it! Thanks <3")) (define dest-url (format "~a/api.php?~a" origin @@ -85,26 +73,21 @@ ("formatversion" . "2") ("format" . "json"))))) - ;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name) (define-values (dest-res siteinfo) (thread-values (λ () (log-outgoing dest-url) - (easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results + (easy:get dest-url #:timeouts timeouts)) (λ () - (siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki + (siteinfo-fetch wikiname)))) - ;; search results are a JSON string. parse JSON into racket data structures (define data (easy:response-json dest-res)) - ;; calling my generate-results-page function with the information so far in order to get a big fat x-expression - ;; big fat x-expression goes into the body variable + (define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo)) - ;; error checking (when (config-true? 'debug) ; used for its side effects ; convert to string with error checking, error will be raised if xexp is invalid (xexp->html body)) - ;; convert body to HTML and send to browser (response/output #:code 200 #:headers (build-headers always-headers)