forked from cadence/breezewiki
Compare commits
4 commits
2e292b4f80
...
4b039cca5e
Author | SHA1 | Date | |
---|---|---|---|
4b039cca5e | |||
a9754463b6 | |||
6fef9281c3 | |||
ca13aea547 |
6 changed files with 70 additions and 29 deletions
|
@ -11,7 +11,7 @@
|
||||||
url-segments->guess-title)
|
url-segments->guess-title)
|
||||||
|
|
||||||
(define (local-encoded-url->segments str) ; '("wiki" "Page_title")
|
(define (local-encoded-url->segments str) ; '("wiki" "Page_title")
|
||||||
(map path/param-path (url-path (string->url str))))
|
(map path/param-path (fix-semicolons-url-path (url-path (string->url str)))))
|
||||||
|
|
||||||
(define (url-segments->basename segments) ; "Page_title" filename encoded, no extension or dir prefix
|
(define (url-segments->basename segments) ; "Page_title" filename encoded, no extension or dir prefix
|
||||||
(define extra-encoded (map (λ (s) (bytes->string/latin-1 (percent-encode s filename-set #f))) (cdr segments)))
|
(define extra-encoded (map (λ (s) (bytes->string/latin-1 (percent-encode s filename-set #f))) (cdr segments)))
|
||||||
|
|
|
@ -58,7 +58,11 @@
|
||||||
(data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png")
|
(data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png")
|
||||||
(class "thumbimage")))))
|
(class "thumbimage")))))
|
||||||
(figcaption "Test figure!"))
|
(figcaption "Test figure!"))
|
||||||
(iframe (@ (src "https://example.com/iframe-src")))))))
|
(iframe (@ (src "https://example.com/iframe-src")))
|
||||||
|
(div (@ (class "reviews"))
|
||||||
|
(header "GameSpot Expert Reviews"))
|
||||||
|
(div (@ (data-test-ampersand) (class "mw-collapsible-content"))
|
||||||
|
(& ndash))))))
|
||||||
|
|
||||||
(define (updater wikiname #:strict-proxy? [strict-proxy? #f])
|
(define (updater wikiname #:strict-proxy? [strict-proxy? #f])
|
||||||
;; precompute wikiurl regex for efficiency
|
;; precompute wikiurl regex for efficiency
|
||||||
|
@ -157,7 +161,7 @@
|
||||||
(u
|
(u
|
||||||
(λ (v) (has-class? "mw-collapsible-content" attributes))
|
(λ (v) (has-class? "mw-collapsible-content" attributes))
|
||||||
(λ (v) (for/list ([element v])
|
(λ (v) (for/list ([element v])
|
||||||
(u (λ (element) (pair? element))
|
(u (λ (element) (element-is-element? element))
|
||||||
(λ (element)
|
(λ (element)
|
||||||
`(,(car element)
|
`(,(car element)
|
||||||
(@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element)))
|
(@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element)))
|
||||||
|
@ -238,6 +242,9 @@
|
||||||
[(list (list 'img _)) #t]
|
[(list (list 'img _)) #t]
|
||||||
[_ #f]))
|
[_ #f]))
|
||||||
return-no-element]
|
return-no-element]
|
||||||
|
; remove gamespot reviews/ads
|
||||||
|
[(has-class? "reviews" attributes)
|
||||||
|
return-no-element]
|
||||||
[#t
|
[#t
|
||||||
(list element-type
|
(list element-type
|
||||||
;; attributes
|
;; attributes
|
||||||
|
@ -297,6 +304,12 @@
|
||||||
"/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png")
|
"/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png")
|
||||||
; check that noscript images are removed
|
; check that noscript images are removed
|
||||||
(check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f)
|
(check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f)
|
||||||
|
; check that gamespot reviews/ads are removed
|
||||||
|
(check-equal? ((query-selector (λ (t a c) (has-class? "reviews" a)) transformed)) #f)
|
||||||
|
; check that (& x) sequences are not broken
|
||||||
|
(check-equal? ((query-selector (λ (t a c) (dict-has-key? a 'data-test-ampersand)) transformed))
|
||||||
|
'(div (@ (data-test-ampersand) (class "mw-collapsible-content"))
|
||||||
|
(& ndash)))
|
||||||
; benchmark
|
; benchmark
|
||||||
(when (file-exists? "../storage/Frog.html")
|
(when (file-exists? "../storage/Frog.html")
|
||||||
(with-input-from-file "../storage/Frog.html"
|
(with-input-from-file "../storage/Frog.html"
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#lang typed/racket/base
|
#lang typed/racket/base
|
||||||
(require racket/string
|
(require racket/string
|
||||||
|
typed/net/url-structs
|
||||||
"pure-utils.rkt")
|
"pure-utils.rkt")
|
||||||
(require/typed web-server/http/request-structs
|
(require/typed web-server/http/request-structs
|
||||||
[#:opaque Header header?])
|
[#:opaque Header header?])
|
||||||
|
@ -20,7 +21,10 @@
|
||||||
; pass in a header, headers, or something useless. they'll all combine into a list
|
; pass in a header, headers, or something useless. they'll all combine into a list
|
||||||
build-headers
|
build-headers
|
||||||
; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url
|
; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url
|
||||||
page-title->path)
|
page-title->path
|
||||||
|
; path/param eats semicolons into params, which need to be fixed back into semicolons
|
||||||
|
fix-semicolons-url-path
|
||||||
|
fix-semicolons-url)
|
||||||
|
|
||||||
(module+ test
|
(module+ test
|
||||||
(require "typed-rackunit.rkt"))
|
(require "typed-rackunit.rkt"))
|
||||||
|
@ -106,3 +110,20 @@
|
||||||
(: page-title->path (String -> Bytes))
|
(: page-title->path (String -> Bytes))
|
||||||
(define (page-title->path title)
|
(define (page-title->path title)
|
||||||
(percent-encode (regexp-replace* " " title "_") path-set #f))
|
(percent-encode (regexp-replace* " " title "_") path-set #f))
|
||||||
|
|
||||||
|
(: fix-semicolons-url-path ((Listof Path/Param) -> (Listof Path/Param)))
|
||||||
|
(define (fix-semicolons-url-path pps)
|
||||||
|
(for/list ([pp pps])
|
||||||
|
(define path (path/param-path pp))
|
||||||
|
(if (or (null? (path/param-param pp))
|
||||||
|
(symbol? path))
|
||||||
|
pp
|
||||||
|
;; path/param does have params, which need to be fixed into a semicolon.
|
||||||
|
(path/param
|
||||||
|
(string-append path ";" (string-join (path/param-param pp) ";"))
|
||||||
|
null))))
|
||||||
|
|
||||||
|
(: fix-semicolons-url (URL -> URL))
|
||||||
|
(define (fix-semicolons-url orig-url)
|
||||||
|
(struct-copy url orig-url [path (fix-semicolons-url-path (url-path orig-url))]))
|
||||||
|
|
||||||
|
|
|
@ -86,15 +86,16 @@
|
||||||
|
|
||||||
; "element" is a real element with a type and everything (non-string, non-attributes)
|
; "element" is a real element with a type and everything (non-string, non-attributes)
|
||||||
(define (element-is-element? element)
|
(define (element-is-element? element)
|
||||||
(and (element-is-bits? element) (not (element-is-xattributes? element))))
|
(and (element-is-bits? element) (not (eq? (car element) '&))(not (element-is-xattributes? element))))
|
||||||
(module+ test
|
(module+ test
|
||||||
(check-true (element-is-element? '(span "hi")))
|
(check-true (element-is-element? '(span "hi")))
|
||||||
(check-false (element-is-element? '(@ (alt "Cute cat."))))
|
(check-false (element-is-element? '(@ (alt "Cute cat."))))
|
||||||
(check-false (element-is-element? "hi")))
|
(check-false (element-is-element? "hi"))
|
||||||
|
(check-false (element-is-element? '(& ndash))))
|
||||||
|
|
||||||
; "element content" is a real element or a string
|
; "element content" is a real element or a string or a (& x) sequence
|
||||||
(define (element-is-content? element)
|
(define (element-is-content? element)
|
||||||
(or (string? element) (element-is-element? element)))
|
(or (string? element) (element-is-element? element) (and (pair? element) (eq? (car element) '&))))
|
||||||
(module+ test
|
(module+ test
|
||||||
(check-true (element-is-content? '(span "hi")))
|
(check-true (element-is-content? '(span "hi")))
|
||||||
(check-false (element-is-content? '(@ (alt "Cute cat."))))
|
(check-false (element-is-content? '(@ (alt "Cute cat."))))
|
||||||
|
|
|
@ -59,16 +59,5 @@
|
||||||
(make-semicolon-fixer-dispatcher tree))
|
(make-semicolon-fixer-dispatcher tree))
|
||||||
|
|
||||||
(define ((make-semicolon-fixer-dispatcher orig-dispatcher) conn orig-req)
|
(define ((make-semicolon-fixer-dispatcher orig-dispatcher) conn orig-req)
|
||||||
(define orig-uri (request-uri orig-req))
|
(define new-req (struct-copy request orig-req [uri (fix-semicolons-url (request-uri orig-req))]))
|
||||||
(define pps (url-path orig-uri)) ; list of path/param structs
|
|
||||||
(define new-path
|
|
||||||
(for/list ([pp pps])
|
|
||||||
(if (null? (path/param-param pp))
|
|
||||||
pp
|
|
||||||
;; path/param does have params, which need to be fixed into a semicolon.
|
|
||||||
(path/param
|
|
||||||
(string-append (path/param-path pp) ";" (string-join (path/param-param pp) ";"))
|
|
||||||
null))))
|
|
||||||
(define new-uri (struct-copy url orig-uri [path new-path]))
|
|
||||||
(define new-req (struct-copy request orig-req [uri new-uri]))
|
|
||||||
(orig-dispatcher conn new-req))
|
(orig-dispatcher conn new-req))
|
||||||
|
|
|
@ -28,28 +28,35 @@
|
||||||
(define search-json-data
|
(define search-json-data
|
||||||
'#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))
|
'#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))
|
||||||
|
|
||||||
|
;; this takes the info we gathered from fandom and makes the big fat x-expression page
|
||||||
(define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f])
|
(define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f])
|
||||||
(define search-results (jp "/query/search" data))
|
(define search-results (jp "/query/search" data))
|
||||||
|
;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle
|
||||||
(generate-wiki-page
|
(generate-wiki-page
|
||||||
|
;; so I provide my helper function with the necessary context...
|
||||||
#:req req
|
#:req req
|
||||||
#:source-url dest-url
|
#:source-url dest-url
|
||||||
#:wikiname wikiname
|
#:wikiname wikiname
|
||||||
#:title query
|
#:title query
|
||||||
#:siteinfo siteinfo
|
#:siteinfo siteinfo
|
||||||
|
;; and here's the actual results to display in the wiki page layout
|
||||||
`(div (@ (class "mw-parser-output"))
|
`(div (@ (class "mw-parser-output"))
|
||||||
|
;; header before the search results showing how many we found
|
||||||
(p ,(format "~a results found for " (length search-results))
|
(p ,(format "~a results found for " (length search-results))
|
||||||
(strong ,query))
|
(strong ,query))
|
||||||
|
;; *u*nordered *l*ist of matching search results
|
||||||
(ul ,@(map
|
(ul ,@(map
|
||||||
(λ (result)
|
(λ (result) ;; for each result, run this code...
|
||||||
(let* ([title (jp "/title" result)]
|
(let* ([title (jp "/title" result)]
|
||||||
[page-path (page-title->path title)]
|
[page-path (page-title->path title)]
|
||||||
[timestamp (jp "/timestamp" result)]
|
[timestamp (jp "/timestamp" result)]
|
||||||
[wordcount (jp "/wordcount" result)]
|
[wordcount (jp "/wordcount" result)]
|
||||||
[size (jp "/size" result)])
|
[size (jp "/size" result)])
|
||||||
|
;; and make this x-expression...
|
||||||
`(li (@ (class "my-result"))
|
`(li (@ (class "my-result"))
|
||||||
(a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path)))
|
(a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
|
||||||
,title)
|
,title) ; using unquote to insert the result page title
|
||||||
(div (@ (class "my-result__info"))
|
(div (@ (class "my-result__info")) ; constructing the line under the search result
|
||||||
"last edited "
|
"last edited "
|
||||||
(time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
|
(time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
|
||||||
,(format ", ~a words, ~a kb"
|
,(format ", ~a words, ~a kb"
|
||||||
|
@ -57,13 +64,18 @@
|
||||||
(exact->inexact (/ (round (/ size 100)) 10)))))))
|
(exact->inexact (/ (round (/ size 100)) 10)))))))
|
||||||
search-results)))))
|
search-results)))))
|
||||||
|
|
||||||
|
;; will be called when the web browser asks to load the page
|
||||||
(define (page-search req)
|
(define (page-search req)
|
||||||
|
;; this just means, catch any errors and display them in the browser. it's a function somewhere else
|
||||||
(response-handler
|
(response-handler
|
||||||
|
;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner"
|
||||||
|
;; grab the first part to use as the wikiname, in this case, "minecraft"
|
||||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||||
|
;; grab the part after ?q= which is the search terms
|
||||||
(define query (dict-ref (url-query (request-uri req)) 'q #f))
|
(define query (dict-ref (url-query (request-uri req)) 'q #f))
|
||||||
|
;; constructing the URL where I want to get fandom data from...
|
||||||
(define origin (format "https://~a.fandom.com" wikiname))
|
(define origin (format "https://~a.fandom.com" wikiname))
|
||||||
(when (config-true? 'feature_offline::only)
|
;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
|
||||||
(raise-user-error "Full search is currently not available on breezewiki.com - for now, please use the pop-up search suggestions or wait for me to fix it! Thanks <3"))
|
|
||||||
(define dest-url
|
(define dest-url
|
||||||
(format "~a/api.php?~a"
|
(format "~a/api.php?~a"
|
||||||
origin
|
origin
|
||||||
|
@ -73,21 +85,26 @@
|
||||||
("formatversion" . "2")
|
("formatversion" . "2")
|
||||||
("format" . "json")))))
|
("format" . "json")))))
|
||||||
|
|
||||||
|
;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name)
|
||||||
(define-values (dest-res siteinfo)
|
(define-values (dest-res siteinfo)
|
||||||
(thread-values
|
(thread-values
|
||||||
(λ ()
|
(λ ()
|
||||||
(log-outgoing dest-url)
|
(log-outgoing dest-url)
|
||||||
(easy:get dest-url #:timeouts timeouts))
|
(easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results
|
||||||
(λ ()
|
(λ ()
|
||||||
(siteinfo-fetch wikiname))))
|
(siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki
|
||||||
|
|
||||||
|
;; search results are a JSON string. parse JSON into racket data structures
|
||||||
(define data (easy:response-json dest-res))
|
(define data (easy:response-json dest-res))
|
||||||
|
;; calling my generate-results-page function with the information so far in order to get a big fat x-expression
|
||||||
|
;; big fat x-expression goes into the body variable
|
||||||
(define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo))
|
(define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo))
|
||||||
|
;; error checking
|
||||||
(when (config-true? 'debug)
|
(when (config-true? 'debug)
|
||||||
; used for its side effects
|
; used for its side effects
|
||||||
; convert to string with error checking, error will be raised if xexp is invalid
|
; convert to string with error checking, error will be raised if xexp is invalid
|
||||||
(xexp->html body))
|
(xexp->html body))
|
||||||
|
;; convert body to HTML and send to browser
|
||||||
(response/output
|
(response/output
|
||||||
#:code 200
|
#:code 200
|
||||||
#:headers (build-headers always-headers)
|
#:headers (build-headers always-headers)
|
||||||
|
|
Loading…
Reference in a new issue