forked from cadence/breezewiki
Compare commits
4 commits
2e292b4f80
...
4b039cca5e
Author | SHA1 | Date | |
---|---|---|---|
4b039cca5e | |||
a9754463b6 | |||
6fef9281c3 | |||
ca13aea547 |
6 changed files with 70 additions and 29 deletions
|
@ -11,7 +11,7 @@
|
|||
url-segments->guess-title)
|
||||
|
||||
(define (local-encoded-url->segments str) ; '("wiki" "Page_title")
|
||||
(map path/param-path (url-path (string->url str))))
|
||||
(map path/param-path (fix-semicolons-url-path (url-path (string->url str)))))
|
||||
|
||||
(define (url-segments->basename segments) ; "Page_title" filename encoded, no extension or dir prefix
|
||||
(define extra-encoded (map (λ (s) (bytes->string/latin-1 (percent-encode s filename-set #f))) (cdr segments)))
|
||||
|
|
|
@ -58,7 +58,11 @@
|
|||
(data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png")
|
||||
(class "thumbimage")))))
|
||||
(figcaption "Test figure!"))
|
||||
(iframe (@ (src "https://example.com/iframe-src")))))))
|
||||
(iframe (@ (src "https://example.com/iframe-src")))
|
||||
(div (@ (class "reviews"))
|
||||
(header "GameSpot Expert Reviews"))
|
||||
(div (@ (data-test-ampersand) (class "mw-collapsible-content"))
|
||||
(& ndash))))))
|
||||
|
||||
(define (updater wikiname #:strict-proxy? [strict-proxy? #f])
|
||||
;; precompute wikiurl regex for efficiency
|
||||
|
@ -157,7 +161,7 @@
|
|||
(u
|
||||
(λ (v) (has-class? "mw-collapsible-content" attributes))
|
||||
(λ (v) (for/list ([element v])
|
||||
(u (λ (element) (pair? element))
|
||||
(u (λ (element) (element-is-element? element))
|
||||
(λ (element)
|
||||
`(,(car element)
|
||||
(@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element)))
|
||||
|
@ -238,6 +242,9 @@
|
|||
[(list (list 'img _)) #t]
|
||||
[_ #f]))
|
||||
return-no-element]
|
||||
; remove gamespot reviews/ads
|
||||
[(has-class? "reviews" attributes)
|
||||
return-no-element]
|
||||
[#t
|
||||
(list element-type
|
||||
;; attributes
|
||||
|
@ -297,6 +304,12 @@
|
|||
"/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png")
|
||||
; check that noscript images are removed
|
||||
(check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f)
|
||||
; check that gamespot reviews/ads are removed
|
||||
(check-equal? ((query-selector (λ (t a c) (has-class? "reviews" a)) transformed)) #f)
|
||||
; check that (& x) sequences are not broken
|
||||
(check-equal? ((query-selector (λ (t a c) (dict-has-key? a 'data-test-ampersand)) transformed))
|
||||
'(div (@ (data-test-ampersand) (class "mw-collapsible-content"))
|
||||
(& ndash)))
|
||||
; benchmark
|
||||
(when (file-exists? "../storage/Frog.html")
|
||||
(with-input-from-file "../storage/Frog.html"
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
#lang typed/racket/base
|
||||
(require racket/string
|
||||
typed/net/url-structs
|
||||
"pure-utils.rkt")
|
||||
(require/typed web-server/http/request-structs
|
||||
[#:opaque Header header?])
|
||||
|
@ -20,7 +21,10 @@
|
|||
; pass in a header, headers, or something useless. they'll all combine into a list
|
||||
build-headers
|
||||
; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url
|
||||
page-title->path)
|
||||
page-title->path
|
||||
; path/param eats semicolons into params, which need to be fixed back into semicolons
|
||||
fix-semicolons-url-path
|
||||
fix-semicolons-url)
|
||||
|
||||
(module+ test
|
||||
(require "typed-rackunit.rkt"))
|
||||
|
@ -106,3 +110,20 @@
|
|||
(: page-title->path (String -> Bytes))
|
||||
(define (page-title->path title)
|
||||
(percent-encode (regexp-replace* " " title "_") path-set #f))
|
||||
|
||||
(: fix-semicolons-url-path ((Listof Path/Param) -> (Listof Path/Param)))
|
||||
(define (fix-semicolons-url-path pps)
|
||||
(for/list ([pp pps])
|
||||
(define path (path/param-path pp))
|
||||
(if (or (null? (path/param-param pp))
|
||||
(symbol? path))
|
||||
pp
|
||||
;; path/param does have params, which need to be fixed into a semicolon.
|
||||
(path/param
|
||||
(string-append path ";" (string-join (path/param-param pp) ";"))
|
||||
null))))
|
||||
|
||||
(: fix-semicolons-url (URL -> URL))
|
||||
(define (fix-semicolons-url orig-url)
|
||||
(struct-copy url orig-url [path (fix-semicolons-url-path (url-path orig-url))]))
|
||||
|
||||
|
|
|
@ -86,15 +86,16 @@
|
|||
|
||||
; "element" is a real element with a type and everything (non-string, non-attributes)
|
||||
(define (element-is-element? element)
|
||||
(and (element-is-bits? element) (not (element-is-xattributes? element))))
|
||||
(and (element-is-bits? element) (not (eq? (car element) '&))(not (element-is-xattributes? element))))
|
||||
(module+ test
|
||||
(check-true (element-is-element? '(span "hi")))
|
||||
(check-false (element-is-element? '(@ (alt "Cute cat."))))
|
||||
(check-false (element-is-element? "hi")))
|
||||
(check-false (element-is-element? "hi"))
|
||||
(check-false (element-is-element? '(& ndash))))
|
||||
|
||||
; "element content" is a real element or a string
|
||||
; "element content" is a real element or a string or a (& x) sequence
|
||||
(define (element-is-content? element)
|
||||
(or (string? element) (element-is-element? element)))
|
||||
(or (string? element) (element-is-element? element) (and (pair? element) (eq? (car element) '&))))
|
||||
(module+ test
|
||||
(check-true (element-is-content? '(span "hi")))
|
||||
(check-false (element-is-content? '(@ (alt "Cute cat."))))
|
||||
|
|
|
@ -59,16 +59,5 @@
|
|||
(make-semicolon-fixer-dispatcher tree))
|
||||
|
||||
(define ((make-semicolon-fixer-dispatcher orig-dispatcher) conn orig-req)
|
||||
(define orig-uri (request-uri orig-req))
|
||||
(define pps (url-path orig-uri)) ; list of path/param structs
|
||||
(define new-path
|
||||
(for/list ([pp pps])
|
||||
(if (null? (path/param-param pp))
|
||||
pp
|
||||
;; path/param does have params, which need to be fixed into a semicolon.
|
||||
(path/param
|
||||
(string-append (path/param-path pp) ";" (string-join (path/param-param pp) ";"))
|
||||
null))))
|
||||
(define new-uri (struct-copy url orig-uri [path new-path]))
|
||||
(define new-req (struct-copy request orig-req [uri new-uri]))
|
||||
(define new-req (struct-copy request orig-req [uri (fix-semicolons-url (request-uri orig-req))]))
|
||||
(orig-dispatcher conn new-req))
|
||||
|
|
|
@ -28,28 +28,35 @@
|
|||
(define search-json-data
|
||||
'#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))
|
||||
|
||||
;; this takes the info we gathered from fandom and makes the big fat x-expression page
|
||||
(define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f])
|
||||
(define search-results (jp "/query/search" data))
|
||||
;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle
|
||||
(generate-wiki-page
|
||||
;; so I provide my helper function with the necessary context...
|
||||
#:req req
|
||||
#:source-url dest-url
|
||||
#:wikiname wikiname
|
||||
#:title query
|
||||
#:siteinfo siteinfo
|
||||
;; and here's the actual results to display in the wiki page layout
|
||||
`(div (@ (class "mw-parser-output"))
|
||||
;; header before the search results showing how many we found
|
||||
(p ,(format "~a results found for " (length search-results))
|
||||
(strong ,query))
|
||||
;; *u*nordered *l*ist of matching search results
|
||||
(ul ,@(map
|
||||
(λ (result)
|
||||
(λ (result) ;; for each result, run this code...
|
||||
(let* ([title (jp "/title" result)]
|
||||
[page-path (page-title->path title)]
|
||||
[timestamp (jp "/timestamp" result)]
|
||||
[wordcount (jp "/wordcount" result)]
|
||||
[size (jp "/size" result)])
|
||||
;; and make this x-expression...
|
||||
`(li (@ (class "my-result"))
|
||||
(a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path)))
|
||||
,title)
|
||||
(div (@ (class "my-result__info"))
|
||||
(a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
|
||||
,title) ; using unquote to insert the result page title
|
||||
(div (@ (class "my-result__info")) ; constructing the line under the search result
|
||||
"last edited "
|
||||
(time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
|
||||
,(format ", ~a words, ~a kb"
|
||||
|
@ -57,13 +64,18 @@
|
|||
(exact->inexact (/ (round (/ size 100)) 10)))))))
|
||||
search-results)))))
|
||||
|
||||
;; will be called when the web browser asks to load the page
|
||||
(define (page-search req)
|
||||
;; this just means, catch any errors and display them in the browser. it's a function somewhere else
|
||||
(response-handler
|
||||
;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner"
|
||||
;; grab the first part to use as the wikiname, in this case, "minecraft"
|
||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||
;; grab the part after ?q= which is the search terms
|
||||
(define query (dict-ref (url-query (request-uri req)) 'q #f))
|
||||
;; constructing the URL where I want to get fandom data from...
|
||||
(define origin (format "https://~a.fandom.com" wikiname))
|
||||
(when (config-true? 'feature_offline::only)
|
||||
(raise-user-error "Full search is currently not available on breezewiki.com - for now, please use the pop-up search suggestions or wait for me to fix it! Thanks <3"))
|
||||
;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
|
||||
(define dest-url
|
||||
(format "~a/api.php?~a"
|
||||
origin
|
||||
|
@ -73,21 +85,26 @@
|
|||
("formatversion" . "2")
|
||||
("format" . "json")))))
|
||||
|
||||
;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name)
|
||||
(define-values (dest-res siteinfo)
|
||||
(thread-values
|
||||
(λ ()
|
||||
(log-outgoing dest-url)
|
||||
(easy:get dest-url #:timeouts timeouts))
|
||||
(easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results
|
||||
(λ ()
|
||||
(siteinfo-fetch wikiname))))
|
||||
(siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki
|
||||
|
||||
;; search results are a JSON string. parse JSON into racket data structures
|
||||
(define data (easy:response-json dest-res))
|
||||
|
||||
;; calling my generate-results-page function with the information so far in order to get a big fat x-expression
|
||||
;; big fat x-expression goes into the body variable
|
||||
(define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo))
|
||||
;; error checking
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
;; convert body to HTML and send to browser
|
||||
(response/output
|
||||
#:code 200
|
||||
#:headers (build-headers always-headers)
|
||||
|
|
Loading…
Reference in a new issue