From 61c304cf41880cc50cf0d69e754389ac0b17d972 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Wed, 6 Dec 2023 13:08:20 +1300 Subject: [PATCH] Implement Solr search provider --- src/config.rkt | 1 + src/page-search.rkt | 79 ++++++++++------------------- src/search-provider-fandom.rkt | 63 ++++++++++++++++++++++++ src/search-provider-solr.rkt | 90 ++++++++++++++++++++++++++++++++++ static/main.css | 13 +++++ 5 files changed, 192 insertions(+), 54 deletions(-) create mode 100644 src/search-provider-fandom.rkt create mode 100644 src/search-provider-solr.rkt diff --git a/src/config.rkt b/src/config.rkt index 2158e77..fec546b 100644 --- a/src/config.rkt +++ b/src/config.rkt @@ -47,6 +47,7 @@ (feature_offline::enabled . "false") (feature_offline::format . "json.gz") (feature_offline::only . "false") + (feature_offline::search . "fandom") (access_log::enabled . "false") diff --git a/src/page-search.rkt b/src/page-search.rkt index e4960d8..4fb76f5 100644 --- a/src/page-search.rkt +++ b/src/page-search.rkt @@ -13,6 +13,8 @@ "application-globals.rkt" "config.rkt" "data.rkt" + "search-provider-fandom.rkt" + "search-provider-solr.rkt" "../lib/syntax.rkt" "../lib/thread-utils.rkt" "../lib/url-utils.rkt" @@ -22,47 +24,22 @@ (provide page-search) -(module+ test - (require rackunit - "test-utils.rkt") - (define search-json-data - '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 
181))))))))) +(define search-providers + (hash "fandom" generate-results-content-fandom + "solr" generate-results-content-solr)) ;; this takes the info we gathered from fandom and makes the big fat x-expression page -(define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f]) - (define search-results (jp "/query/search" data)) +(define (generate-results-page req source-url wikiname query results-content #:siteinfo [siteinfo #f]) ;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle (generate-wiki-page ;; so I provide my helper function with the necessary context... #:req req - #:source-url dest-url + #:source-url source-url #:wikiname wikiname #:title query #:siteinfo siteinfo ;; and here's the actual results to display in the wiki page layout - `(div (@ (class "mw-parser-output")) - ;; header before the search results showing how many we found - (p ,(format "~a results found for " (length search-results)) - (strong ,query)) - ;; *u*nordered *l*ist of matching search results - (ul ,@(map - (λ (result) ;; for each result, run this code... - (let* ([title (jp "/title" result)] - [page-path (page-title->path title)] - [timestamp (jp "/timestamp" result)] - [wordcount (jp "/wordcount" result)] - [size (jp "/size" result)]) - ;; and make this x-expression... 
- `(li (@ (class "my-result")) - (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL - ,title) ; using unquote to insert the result page title - (div (@ (class "my-result__info")) ; constructing the line under the search result - "last edited " - (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0)) - ,(format ", ~a words, ~a kb" - wordcount - (exact->inexact (/ (round (/ size 100)) 10))))))) - search-results))))) + results-content)) ;; will be called when the web browser asks to load the page (define (page-search req) @@ -71,34 +48,31 @@ ;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner" ;; grab the first part to use as the wikiname, in this case, "minecraft" (define wikiname (path/param-path (first (url-path (request-uri req))))) + ;; grab a dict of url search params + (define params (url-query (request-uri req))) ;; grab the part after ?q= which is the search terms - (define query (dict-ref (url-query (request-uri req)) 'q #f)) - ;; constructing the URL where I want to get fandom data from... - (define origin (format "https://~a.fandom.com" wikiname)) - ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json - (define dest-url - (format "~a/api.php?~a" - origin - (params->query `(("action" . "query") - ("list" . "search") - ("srsearch" . ,query) - ("formatversion" . "2") - ("format" . 
"json"))))) + (define query (dict-ref params 'q #f)) + ;; figure out which search provider we're going to use + (define search-provider (hash-ref search-providers (config-get 'feature_offline::search))) - ;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name) - (define-values (dest-res siteinfo) + ;; external special:search url to link at the bottom of the page as the upstream source + (define external-search-url + (format "https://~a.fandom.com/wiki/Special:Search?~a" + wikiname + (params->query `(("query" . ,query) + ("search" . "internal"))))) + + ;; simultaneously get the search results, as well as information about the wiki as a whole (its license, icon, name) + (define-values (results-content siteinfo) (thread-values (λ () - (log-outgoing dest-url) - (easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results + (search-provider wikiname query params)) ;; call the search provider (see file "search-provider-fandom.rkt") (λ () (siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki - ;; search results are a JSON string. parse JSON into racket data structures - (define data (easy:response-json dest-res)) ;; calling my generate-results-page function with the information so far in order to get a big fat x-expression ;; big fat x-expression goes into the body variable - (define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo)) + (define body (generate-results-page req external-search-url wikiname query results-content #:siteinfo siteinfo)) ;; error checking (when (config-true? 
'debug) ; used for its side effects @@ -110,7 +84,4 @@ #:headers (build-headers always-headers) (λ (out) (write-html body out))))) -(module+ test - (parameterize ([(config-parameter 'feature_offline::only) "false"]) - (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule") - (generate-results-page test-req "" "test" "Gacha" search-json-data)))))) + diff --git a/src/search-provider-fandom.rkt b/src/search-provider-fandom.rkt new file mode 100644 index 0000000..945b111 --- /dev/null +++ b/src/search-provider-fandom.rkt @@ -0,0 +1,63 @@ +#lang racket/base +(require racket/string + (prefix-in easy: net/http-easy) + "application-globals.rkt" + "config.rkt" + "../lib/url-utils.rkt" + "whole-utils.rkt" + "../lib/xexpr-utils.rkt") + +(provide + generate-results-content-fandom) + +(module+ test + (require rackunit + "test-utils.rkt") + (define search-json-data + '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181))))))))) + +(define (generate-results-content-fandom wikiname query params) + ;; constructing the URL where I want to get fandom data from... + (define origin (format "https://~a.fandom.com" wikiname)) + ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json + (define dest-url + (format "~a/api.php?~a" + origin + (params->query `(("action" . "query") + ("list" . "search") + ("srsearch" . ,query) + ("formatversion" . "2") + ("format" . 
"json"))))) + ;; HTTP request to dest-url for search results + (log-outgoing dest-url) + (define res (easy:get dest-url #:timeouts timeouts)) + (define json (easy:response-json res)) + (define search-results (jp "/query/search" json)) + + ;; generate content for display in the wiki page layout + `(div (@ (class "mw-parser-output")) + ;; header before the search results showing how many we found + (p ,(format "~a results found for " (length search-results)) + (strong ,query)) + ;; *u*nordered *l*ist of matching search results + (ul ,@(for/list ([result search-results]) + (let* ([title (jp "/title" result)] + [page-path (page-title->path title)] + [timestamp (jp "/timestamp" result)] + [wordcount (jp "/wordcount" result)] + [size (jp "/size" result)]) + ;; and make this x-expression... + `(li (@ (class "my-result")) + (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL + ,title) ; using unquote to insert the result page title + (div (@ (class "my-result__info")) ; constructing the line under the search result + "last edited " + (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0)) + ,(format ", ~a words, ~a kb" + wordcount + (exact->inexact (/ (round (/ size 100)) 10)))))))))) + +(module+ test + (parameterize ([(config-parameter 'feature_offline::only) "false"]) + (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule") + (generate-results-content-fandom test-req "" "test" "Gacha" search-json-data)))))) diff --git a/src/search-provider-solr.rkt b/src/search-provider-solr.rkt new file mode 100644 index 0000000..fbd77de --- /dev/null +++ b/src/search-provider-solr.rkt @@ -0,0 +1,90 @@ +#lang racket/base +(require racket/dict + racket/string + (prefix-in easy: net/http-easy) + "application-globals.rkt" + "../lib/html-parsing/main.rkt" + "../lib/url-utils.rkt" + "whole-utils.rkt" + "../lib/xexpr-utils.rkt") + +(provide + 
generate-results-content-solr) + +(struct result^ (hl-title hl-body kb words page-path) #:transparent) + +(define (generate-results-content-solr wikiname query params) + ;; grab things from params that would modify the search + (define op (if (equal? (dict-ref params 'op #f) "or") '("or" . "OR") '("and" . "AND"))) + (define sort (if (equal? (dict-ref params 'sort #f) "len") '("len" . "len desc") '("relevance" . "score desc"))) + + ;; the dest-URL will look something like http://localhost:8983/solr/bloons/select?defType=edismax&fl=id%2Clen&hl.defaultSummary=true&hl.encoder=html&hl.fl=title%2Cbody&hl.method=unified&hl.tag.post=%3C%2Fmark%3E&hl.tag.pre=%3Cmark%3E&hl=true&indent=true&q.op=AND&q=blo&qf=title_prefix%20title%5E2.0%20body%20table%5E0.3&useParams= + (define dest-url + (format "http://localhost:8983/solr/~a/select?~a" + wikiname + (params->query `(("defType" . "edismax") + ("q" . ,query) + ("q.op" . ,(cdr op)) + ("qf" . "title_prefix title^2.0 body table^0.3") + ("hl" . "true") + ("hl.method" . "unified") + ("hl.defaultSummary" . "true") + ("hl.fl" . "title,body") + ("fl" . "id,len,title") + ("hl.encoder" . "html") + ("hl.tag.pre" . "<mark>") + ("hl.tag.post" . "</mark>") + ("sort" . 
,(cdr sort)))))) + ;; HTTP request to dest-url for search results + (log-outgoing dest-url) + (define res (easy:get dest-url #:timeouts timeouts)) + (define json (easy:response-json res)) + + ;; build result objects + (define highlighting (jp "/highlighting" json)) + (define results + (for/list ([doc (jp "/response/docs" json)]) + (define id (jp "/id" doc)) + (define len (jp "/len" doc)) + (define title (jp "/title" doc)) + (define page-path (page-title->path title)) + (define kb (exact->inexact (/ (round (/ len 100)) 10))) ; divide by 1000 and round to nearest 0.1 + (define words (* (round (/ len 60)) 10)) ; divide by 6 and round to nearest 10 + (define hl (hash-ref highlighting (string->symbol id))) + (define hl-title (cdr (html->xexp (jp "/title/0" hl)))) + (define hl-body (cdr (html->xexp (string-trim (jp "/body/0" hl))))) + (result^ hl-title hl-body kb words page-path))) + + (define qtime (exact->inexact (/ (round (/ (jp "/responseHeader/QTime" json) 10)) 100))) + + (define (value-selected? value current-value) + (append + `((value ,value)) + (if (equal? value current-value) + `((selected)) + `()))) + + ;; generate content for display in the wiki page layout + `(div (@ (class "mw-parser-output")) + (form (@ (class "my-result__filter")) + (input (@ (type "hidden") (name "q") (value ,query))) + (select (@ (name "op")) + (option (@ ,@(value-selected? "and" (car op))) "All words must match") + (option (@ ,@(value-selected? "or" (car op))) "Some words must match")) + (select (@ (name "sort")) + (option (@ ,@(value-selected? "relevance" (car sort))) "Relevant articles") + (option (@ ,@(value-selected? 
"len" (car sort))) "Wordiest articles")) + (button "Filter results")) + ;; header before the search results showing how many we found + (p ,(format "~a results (~a seconds) found for " (jp "/response/numFound" json) qtime) + (strong ,query)) + ;; *u*nordered *l*ist of matching search results + (ul ,@(for/list ([result results]) + `(li (@ (class "my-result")) + (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname (result^-page-path result)))) ; url + ,@(result^-hl-title result)) ; title + (p (@ (class "my-result__description")) ,@(result^-hl-body result)) ; result preview + (div (@ (class "my-result__info")) ; line under the search result + ,(format "~a words, ~a kb of readable stuff" + (result^-words result) + (result^-kb result)))))))) diff --git a/static/main.css b/static/main.css index cb32df5..e3c0070 100644 --- a/static/main.css +++ b/static/main.css @@ -249,11 +249,24 @@ a.ext-audiobutton { /* see hearthstone/wiki/Diablo_(Duels_hero) */ .my-result__link { font-size: 1.2em; } +.my-result__description { + font-size: 0.8em; + white-space: pre-line; + margin-left: 1.2em; +} +.my-result mark { + background: rgba(255, 255, 0, 0.4); +} .my-result__info { font-size: 0.8em; color: var(--theme-page-text-color--hover); margin-left: 1.2em; } +.my-result__filter { + display: grid; + grid-template-columns: auto auto auto 1fr; + grid-gap: 8px; +} /* (breezewiki) search suggestions */ .bw-search-form {