Implement Solr search provider

This commit is contained in:
Cadence Ember 2023-12-06 13:08:20 +13:00
parent 76eaaa0a47
commit 61c304cf41
5 changed files with 192 additions and 54 deletions

View file

@ -47,6 +47,7 @@
(feature_offline::enabled . "false")
(feature_offline::format . "json.gz")
(feature_offline::only . "false")
(feature_offline::search . "fandom")
(access_log::enabled . "false")

View file

@ -13,6 +13,8 @@
"application-globals.rkt"
"config.rkt"
"data.rkt"
"search-provider-fandom.rkt"
"search-provider-solr.rkt"
"../lib/syntax.rkt"
"../lib/thread-utils.rkt"
"../lib/url-utils.rkt"
@ -22,47 +24,22 @@
(provide
page-search)
(module+ test
(require rackunit
"test-utils.rkt")
(define search-json-data
'#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))
(define search-providers
(hash "fandom" generate-results-content-fandom
"solr" generate-results-content-solr))
;; this takes the info we gathered from fandom and makes the big fat x-expression page
(define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f])
(define search-results (jp "/query/search" data))
(define (generate-results-page req source-url wikiname query results-content #:siteinfo [siteinfo #f])
;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle
(generate-wiki-page
;; so I provide my helper function with the necessary context...
#:req req
#:source-url dest-url
#:source-url source-url
#:wikiname wikiname
#:title query
#:siteinfo siteinfo
;; and here's the actual results to display in the wiki page layout
`(div (@ (class "mw-parser-output"))
;; header before the search results showing how many we found
(p ,(format "~a results found for " (length search-results))
(strong ,query))
;; *u*nordered *l*ist of matching search results
(ul ,@(map
(λ (result) ;; for each result, run this code...
(let* ([title (jp "/title" result)]
[page-path (page-title->path title)]
[timestamp (jp "/timestamp" result)]
[wordcount (jp "/wordcount" result)]
[size (jp "/size" result)])
;; and make this x-expression...
`(li (@ (class "my-result"))
(a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
,title) ; using unquote to insert the result page title
(div (@ (class "my-result__info")) ; constructing the line under the search result
"last edited "
(time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
,(format ", ~a words, ~a kb"
wordcount
(exact->inexact (/ (round (/ size 100)) 10)))))))
search-results)))))
results-content))
;; will be called when the web browser asks to load the page
(define (page-search req)
@ -71,34 +48,31 @@
;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner"
;; grab the first part to use as the wikiname, in this case, "minecraft"
(define wikiname (path/param-path (first (url-path (request-uri req)))))
;; grab a dict of url search params
(define params (url-query (request-uri req)))
;; grab the part after ?q= which is the search terms
(define query (dict-ref (url-query (request-uri req)) 'q #f))
;; constructing the URL where I want to get fandom data from...
(define origin (format "https://~a.fandom.com" wikiname))
;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
(define dest-url
(format "~a/api.php?~a"
origin
(params->query `(("action" . "query")
("list" . "search")
("srsearch" . ,query)
("formatversion" . "2")
("format" . "json")))))
(define query (dict-ref params 'q #f))
;; figure out which search provider we're going to use
(define search-provider (hash-ref search-providers (config-get 'feature_offline::search)))
;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name)
(define-values (dest-res siteinfo)
;; external special:search url to link at the bottom of the page as the upstream source
(define external-search-url
(format "https://~a.fandom.com/wiki/Special:Search?~a"
wikiname
(params->query `(("query" . ,query)
("search" . "internal")))))
;; simultaneously get the search results, as well as information about the wiki as a whole (its license, icon, name)
(define-values (results-content siteinfo)
(thread-values
(λ ()
(log-outgoing dest-url)
(easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results
(search-provider wikiname query params)) ;; call the search provider (see file "search-provider-fandom.rkt")
(λ ()
(siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki
;; search results are a JSON string. parse JSON into racket data structures
(define data (easy:response-json dest-res))
;; calling my generate-results-page function with the information so far in order to get a big fat x-expression
;; big fat x-expression goes into the body variable
(define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo))
(define body (generate-results-page req external-search-url wikiname query results-content #:siteinfo siteinfo))
;; error checking
(when (config-true? 'debug)
; used for its side effects
@ -110,7 +84,4 @@
#:headers (build-headers always-headers)
(λ (out)
(write-html body out)))))
(module+ test
(parameterize ([(config-parameter 'feature_offline::only) "false"])
(check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
(generate-results-page test-req "" "test" "Gacha" search-json-data))))))

View file

@ -0,0 +1,63 @@
#lang racket/base
(require racket/string
(prefix-in easy: net/http-easy)
"application-globals.rkt"
"config.rkt"
"../lib/url-utils.rkt"
"whole-utils.rkt"
"../lib/xexpr-utils.rkt")
(provide
generate-results-content-fandom)
;; test-only definitions; these live in the `test` submodule and are not part of the main module
(module+ test
(require rackunit
"test-utils.rkt")
;; fixture shaped like a parsed search API response (the value found at "/query/search" is the list of results)
;; contains two results, titled "Gacha Capsule" and "Badges"
(define search-json-data
'#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))
;; Search provider that asks the wiki's own api.php for search results.
;;   wikiname - first part of the wiki's fandom.com hostname, e.g. "minecraft"
;;   query    - the search terms
;;   params   - the request's URL query parameters; accepted so that every search provider
;;              can be called the same way, but not used by this provider
;; Returns an x-expression with the search results, for display in the wiki page layout.
(define (generate-results-content-fandom wikiname query params)
  ;; constructing the URL where I want to get fandom data from...
  (define origin (format "https://~a.fandom.com" wikiname))
  ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
  (define dest-url
    (format "~a/api.php?~a"
            origin
            (params->query `(("action" . "query")
                             ("list" . "search")
                             ("srsearch" . ,query)
                             ("formatversion" . "2")
                             ("format" . "json")))))
  ;; HTTP request to dest-url for search results
  (log-outgoing dest-url)
  (define res (easy:get dest-url #:timeouts timeouts))
  ;; search results are a JSON string. parse JSON into racket data structures
  (define json (easy:response-json res))
  (render-results-content-fandom wikiname query json))

;; Private helper: turns an already-parsed search response into the results x-expression.
;; Kept separate from the HTTP request so that it can be tested with fixture data.
;;   json - parsed response; the list of results is read from "/query/search"
(define (render-results-content-fandom wikiname query json)
  (define search-results (jp "/query/search" json))
  ;; generate content for display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results found for " (length search-results))
           (strong ,query))
        ;; *u*nordered *l*ist of matching search results
        (ul ,@(for/list ([result search-results])
                (let* ([title (jp "/title" result)]
                       [page-path (page-title->path title)]
                       [timestamp (jp "/timestamp" result)]
                       [wordcount (jp "/wordcount" result)]
                       [size (jp "/size" result)])
                  ;; and make this x-expression...
                  `(li (@ (class "my-result"))
                       (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; result page URL
                          ,title) ; result page title
                       (div (@ (class "my-result__info")) ; the line under the search result
                            "last edited "
                            ;; only the part of the timestamp before the "T" (the date) is displayed
                            (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
                            ,(format ", ~a words, ~a kb"
                                     wordcount
                                     ;; size / 1000, rounded to the nearest 0.1
                                     (exact->inexact (/ (round (/ size 100)) 10))))))))))

(module+ test
  ;; exercise the renderer directly with the fixture: no network access is needed,
  ;; and the arguments match the helper's (wikiname query json) parameters
  (parameterize ([(config-parameter 'feature_offline::only) "false"])
    (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
                                      (render-results-content-fandom "test" "Gacha" search-json-data))))))

View file

@ -0,0 +1,90 @@
#lang racket/base
(require racket/dict
racket/string
(prefix-in easy: net/http-easy)
"application-globals.rkt"
"../lib/html-parsing/main.rkt"
"../lib/url-utils.rkt"
"whole-utils.rkt"
"../lib/xexpr-utils.rkt")
(provide
generate-results-content-solr)
;; one search result, prepared for display:
;;   hl-title, hl-body - lists of x-expression nodes (they get spliced into the page) made by parsing
;;                       the highlighted title / body snippet html that came back from the search
;;   kb, words         - size estimates computed from the document's `len` field
;;   page-path         - path of the page, used to build the link to the result
(struct result^ (hl-title hl-body kb words page-path) #:transparent)
;; Search provider that queries the Solr index at http://localhost:8983/solr/<wikiname>.
;;   wikiname - name of the wiki; used in the Solr URL and in the links to the results
;;   query    - the search terms
;;   params   - the request's URL query parameters; 'op and 'sort are read from it
;; Returns an x-expression (filter form, summary line, list of results) for display in the wiki page layout.
(define (generate-results-content-solr wikiname query params)
  ;; grab things from params that would modify the search; anything unrecognised means the default
  (define or-mode? (equal? (dict-ref params 'op #f) "or"))
  (define by-length? (equal? (dict-ref params 'sort #f) "len"))
  ;; every option has one value for our filter form and one value that is sent to Solr
  (define-values (op-form-value op-solr-value)
    (if or-mode?
        (values "or" "OR")
        (values "and" "AND")))
  (define-values (sort-form-value sort-solr-value)
    (if by-length?
        (values "len" "len desc")
        (values "relevance" "score desc")))
  ;; the dest-URL will look something like http://localhost:8983/solr/bloons/select?defType=edismax&fl=id%2Clen&hl.defaultSummary=true&hl.encoder=html&hl.fl=title%2Cbody&hl.method=unified&hl.tag.post=%3C%2Fmark%3E&hl.tag.pre=%3Cmark%3E&hl=true&indent=true&q.op=AND&q=blo&qf=title_prefix%20title%5E2.0%20body%20table%5E0.3&useParams=
  (define solr-params
    `(("defType" . "edismax")
      ("q" . ,query)
      ("q.op" . ,op-solr-value)
      ("qf" . "title_prefix title^2.0 body table^0.3")
      ("hl" . "true")
      ("hl.method" . "unified")
      ("hl.defaultSummary" . "true")
      ("hl.fl" . "title,body")
      ("fl" . "id,len,title")
      ("hl.encoder" . "html")
      ("hl.tag.pre" . "<mark>")
      ("hl.tag.post" . "</mark>")
      ("sort" . ,sort-solr-value)))
  (define dest-url
    (format "http://localhost:8983/solr/~a/select?~a" wikiname (params->query solr-params)))
  ;; HTTP request to dest-url for search results, then parse the JSON response
  (log-outgoing dest-url)
  (define json (easy:response-json (easy:get dest-url #:timeouts timeouts)))
  ;; the highlighted snippets are stored apart from the documents, looked up by document id
  (define highlighting (jp "/highlighting" json))
  ;; build a result object from one document of the response
  (define (doc->result doc)
    (let* ([id (jp "/id" doc)]
           [len (jp "/len" doc)]
           [title (jp "/title" doc)]
           [page-path (page-title->path title)]
           [kb (exact->inexact (/ (round (/ len 100)) 10))] ; divide by 1000 and round to nearest 0.1
           [words (* (round (/ len 60)) 10)] ; divide by 6 and round to nearest 10
           [hl (hash-ref highlighting (string->symbol id))]
           ;; cdr drops the first element of the parsed html, leaving a list of nodes to splice in
           ;; (presumably that first element is the parser's top-level wrapper - confirm in lib/html-parsing)
           [hl-title (cdr (html->xexp (jp "/title/0" hl)))]
           [hl-body (cdr (html->xexp (string-trim (jp "/body/0" hl))))])
      (result^ hl-title hl-body kb words page-path)))
  (define results (map doc->result (jp "/response/docs" json)))
  ;; QTime divided by 1000 and rounded to the nearest 0.01, displayed as seconds
  (define qtime-hundredths (round (/ (jp "/responseHeader/QTime" json) 10)))
  (define qtime (exact->inexact (/ qtime-hundredths 100)))
  ;; attributes for an <option>: its value, plus `selected` when it is the current choice
  (define (option-attrs value current-value)
    (if (equal? value current-value)
        `((value ,value) (selected))
        `((value ,value))))
  ;; form that resubmits the same query with a different op / sort
  (define filter-form
    `(form (@ (class "my-result__filter"))
           (input (@ (type "hidden") (name "q") (value ,query)))
           (select (@ (name "op"))
                   (option (@ ,@(option-attrs "and" op-form-value)) "All words must match")
                   (option (@ ,@(option-attrs "or" op-form-value)) "Some words must match"))
           (select (@ (name "sort"))
                   (option (@ ,@(option-attrs "relevance" sort-form-value)) "Relevant articles")
                   (option (@ ,@(option-attrs "len" sort-form-value)) "Wordiest articles"))
           (button "Filter results")))
  ;; header before the search results showing how many we found
  (define summary
    `(p ,(format "~a results (~a seconds) found for " (jp "/response/numFound" json) qtime)
        (strong ,query)))
  ;; one list item per result
  (define (result->li result)
    `(li (@ (class "my-result"))
         (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname (result^-page-path result)))) ; url
            ,@(result^-hl-title result)) ; title
         (p (@ (class "my-result__description")) ,@(result^-hl-body result)) ; result preview
         (div (@ (class "my-result__info")) ; line under the search result
              ,(format "~a words, ~a kb of readable stuff"
                       (result^-words result)
                       (result^-kb result)))))
  (define result-items (map result->li results))
  ;; generate content for display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        ,filter-form
        ,summary
        ;; *u*nordered *l*ist of matching search results
        (ul ,@result-items)))

View file

@ -249,11 +249,24 @@ a.ext-audiobutton { /* see hearthstone/wiki/Diablo_(Duels_hero) */
/* link to a search result, slightly larger than the surrounding text */
.my-result__link {
font-size: 1.2em;
}
/* preview text of a search result; pre-line keeps its line breaks but collapses runs of spaces */
.my-result__description {
font-size: 0.8em;
white-space: pre-line;
margin-left: 1.2em;
}
/* matched terms highlighted inside a search result: translucent yellow so the text stays readable */
.my-result mark {
background: rgba(255, 255, 0, 0.4);
}
/* small info line under a search result, indented the same as the description */
.my-result__info {
font-size: 0.8em;
color: var(--theme-page-text-color--hover);
margin-left: 1.2em;
}
/* filter form above the search results: three content-sized columns, the fourth column takes the remaining width */
.my-result__filter {
display: grid;
grid-template-columns: auto auto auto 1fr;
grid-gap: 8px;
}
/* (breezewiki) search suggestions */
.bw-search-form {