forked from cadence/breezewiki
Implement Solr search provider
This commit is contained in:
parent
76eaaa0a47
commit
61c304cf41
5 changed files with 192 additions and 54 deletions
|
@ -47,6 +47,7 @@
|
||||||
(feature_offline::enabled . "false")
|
(feature_offline::enabled . "false")
|
||||||
(feature_offline::format . "json.gz")
|
(feature_offline::format . "json.gz")
|
||||||
(feature_offline::only . "false")
|
(feature_offline::only . "false")
|
||||||
|
(feature_offline::search . "fandom")
|
||||||
|
|
||||||
(access_log::enabled . "false")
|
(access_log::enabled . "false")
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,8 @@
|
||||||
"application-globals.rkt"
|
"application-globals.rkt"
|
||||||
"config.rkt"
|
"config.rkt"
|
||||||
"data.rkt"
|
"data.rkt"
|
||||||
|
"search-provider-fandom.rkt"
|
||||||
|
"search-provider-solr.rkt"
|
||||||
"../lib/syntax.rkt"
|
"../lib/syntax.rkt"
|
||||||
"../lib/thread-utils.rkt"
|
"../lib/thread-utils.rkt"
|
||||||
"../lib/url-utils.rkt"
|
"../lib/url-utils.rkt"
|
||||||
|
@ -22,47 +24,22 @@
|
||||||
(provide
|
(provide
|
||||||
page-search)
|
page-search)
|
||||||
|
|
||||||
(module+ test
|
(define search-providers
|
||||||
(require rackunit
|
(hash "fandom" generate-results-content-fandom
|
||||||
"test-utils.rkt")
|
"solr" generate-results-content-solr))
|
||||||
(define search-json-data
|
|
||||||
'#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))
|
|
||||||
|
|
||||||
;; this takes the info we gathered from fandom and makes the big fat x-expression page
|
;; this takes the info we gathered from fandom and makes the big fat x-expression page
|
||||||
(define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f])
|
(define (generate-results-page req source-url wikiname query results-content #:siteinfo [siteinfo #f])
|
||||||
(define search-results (jp "/query/search" data))
|
|
||||||
;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle
|
;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle
|
||||||
(generate-wiki-page
|
(generate-wiki-page
|
||||||
;; so I provide my helper function with the necessary context...
|
;; so I provide my helper function with the necessary context...
|
||||||
#:req req
|
#:req req
|
||||||
#:source-url dest-url
|
#:source-url source-url
|
||||||
#:wikiname wikiname
|
#:wikiname wikiname
|
||||||
#:title query
|
#:title query
|
||||||
#:siteinfo siteinfo
|
#:siteinfo siteinfo
|
||||||
;; and here's the actual results to display in the wiki page layout
|
;; and here's the actual results to display in the wiki page layout
|
||||||
`(div (@ (class "mw-parser-output"))
|
results-content))
|
||||||
;; header before the search results showing how many we found
|
|
||||||
(p ,(format "~a results found for " (length search-results))
|
|
||||||
(strong ,query))
|
|
||||||
;; *u*nordered *l*ist of matching search results
|
|
||||||
(ul ,@(map
|
|
||||||
(λ (result) ;; for each result, run this code...
|
|
||||||
(let* ([title (jp "/title" result)]
|
|
||||||
[page-path (page-title->path title)]
|
|
||||||
[timestamp (jp "/timestamp" result)]
|
|
||||||
[wordcount (jp "/wordcount" result)]
|
|
||||||
[size (jp "/size" result)])
|
|
||||||
;; and make this x-expression...
|
|
||||||
`(li (@ (class "my-result"))
|
|
||||||
(a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
|
|
||||||
,title) ; using unquote to insert the result page title
|
|
||||||
(div (@ (class "my-result__info")) ; constructing the line under the search result
|
|
||||||
"last edited "
|
|
||||||
(time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
|
|
||||||
,(format ", ~a words, ~a kb"
|
|
||||||
wordcount
|
|
||||||
(exact->inexact (/ (round (/ size 100)) 10)))))))
|
|
||||||
search-results)))))
|
|
||||||
|
|
||||||
;; will be called when the web browser asks to load the page
|
;; will be called when the web browser asks to load the page
|
||||||
(define (page-search req)
|
(define (page-search req)
|
||||||
|
@ -71,34 +48,31 @@
|
||||||
;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner"
|
;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner"
|
||||||
;; grab the first part to use as the wikiname, in this case, "minecraft"
|
;; grab the first part to use as the wikiname, in this case, "minecraft"
|
||||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||||
|
;; grab a dict of url search params
|
||||||
|
(define params (url-query (request-uri req)))
|
||||||
;; grab the part after ?q= which is the search terms
|
;; grab the part after ?q= which is the search terms
|
||||||
(define query (dict-ref (url-query (request-uri req)) 'q #f))
|
(define query (dict-ref params 'q #f))
|
||||||
;; constructing the URL where I want to get fandom data from...
|
;; figure out which search provider we're going to use
|
||||||
(define origin (format "https://~a.fandom.com" wikiname))
|
(define search-provider (hash-ref search-providers (config-get 'feature_offline::search)))
|
||||||
;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
|
|
||||||
(define dest-url
|
|
||||||
(format "~a/api.php?~a"
|
|
||||||
origin
|
|
||||||
(params->query `(("action" . "query")
|
|
||||||
("list" . "search")
|
|
||||||
("srsearch" . ,query)
|
|
||||||
("formatversion" . "2")
|
|
||||||
("format" . "json")))))
|
|
||||||
|
|
||||||
;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name)
|
;; external special:search url to link at the bottom of the page as the upstream source
|
||||||
(define-values (dest-res siteinfo)
|
(define external-search-url
|
||||||
|
(format "https://~a.fandom.com/wiki/Special:Search?~a"
|
||||||
|
wikiname
|
||||||
|
(params->query `(("query" . ,query)
|
||||||
|
("search" . "internal")))))
|
||||||
|
|
||||||
|
;; simultaneously get the search results, as well as information about the wiki as a whole (its license, icon, name)
|
||||||
|
(define-values (results-content siteinfo)
|
||||||
(thread-values
|
(thread-values
|
||||||
(λ ()
|
(λ ()
|
||||||
(log-outgoing dest-url)
|
(search-provider wikiname query params)) ;; call the search provider (see file "search-provider-fandom.rkt")
|
||||||
(easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results
|
|
||||||
(λ ()
|
(λ ()
|
||||||
(siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki
|
(siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki
|
||||||
|
|
||||||
;; search results are a JSON string. parse JSON into racket data structures
|
|
||||||
(define data (easy:response-json dest-res))
|
|
||||||
;; calling my generate-results-page function with the information so far in order to get a big fat x-expression
|
;; calling my generate-results-page function with the information so far in order to get a big fat x-expression
|
||||||
;; big fat x-expression goes into the body variable
|
;; big fat x-expression goes into the body variable
|
||||||
(define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo))
|
(define body (generate-results-page req external-search-url wikiname query results-content #:siteinfo siteinfo))
|
||||||
;; error checking
|
;; error checking
|
||||||
(when (config-true? 'debug)
|
(when (config-true? 'debug)
|
||||||
; used for its side effects
|
; used for its side effects
|
||||||
|
@ -110,7 +84,4 @@
|
||||||
#:headers (build-headers always-headers)
|
#:headers (build-headers always-headers)
|
||||||
(λ (out)
|
(λ (out)
|
||||||
(write-html body out)))))
|
(write-html body out)))))
|
||||||
(module+ test
|
|
||||||
(parameterize ([(config-parameter 'feature_offline::only) "false"])
|
|
||||||
(check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
|
|
||||||
(generate-results-page test-req "" "test" "Gacha" search-json-data))))))
|
|
||||||
|
|
63
src/search-provider-fandom.rkt
Normal file
63
src/search-provider-fandom.rkt
Normal file
|
@ -0,0 +1,63 @@
|
||||||
|
#lang racket/base
|
||||||
|
(require racket/string
|
||||||
|
(prefix-in easy: net/http-easy)
|
||||||
|
"application-globals.rkt"
|
||||||
|
"config.rkt"
|
||||||
|
"../lib/url-utils.rkt"
|
||||||
|
"whole-utils.rkt"
|
||||||
|
"../lib/xexpr-utils.rkt")
|
||||||
|
|
||||||
|
(provide
|
||||||
|
generate-results-content-fandom)
|
||||||
|
|
||||||
|
(module+ test
  (require rackunit
           "test-utils.rkt")
  ;; canned search data with two hits ("Gacha Capsule" and "Badges"), laid out one field per line;
  ;; the search hits sit under query -> search
  (define search-json-data
    '#hasheq((batchcomplete . #t)
             (query
              . #hasheq((search
                         . (#hasheq((ns . 0)
                                    (pageid . 219)
                                    (size . 1482)
                                    (snippet . "")
                                    (timestamp . "2022-08-21T08:54:23Z")
                                    (title . "Gacha Capsule")
                                    (wordcount . 214))
                            #hasheq((ns . 0)
                                    (pageid . 201)
                                    (size . 1198)
                                    (snippet . "")
                                    (timestamp . "2022-07-11T17:52:47Z")
                                    (title . "Badges")
                                    (wordcount . 181)))))))))
|
||||||
|
|
||||||
|
;; search provider that asks fandom's own api.php for results
;; wikiname - the wiki to search, used as the fandom subdomain, e.g. "minecraft"
;; query    - the search terms (the part after ?q=)
;; params   - the request's URL query parameters; not used by this provider, but accepted so that
;;            it can be called the same way as the other search providers
;; returns an x-expression (div.mw-parser-output) for display in the wiki page layout
(define (generate-results-content-fandom wikiname query params)
  ;; constructing the URL where I want to get fandom data from...
  (define origin (format "https://~a.fandom.com" wikiname))
  ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
  (define dest-url
    (format "~a/api.php?~a"
            origin
            (params->query `(("action" . "query")
                             ("list" . "search")
                             ("srsearch" . ,query)
                             ("formatversion" . "2")
                             ("format" . "json")))))
  ;; HTTP request to dest-url for search results
  (log-outgoing dest-url)
  (define res (easy:get dest-url #:timeouts timeouts))
  (define json (easy:response-json res))
  (define search-results (jp "/query/search" json))
  ;; rendering is a separate function so that it can be tested without any network access
  (render-results-content-fandom wikiname query search-results))

;; private helper: turns the list of search result objects into the x-expression to display
;; search-results - list of result objects, each with title, timestamp, wordcount and size
(define (render-results-content-fandom wikiname query search-results)
  ;; generate content for display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results found for " (length search-results))
           (strong ,query))
        ;; *u*nordered *l*ist of matching search results
        (ul ,@(for/list ([result search-results])
                (let* ([title (jp "/title" result)]
                       [page-path (page-title->path title)]
                       [timestamp (jp "/timestamp" result)]
                       [wordcount (jp "/wordcount" result)]
                       [size (jp "/size" result)])
                  ;; and make this x-expression...
                  `(li (@ (class "my-result"))
                       (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
                          ,title) ; using unquote to insert the result page title
                       (div (@ (class "my-result__info")) ; constructing the line under the search result
                            "last edited "
                            ;; only the date part of the timestamp (before the "T") is shown
                            (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
                            ,(format ", ~a words, ~a kb"
                                     wordcount
                                     ;; divide by 1000 and round to nearest 0.1
                                     (exact->inexact (/ (round (/ size 100)) 10))))))))))

(module+ test
  (parameterize ([(config-parameter 'feature_offline::only) "false"])
    ;; the renderer is exercised directly with canned data; calling the provider itself
    ;; would need a live HTTP request and takes (wikiname query params), not page-level arguments
    (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
                                      (render-results-content-fandom "test" "Gacha" (jp "/query/search" search-json-data)))))))
|
90
src/search-provider-solr.rkt
Normal file
90
src/search-provider-solr.rkt
Normal file
|
@ -0,0 +1,90 @@
|
||||||
|
#lang racket/base
|
||||||
|
(require racket/dict
         racket/string
         (only-in net/uri-codec uri-path-segment-encode)
         (prefix-in easy: net/http-easy)
         "application-globals.rkt"
         "../lib/html-parsing/main.rkt"
         "../lib/url-utils.rkt"
         "whole-utils.rkt"
         "../lib/xexpr-utils.rkt")
|
||||||
|
|
||||||
|
(provide
|
||||||
|
generate-results-content-solr)
|
||||||
|
|
||||||
|
;; one search hit, prepared for rendering by generate-results-content-solr:
;; hl-title, hl-body - highlighted title / body snippet from Solr, parsed with html->xexp (its cdr is kept)
;; kb, words         - size figures computed from the document's len field
;; page-path         - result of page-title->path on the document title, used to build the link
(struct result^ (hl-title hl-body kb words page-path) #:transparent)
|
||||||
|
|
||||||
|
;; search provider that queries a Solr instance on localhost:8983, using the core named after the wiki
;; wikiname - the wiki to search; selects the Solr core and is used in result links
;; query    - the search terms (the part after ?q=)
;; params   - the request's URL query parameters; 'op ("or", otherwise and) and
;;            'sort ("len", otherwise relevance) modify the search
;; returns an x-expression (div.mw-parser-output) containing a filter form, a summary line
;; and the list of highlighted results, for display in the wiki page layout
(define (generate-results-content-solr wikiname query params)
  ;; grab things from params that would modify the search
  ;; each choice is a pair: (value used in our own form . value sent to Solr)
  (define op (if (equal? (dict-ref params 'op #f) "or") '("or" . "OR") '("and" . "AND")))
  ;; not called `sort`, so that racket's own sort function isn't shadowed in this body
  (define sort-order (if (equal? (dict-ref params 'sort #f) "len") '("len" . "len desc") '("relevance" . "score desc")))

  ;; the dest-URL will look something like http://localhost:8983/solr/bloons/select?defType=edismax&fl=id%2Clen&hl.defaultSummary=true&hl.encoder=html&hl.fl=title%2Cbody&hl.method=unified&hl.tag.post=%3C%2Fmark%3E&hl.tag.pre=%3Cmark%3E&hl=true&indent=true&q.op=AND&q=blo&qf=title_prefix%20title%5E2.0%20body%20table%5E0.3&useParams=
  (define dest-url
    (format "http://localhost:8983/solr/~a/select?~a"
            ;; wikiname comes from the request URL, so encode it: characters like / ? #
            ;; must not be able to point this request at a different Solr endpoint
            (uri-path-segment-encode wikiname)
            (params->query `(("defType" . "edismax")
                             ("q" . ,query)
                             ("q.op" . ,(cdr op))
                             ("qf" . "title_prefix title^2.0 body table^0.3")
                             ("hl" . "true")
                             ("hl.method" . "unified")
                             ("hl.defaultSummary" . "true")
                             ("hl.fl" . "title,body")
                             ("fl" . "id,len,title")
                             ("hl.encoder" . "html")
                             ("hl.tag.pre" . "<mark>")
                             ("hl.tag.post" . "</mark>")
                             ("sort" . ,(cdr sort-order))))))
  ;; HTTP request to dest-url for search results
  (log-outgoing dest-url)
  (define res (easy:get dest-url #:timeouts timeouts))
  (define json (easy:response-json res))

  ;; build result objects
  ;; highlighting is keyed by document id, separately from the list of docs
  (define highlighting (jp "/highlighting" json))
  (define results
    (for/list ([doc (jp "/response/docs" json)])
      (define id (jp "/id" doc))
      (define len (jp "/len" doc))
      (define title (jp "/title" doc))
      (define page-path (page-title->path title))
      (define kb (exact->inexact (/ (round (/ len 100)) 10))) ; divide by 1000 and round to nearest 0.1
      (define words (* (round (/ len 60)) 10)) ; divide by 6 and round to nearest 10
      (define hl (hash-ref highlighting (string->symbol id)))
      ;; the highlighted snippets are HTML strings (with <mark> tags); parse them and drop the head of html->xexp's output
      (define hl-title (cdr (html->xexp (jp "/title/0" hl))))
      (define hl-body (cdr (html->xexp (string-trim (jp "/body/0" hl)))))
      (result^ hl-title hl-body kb words page-path)))

  ;; QTime divided by 1000 and rounded to nearest 0.01
  (define qtime (exact->inexact (/ (round (/ (jp "/responseHeader/QTime" json) 10)) 100)))

  ;; attribute list for an <option>: always its value, plus `selected` when it is the current choice
  (define (option-attrs value current-value)
    (append
     `((value ,value))
     (if (equal? value current-value)
         `((selected))
         `())))

  ;; generate content for display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        ;; filter form; q is carried along in a hidden input so that submitting keeps the search terms
        (form (@ (class "my-result__filter"))
              (input (@ (type "hidden") (name "q") (value ,query)))
              (select (@ (name "op"))
                      (option (@ ,@(option-attrs "and" (car op))) "All words must match")
                      (option (@ ,@(option-attrs "or" (car op))) "Some words must match"))
              (select (@ (name "sort"))
                      (option (@ ,@(option-attrs "relevance" (car sort-order))) "Relevant articles")
                      (option (@ ,@(option-attrs "len" (car sort-order))) "Wordiest articles"))
              (button "Filter results"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results (~a seconds) found for " (jp "/response/numFound" json) qtime)
           (strong ,query))
        ;; *u*nordered *l*ist of matching search results
        (ul ,@(for/list ([result results])
                `(li (@ (class "my-result"))
                     (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname (result^-page-path result)))) ; url
                        ,@(result^-hl-title result)) ; title
                     (p (@ (class "my-result__description")) ,@(result^-hl-body result)) ; result preview
                     (div (@ (class "my-result__info")) ; line under the search result
                          ,(format "~a words, ~a kb of readable stuff"
                                   (result^-words result)
                                   (result^-kb result))))))))
|
|
@ -249,11 +249,24 @@ a.ext-audiobutton { /* see hearthstone/wiki/Diablo_(Duels_hero) */
|
||||||
.my-result__link {
|
.my-result__link {
|
||||||
font-size: 1.2em;
|
font-size: 1.2em;
|
||||||
}
|
}
|
||||||
|
.my-result__description {
|
||||||
|
font-size: 0.8em;
|
||||||
|
white-space: pre-line;
|
||||||
|
margin-left: 1.2em;
|
||||||
|
}
|
||||||
|
.my-result mark {
|
||||||
|
background: rgba(255, 255, 0, 0.4);
|
||||||
|
}
|
||||||
.my-result__info {
|
.my-result__info {
|
||||||
font-size: 0.8em;
|
font-size: 0.8em;
|
||||||
color: var(--theme-page-text-color--hover);
|
color: var(--theme-page-text-color--hover);
|
||||||
margin-left: 1.2em;
|
margin-left: 1.2em;
|
||||||
}
|
}
|
||||||
|
.my-result__filter {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: auto auto auto 1fr;
|
||||||
|
grid-gap: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
/* (breezewiki) search suggestions */
|
/* (breezewiki) search suggestions */
|
||||||
.bw-search-form {
|
.bw-search-form {
|
||||||
|
|
Loading…
Reference in a new issue