Implement Solr search provider
This commit is contained in:
		
							parent
							
								
									76eaaa0a47
								
							
						
					
					
						commit
						61c304cf41
					
				
					 5 changed files with 192 additions and 54 deletions
				
			
		|  | @ -47,6 +47,7 @@ | |||
|     (feature_offline::enabled . "false") | ||||
|     (feature_offline::format . "json.gz") | ||||
|     (feature_offline::only . "false") | ||||
|     (feature_offline::search . "fandom") | ||||
| 
 | ||||
|     (access_log::enabled . "false") | ||||
| 
 | ||||
|  |  | |||
|  | @ -13,6 +13,8 @@ | |||
|          "application-globals.rkt" | ||||
|          "config.rkt" | ||||
|          "data.rkt" | ||||
|          "search-provider-fandom.rkt" | ||||
|          "search-provider-solr.rkt" | ||||
|          "../lib/syntax.rkt" | ||||
|          "../lib/thread-utils.rkt" | ||||
|          "../lib/url-utils.rkt" | ||||
|  | @ -22,47 +24,22 @@ | |||
| (provide | ||||
|  page-search) | ||||
| 
 | ||||
| (module+ test | ||||
|   (require rackunit | ||||
|            "test-utils.rkt") | ||||
|   (define search-json-data | ||||
|     '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181))))))))) | ||||
| (define search-providers | ||||
|   (hash "fandom" generate-results-content-fandom | ||||
|         "solr" generate-results-content-solr)) | ||||
| 
 | ||||
| ;; this takes the info we gathered from fandom and makes the big fat x-expression page | ||||
| (define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f]) | ||||
|   (define search-results (jp "/query/search" data)) | ||||
| (define (generate-results-page req source-url wikiname query results-content #:siteinfo [siteinfo #f]) | ||||
|   ;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle | ||||
|   (generate-wiki-page | ||||
|    ;; so I provide my helper function with the necessary context... | ||||
|    #:req req | ||||
|    #:source-url dest-url | ||||
|    #:source-url source-url | ||||
|    #:wikiname wikiname | ||||
|    #:title query | ||||
|    #:siteinfo siteinfo | ||||
|    ;; and here's the actual results to display in the wiki page layout | ||||
|    `(div (@ (class "mw-parser-output")) | ||||
|          ;; header before the search results showing how many we found | ||||
|          (p ,(format "~a results found for " (length search-results)) | ||||
|             (strong ,query)) | ||||
|          ;; *u*nordered *l*ist of matching search results | ||||
|          (ul ,@(map | ||||
|                 (λ (result) ;; for each result, run this code... | ||||
|                   (let* ([title (jp "/title" result)] | ||||
|                          [page-path (page-title->path title)] | ||||
|                          [timestamp (jp "/timestamp" result)] | ||||
|                          [wordcount (jp "/wordcount" result)] | ||||
|                          [size (jp "/size" result)]) | ||||
|                     ;; and make this x-expression... | ||||
|                     `(li (@ (class "my-result")) | ||||
|                          (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL | ||||
|                             ,title) ; using unquote to insert the result page title | ||||
|                          (div (@ (class "my-result__info")) ; constructing the line under the search result | ||||
|                               "last edited " | ||||
|                               (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0)) | ||||
|                               ,(format ", ~a words, ~a kb" | ||||
|                                        wordcount | ||||
|                                        (exact->inexact (/ (round (/ size 100)) 10))))))) | ||||
|                 search-results))))) | ||||
|    results-content)) | ||||
| 
 | ||||
| ;; will be called when the web browser asks to load the page | ||||
| (define (page-search req) | ||||
|  | @ -71,34 +48,31 @@ | |||
|    ;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner" | ||||
|    ;; grab the first part to use as the wikiname, in this case, "minecraft" | ||||
|    (define wikiname (path/param-path (first (url-path (request-uri req))))) | ||||
|    ;; grab a dict of url search params | ||||
|    (define params (url-query (request-uri req))) | ||||
|    ;; grab the part after ?q= which is the search terms | ||||
|    (define query (dict-ref (url-query (request-uri req)) 'q #f)) | ||||
|    ;; constructing the URL where I want to get fandom data from... | ||||
|    (define origin (format "https://~a.fandom.com" wikiname)) | ||||
|    ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json | ||||
|    (define dest-url | ||||
|      (format "~a/api.php?~a" | ||||
|              origin | ||||
|              (params->query `(("action" . "query") | ||||
|                               ("list" . "search") | ||||
|                               ("srsearch" . ,query) | ||||
|                               ("formatversion" . "2") | ||||
|                               ("format" . "json"))))) | ||||
|    (define query (dict-ref params 'q #f)) | ||||
|    ;; figure out which search provider we're going to use | ||||
|    (define search-provider (hash-ref search-providers (config-get 'feature_offline::search))) | ||||
| 
 | ||||
|    ;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name) | ||||
|    (define-values (dest-res siteinfo) | ||||
|    ;; external special:search url to link at the bottom of the page as the upstream source | ||||
|    (define external-search-url | ||||
|      (format "https://~a.fandom.com/wiki/Special:Search?~a" | ||||
|              wikiname | ||||
|              (params->query `(("query" . ,query) | ||||
|                               ("search" . "internal"))))) | ||||
| 
 | ||||
|    ;; simultaneously get the search results, as well as information about the wiki as a whole (its license, icon, name) | ||||
|    (define-values (results-content siteinfo) | ||||
|      (thread-values | ||||
|       (λ () | ||||
|         (log-outgoing dest-url) | ||||
|         (easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results | ||||
|         (search-provider wikiname query params)) ;; call the search provider (see file "search-provider-fandom.rkt") | ||||
|       (λ () | ||||
|         (siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki | ||||
| 
 | ||||
|    ;; search results are a JSON string. parse JSON into racket data structures | ||||
|    (define data (easy:response-json dest-res)) | ||||
|    ;; calling my generate-results-page function with the information so far in order to get a big fat x-expression | ||||
|    ;; big fat x-expression goes into the body variable | ||||
|    (define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo)) | ||||
|    (define body (generate-results-page req external-search-url wikiname query results-content #:siteinfo siteinfo)) | ||||
|    ;; error checking | ||||
|    (when (config-true? 'debug) | ||||
|      ; used for its side effects | ||||
|  | @ -110,7 +84,4 @@ | |||
|     #:headers (build-headers always-headers) | ||||
|     (λ (out) | ||||
|       (write-html body out))))) | ||||
| (module+ test | ||||
|   (parameterize ([(config-parameter 'feature_offline::only) "false"]) | ||||
|     (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule") | ||||
|                                       (generate-results-page test-req "" "test" "Gacha" search-json-data)))))) | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										63
									
								
								src/search-provider-fandom.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								src/search-provider-fandom.rkt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,63 @@ | |||
| #lang racket/base | ||||
| (require racket/string | ||||
|          (prefix-in easy: net/http-easy) | ||||
|          "application-globals.rkt" | ||||
|          "config.rkt" | ||||
|          "../lib/url-utils.rkt" | ||||
|          "whole-utils.rkt" | ||||
|          "../lib/xexpr-utils.rkt") | ||||
| 
 | ||||
| (provide | ||||
|  generate-results-content-fandom) | ||||
| 
 | ||||
(module+ test
  (require rackunit
           "test-utils.rkt")
  ;; canned api.php search response containing two hits ("Gacha Capsule" and "Badges"),
  ;; used by the rendering test at the bottom of this file
  (define search-json-data
    '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))

;; Search provider backed by the wiki's own api.php search on fandom.com.
;;   wikiname - the wiki's subdomain on fandom.com, e.g. "minecraft"
;;   query    - the search terms
;;   params   - the request's URL query parameters; not read by this provider, it is only
;;              accepted so the provider can be called with the same three arguments as
;;              the other search providers
;; Performs one outgoing HTTP request, then returns the x-expression to show in the middle
;; of the wiki page layout.
(define (generate-results-content-fandom wikiname query params)
  ;; constructing the URL where I want to get fandom data from...
  (define origin (format "https://~a.fandom.com" wikiname))
  ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
  (define dest-url
    (format "~a/api.php?~a"
            origin
            (params->query `(("action" . "query")
                             ("list" . "search")
                             ("srsearch" . ,query)
                             ("formatversion" . "2")
                             ("format" . "json")))))
  ;; HTTP request to dest-url for search results
  (log-outgoing dest-url)
  (define res (easy:get dest-url #:timeouts timeouts))
  ;; the response body is JSON; parse it and hand it to the pure rendering step
  (search-json->results-content wikiname query (easy:response-json res)))

;; Private helper: the rendering half of the fandom provider. Takes the already-parsed
;; api.php response and builds the results x-expression. It does no network access, which
;; is what lets the test below feed it canned JSON.
(define (search-json->results-content wikiname query json)
  (define search-results (jp "/query/search" json))
  ;; generate content for display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results found for " (length search-results))
           (strong ,query))
        ;; *u*nordered *l*ist of matching search results
        (ul ,@(for/list ([result search-results])
                (let* ([title (jp "/title" result)]
                       [page-path (page-title->path title)]
                       [timestamp (jp "/timestamp" result)]
                       [wordcount (jp "/wordcount" result)]
                       [size (jp "/size" result)])
                  ;; and make this x-expression...
                  `(li (@ (class "my-result"))
                       (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
                          ,title) ; using unquote to insert the result page title
                       (div (@ (class "my-result__info")) ; constructing the line under the search result
                            "last edited "
                            ;; only the date part (before the "T") is shown; the full timestamp goes in the datetime attribute
                            (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
                            ,(format ", ~a words, ~a kb"
                                     wordcount
                                     ;; size / 1000, rounded to the nearest 0.1
                                     (exact->inexact (/ (round (/ size 100)) 10))))))))))

(module+ test
  ;; the provider entry point takes (wikiname query params) and goes to the network, so the
  ;; test exercises the pure renderer with the canned JSON instead
  (parameterize ([(config-parameter 'feature_offline::only) "false"])
    (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
                                      (search-json->results-content "test" "Gacha" search-json-data))))))
							
								
								
									
										90
									
								
								src/search-provider-solr.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								src/search-provider-solr.rkt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,90 @@ | |||
| #lang racket/base | ||||
| (require racket/dict | ||||
|          racket/string | ||||
|          (prefix-in easy: net/http-easy) | ||||
|          "application-globals.rkt" | ||||
|          "../lib/html-parsing/main.rkt" | ||||
|          "../lib/url-utils.rkt" | ||||
|          "whole-utils.rkt" | ||||
|          "../lib/xexpr-utils.rkt") | ||||
| 
 | ||||
| (provide | ||||
|  generate-results-content-solr) | ||||
| 
 | ||||
;; One search hit, ready for rendering.
;;   hl-title, hl-body - the cdr of the html->xexp parse of the highlighted title / body
;;                       snippet from the "highlighting" part of the Solr response
;;   kb, words         - rounded size figures derived from the document's `len` field
;;   page-path         - the document title passed through page-title->path, for the link href
(struct result^ (hl-title hl-body kb words page-path) #:transparent)

;; Search provider backed by a Solr instance on localhost:8983, using a core named after the wiki.
;;   wikiname - used as the Solr core name and in the result links
;;   query    - the search terms
;;   params   - the request's URL query parameters; `op` ("or", anything else means "and") and
;;              `sort` ("len", anything else means "relevance") adjust the search
;; Performs one outgoing HTTP request, then returns the x-expression to show in the middle
;; of the wiki page layout: a filter form, a result count line, and the list of hits.
(define (generate-results-content-solr wikiname query params)
  ;; each choice is a pair: (value used in our own form . value sent to Solr)
  (define op-choice
    (cond
      [(equal? (dict-ref params 'op #f) "or") (cons "or" "OR")]
      [else (cons "and" "AND")]))
  (define sort-choice
    (cond
      [(equal? (dict-ref params 'sort #f) "len") (cons "len" "len desc")]
      [else (cons "relevance" "score desc")]))

  ;; the select URL ends up looking something like
  ;; http://localhost:8983/solr/bloons/select?defType=edismax&q=blo&q.op=AND&...
  (define solr-params
    `(("defType" . "edismax")
      ("q" . ,query)
      ("q.op" . ,(cdr op-choice))
      ("qf" . "title_prefix title^2.0 body table^0.3")
      ("hl" . "true")
      ("hl.method" . "unified")
      ("hl.defaultSummary" . "true")
      ("hl.fl" . "title,body")
      ("fl" . "id,len,title")
      ("hl.encoder" . "html")
      ("hl.tag.pre" . "<mark>")
      ("hl.tag.post" . "</mark>")
      ("sort" . ,(cdr sort-choice))))
  (define dest-url
    (format "http://localhost:8983/solr/~a/select?~a" wikiname (params->query solr-params)))

  ;; log, fetch, and parse the JSON response
  (log-outgoing dest-url)
  (define json (easy:response-json (easy:get dest-url #:timeouts timeouts)))

  ;; highlighted snippets are looked up by document id (as a symbol key)
  (define highlighting (jp "/highlighting" json))

  ;; turn one entry of /response/docs into a result^
  (define (doc->result doc)
    (let* ([id (jp "/id" doc)]
           [len (jp "/len" doc)]
           [title (jp "/title" doc)]
           [page-path (page-title->path title)]
           [kb (exact->inexact (/ (round (/ len 100)) 10))] ; len / 1000, to the nearest 0.1
           [words (* (round (/ len 60)) 10)] ; len / 6, to the nearest 10
           [hl (hash-ref highlighting (string->symbol id))]
           [hl-title (cdr (html->xexp (jp "/title/0" hl)))]
           [hl-body (cdr (html->xexp (string-trim (jp "/body/0" hl))))])
      (result^ hl-title hl-body kb words page-path)))

  (define results
    (for/list ([doc (jp "/response/docs" json)])
      (doc->result doc)))

  ;; QTime / 1000, rounded to two decimals; displayed below as seconds
  (define qtime (exact->inexact (/ (round (/ (jp "/responseHeader/QTime" json) 10)) 100)))

  ;; attribute list for an <option>: always its value, plus `selected` when it is the active choice
  (define (option-attributes value current-value)
    (if (equal? value current-value)
        `((value ,value) (selected))
        `((value ,value))))

  ;; x-expression for a single hit
  (define (result->li result)
    `(li (@ (class "my-result"))
         (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname (result^-page-path result)))) ; link to the page
            ,@(result^-hl-title result)) ; highlighted title
         (p (@ (class "my-result__description")) ,@(result^-hl-body result)) ; highlighted preview
         (div (@ (class "my-result__info")) ; size line under the preview
              ,(format "~a words, ~a kb of readable stuff"
                       (result^-words result)
                       (result^-kb result)))))

  ;; the content to display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        ;; filter form; resubmits the same query with the chosen op / sort
        (form (@ (class "my-result__filter"))
              (input (@ (type "hidden") (name "q") (value ,query)))
              (select (@ (name "op"))
                      (option (@ ,@(option-attributes "and" (car op-choice))) "All words must match")
                      (option (@ ,@(option-attributes "or" (car op-choice))) "Some words must match"))
              (select (@ (name "sort"))
                      (option (@ ,@(option-attributes "relevance" (car sort-choice))) "Relevant articles")
                      (option (@ ,@(option-attributes "len" (car sort-choice))) "Wordiest articles"))
              (button "Filter results"))
        ;; how many documents matched and how long the search took
        (p ,(format "~a results (~a seconds) found for " (jp "/response/numFound" json) qtime)
           (strong ,query))
        ;; the hits themselves
        (ul ,@(map result->li results))))
|  | @ -249,11 +249,24 @@ a.ext-audiobutton { /* see hearthstone/wiki/Diablo_(Duels_hero) */ | |||
| .my-result__link { | ||||
|     font-size: 1.2em; | ||||
| } | ||||
| .my-result__description { | ||||
|     font-size: 0.8em; | ||||
|     white-space: pre-line; | ||||
|     margin-left: 1.2em; | ||||
| } | ||||
| .my-result mark { | ||||
|     background: rgba(255, 255, 0, 0.4); | ||||
| } | ||||
| .my-result__info { | ||||
|     font-size: 0.8em; | ||||
|     color: var(--theme-page-text-color--hover); | ||||
|     margin-left: 1.2em; | ||||
| } | ||||
| .my-result__filter { | ||||
|     display: grid; | ||||
|     grid-template-columns: auto auto auto 1fr; | ||||
|     grid-gap: 8px; | ||||
| } | ||||
| 
 | ||||
| /* (breezewiki) search suggestions */ | ||||
| .bw-search-form { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue