forked from cadence/breezewiki
		
	Implement Solr search provider
This commit is contained in:
		
							parent
							
								
									76eaaa0a47
								
							
						
					
					
						commit
						61c304cf41
					
				
					 5 changed files with 192 additions and 54 deletions
				
			
		|  | @ -47,6 +47,7 @@ | ||||||
|     (feature_offline::enabled . "false") |     (feature_offline::enabled . "false") | ||||||
|     (feature_offline::format . "json.gz") |     (feature_offline::format . "json.gz") | ||||||
|     (feature_offline::only . "false") |     (feature_offline::only . "false") | ||||||
|  |     (feature_offline::search . "fandom") | ||||||
| 
 | 
 | ||||||
|     (access_log::enabled . "false") |     (access_log::enabled . "false") | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -13,6 +13,8 @@ | ||||||
|          "application-globals.rkt" |          "application-globals.rkt" | ||||||
|          "config.rkt" |          "config.rkt" | ||||||
|          "data.rkt" |          "data.rkt" | ||||||
|  |          "search-provider-fandom.rkt" | ||||||
|  |          "search-provider-solr.rkt" | ||||||
|          "../lib/syntax.rkt" |          "../lib/syntax.rkt" | ||||||
|          "../lib/thread-utils.rkt" |          "../lib/thread-utils.rkt" | ||||||
|          "../lib/url-utils.rkt" |          "../lib/url-utils.rkt" | ||||||
|  | @ -22,47 +24,22 @@ | ||||||
| (provide | (provide | ||||||
|  page-search) |  page-search) | ||||||
| 
 | 
 | ||||||
| (module+ test | (define search-providers | ||||||
|   (require rackunit |   (hash "fandom" generate-results-content-fandom | ||||||
|            "test-utils.rkt") |         "solr" generate-results-content-solr)) | ||||||
|   (define search-json-data |  | ||||||
|     '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181))))))))) |  | ||||||
| 
 | 
 | ||||||
| ;; this takes the info we gathered from fandom and makes the big fat x-expression page | ;; this takes the info we gathered from fandom and makes the big fat x-expression page | ||||||
| (define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f]) | (define (generate-results-page req source-url wikiname query results-content #:siteinfo [siteinfo #f]) | ||||||
|   (define search-results (jp "/query/search" data)) |  | ||||||
|   ;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle |   ;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle | ||||||
|   (generate-wiki-page |   (generate-wiki-page | ||||||
|    ;; so I provide my helper function with the necessary context... |    ;; so I provide my helper function with the necessary context... | ||||||
|    #:req req |    #:req req | ||||||
|    #:source-url dest-url |    #:source-url source-url | ||||||
|    #:wikiname wikiname |    #:wikiname wikiname | ||||||
|    #:title query |    #:title query | ||||||
|    #:siteinfo siteinfo |    #:siteinfo siteinfo | ||||||
|    ;; and here's the actual results to display in the wiki page layout |    ;; and here's the actual results to display in the wiki page layout | ||||||
|    `(div (@ (class "mw-parser-output")) |    results-content)) | ||||||
|          ;; header before the search results showing how many we found |  | ||||||
|          (p ,(format "~a results found for " (length search-results)) |  | ||||||
|             (strong ,query)) |  | ||||||
|          ;; *u*nordered *l*ist of matching search results |  | ||||||
|          (ul ,@(map |  | ||||||
|                 (λ (result) ;; for each result, run this code... |  | ||||||
|                   (let* ([title (jp "/title" result)] |  | ||||||
|                          [page-path (page-title->path title)] |  | ||||||
|                          [timestamp (jp "/timestamp" result)] |  | ||||||
|                          [wordcount (jp "/wordcount" result)] |  | ||||||
|                          [size (jp "/size" result)]) |  | ||||||
|                     ;; and make this x-expression... |  | ||||||
|                     `(li (@ (class "my-result")) |  | ||||||
|                          (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL |  | ||||||
|                             ,title) ; using unquote to insert the result page title |  | ||||||
|                          (div (@ (class "my-result__info")) ; constructing the line under the search result |  | ||||||
|                               "last edited " |  | ||||||
|                               (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0)) |  | ||||||
|                               ,(format ", ~a words, ~a kb" |  | ||||||
|                                        wordcount |  | ||||||
|                                        (exact->inexact (/ (round (/ size 100)) 10))))))) |  | ||||||
|                 search-results))))) |  | ||||||
| 
 | 
 | ||||||
| ;; will be called when the web browser asks to load the page | ;; will be called when the web browser asks to load the page | ||||||
| (define (page-search req) | (define (page-search req) | ||||||
|  | @ -71,34 +48,31 @@ | ||||||
|    ;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner" |    ;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner" | ||||||
|    ;; grab the first part to use as the wikiname, in this case, "minecraft" |    ;; grab the first part to use as the wikiname, in this case, "minecraft" | ||||||
|    (define wikiname (path/param-path (first (url-path (request-uri req))))) |    (define wikiname (path/param-path (first (url-path (request-uri req))))) | ||||||
|  |    ;; grab a dict of url search params | ||||||
|  |    (define params (url-query (request-uri req))) | ||||||
|    ;; grab the part after ?q= which is the search terms |    ;; grab the part after ?q= which is the search terms | ||||||
|    (define query (dict-ref (url-query (request-uri req)) 'q #f)) |    (define query (dict-ref params 'q #f)) | ||||||
|    ;; constructing the URL where I want to get fandom data from... |    ;; figure out which search provider we're going to use | ||||||
|    (define origin (format "https://~a.fandom.com" wikiname)) |    (define search-provider (hash-ref search-providers (config-get 'feature_offline::search))) | ||||||
|    ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json |  | ||||||
|    (define dest-url |  | ||||||
|      (format "~a/api.php?~a" |  | ||||||
|              origin |  | ||||||
|              (params->query `(("action" . "query") |  | ||||||
|                               ("list" . "search") |  | ||||||
|                               ("srsearch" . ,query) |  | ||||||
|                               ("formatversion" . "2") |  | ||||||
|                               ("format" . "json"))))) |  | ||||||
| 
 | 
 | ||||||
|    ;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name) |    ;; external special:search url to link at the bottom of the page as the upstream source | ||||||
|    (define-values (dest-res siteinfo) |    (define external-search-url | ||||||
|  |      (format "https://~a.fandom.com/wiki/Special:Search?~a" | ||||||
|  |              wikiname | ||||||
|  |              (params->query `(("query" . ,query) | ||||||
|  |                               ("search" . "internal"))))) | ||||||
|  | 
 | ||||||
|  |    ;; simultaneously get the search results, as well as information about the wiki as a whole (its license, icon, name) | ||||||
|  |    (define-values (results-content siteinfo) | ||||||
|      (thread-values |      (thread-values | ||||||
|       (λ () |       (λ () | ||||||
|         (log-outgoing dest-url) |         (search-provider wikiname query params)) ;; call the search provider (see file "search-provider-fandom.rkt") | ||||||
|         (easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results |  | ||||||
|       (λ () |       (λ () | ||||||
|         (siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki |         (siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki | ||||||
| 
 | 
 | ||||||
|    ;; search results are a JSON string. parse JSON into racket data structures |  | ||||||
|    (define data (easy:response-json dest-res)) |  | ||||||
|    ;; calling my generate-results-page function with the information so far in order to get a big fat x-expression |    ;; calling my generate-results-page function with the information so far in order to get a big fat x-expression | ||||||
|    ;; big fat x-expression goes into the body variable |    ;; big fat x-expression goes into the body variable | ||||||
|    (define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo)) |    (define body (generate-results-page req external-search-url wikiname query results-content #:siteinfo siteinfo)) | ||||||
|    ;; error checking |    ;; error checking | ||||||
|    (when (config-true? 'debug) |    (when (config-true? 'debug) | ||||||
|      ; used for its side effects |      ; used for its side effects | ||||||
|  | @ -110,7 +84,4 @@ | ||||||
|     #:headers (build-headers always-headers) |     #:headers (build-headers always-headers) | ||||||
|     (λ (out) |     (λ (out) | ||||||
|       (write-html body out))))) |       (write-html body out))))) | ||||||
| (module+ test | 
 | ||||||
|   (parameterize ([(config-parameter 'feature_offline::only) "false"]) |  | ||||||
|     (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule") |  | ||||||
|                                       (generate-results-page test-req "" "test" "Gacha" search-json-data)))))) |  | ||||||
|  |  | ||||||
							
								
								
									
										63
									
								
								src/search-provider-fandom.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								src/search-provider-fandom.rkt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,63 @@ | ||||||
|  | #lang racket/base | ||||||
|  | (require racket/string | ||||||
|  |          (prefix-in easy: net/http-easy) | ||||||
|  |          "application-globals.rkt" | ||||||
|  |          "config.rkt" | ||||||
|  |          "../lib/url-utils.rkt" | ||||||
|  |          "whole-utils.rkt" | ||||||
|  |          "../lib/xexpr-utils.rkt") | ||||||
|  | 
 | ||||||
|  | (provide | ||||||
|  |  generate-results-content-fandom) | ||||||
|  | 
 | ||||||
(module+ test
  (require rackunit
           "test-utils.rkt")
  ;; canned response in the shape of fandom's api.php?action=query&list=search output
  (define search-json-data
    '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))

;; Fandom search provider.
;; wikiname: the wiki's subdomain on fandom.com, e.g. "minecraft"
;; query: the search terms the user typed
;; params: the request's url search params; not used by this provider, but
;;         accepted so that every search provider is called the same way
;; Returns the x-expression to display in the middle of the wiki page layout.
(define (generate-results-content-fandom wikiname query params)
  ;; constructing the URL where I want to get fandom data from...
  (define origin (format "https://~a.fandom.com" wikiname))
  ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
  (define dest-url
    (format "~a/api.php?~a"
            origin
            (params->query `(("action" . "query")
                             ("list" . "search")
                             ("srsearch" . ,query)
                             ("formatversion" . "2")
                             ("format" . "json")))))
  ;; HTTP request to dest-url for search results
  (log-outgoing dest-url)
  (define res (easy:get dest-url #:timeouts timeouts))
  ;; search results are a JSON string. parse JSON into racket data structures
  (define json (easy:response-json res))
  ;; rendering is kept separate from fetching so it can be tested without the network
  (search-json->results-content wikiname query json))

;; Private helper: turn the parsed JSON of a fandom search API response into
;; the x-expression to display. Does no I/O.
;; wikiname: used to build the link to each result page
;; query: echoed back in the "results found for" header
;; json: parsed API response; the results are read from /query/search
(define (search-json->results-content wikiname query json)
  (define search-results (jp "/query/search" json))
  ;; generate content for display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results found for " (length search-results))
           (strong ,query))
        ;; *u*nordered *l*ist of matching search results
        (ul ,@(for/list ([result search-results])
                (let* ([title (jp "/title" result)]
                       [page-path (page-title->path title)]
                       [timestamp (jp "/timestamp" result)]
                       [wordcount (jp "/wordcount" result)]
                       [size (jp "/size" result)])
                  ;; and make this x-expression...
                  `(li (@ (class "my-result"))
                       (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
                          ,title) ; using unquote to insert the result page title
                       (div (@ (class "my-result__info")) ; constructing the line under the search result
                            "last edited "
                            ;; only the date part (before the "T") of the timestamp is shown
                            (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
                            ,(format ", ~a words, ~a kb"
                                     wordcount
                                     ;; size divided by 1000, rounded to the nearest 0.1
                                     (exact->inexact (/ (round (/ size 100)) 10))))))))))

(module+ test
  ;; render the canned response directly; calling the provider itself would
  ;; hit the network and it does not accept pre-fetched data
  (parameterize ([(config-parameter 'feature_offline::only) "false"])
    (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
                                      (search-json->results-content "test" "Gacha" search-json-data))))))
							
								
								
									
										90
									
								
								src/search-provider-solr.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								src/search-provider-solr.rkt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,90 @@ | ||||||
|  | #lang racket/base | ||||||
|  | (require racket/dict | ||||||
|  |          racket/string | ||||||
|  |          (prefix-in easy: net/http-easy) | ||||||
|  |          "application-globals.rkt" | ||||||
|  |          "../lib/html-parsing/main.rkt" | ||||||
|  |          "../lib/url-utils.rkt" | ||||||
|  |          "whole-utils.rkt" | ||||||
|  |          "../lib/xexpr-utils.rkt") | ||||||
|  | 
 | ||||||
|  | (provide | ||||||
|  |  generate-results-content-solr) | ||||||
|  | 
 | ||||||
;; One search hit, pre-processed for rendering:
;; hl-title / hl-body: lists of x-expression nodes parsed from Solr's highlighted snippets
;; kb / words: approximate size figures derived from the document's len field
;; page-path: path of the page to link to
(struct result^ (hl-title hl-body kb words page-path) #:transparent)

;; Solr search provider.
;; wikiname: used as the name of the Solr collection to query and in result links
;; query: the search terms the user typed
;; params: the request's url search params; 'op and 'sort adjust the search
;; Returns the x-expression to display in the middle of the wiki page layout.
(define (generate-results-content-solr wikiname query params)
  ;; each choice is a pair: (value used in our own form . value sent to Solr)
  ;; anything unrecognised falls back to the second option
  (define op-choice
    (cond
      [(equal? (dict-ref params 'op #f) "or") '("or" . "OR")]
      [else '("and" . "AND")]))
  (define sort-choice
    (cond
      [(equal? (dict-ref params 'sort #f) "len") '("len" . "len desc")]
      [else '("relevance" . "score desc")]))

  ;; the dest-URL will look something like http://localhost:8983/solr/bloons/select?defType=edismax&fl=id%2Clen&hl.defaultSummary=true&hl.encoder=html&hl.fl=title%2Cbody&hl.method=unified&hl.tag.post=%3C%2Fmark%3E&hl.tag.pre=%3Cmark%3E&hl=true&indent=true&q.op=AND&q=blo&qf=title_prefix%20title%5E2.0%20body%20table%5E0.3&useParams=
  (define solr-params
    `(("defType" . "edismax")
      ("q" . ,query)
      ("q.op" . ,(cdr op-choice))
      ("qf" . "title_prefix title^2.0 body table^0.3")
      ("hl" . "true")
      ("hl.method" . "unified")
      ("hl.defaultSummary" . "true")
      ("hl.fl" . "title,body")
      ("fl" . "id,len,title")
      ("hl.encoder" . "html")
      ("hl.tag.pre" . "<mark>")
      ("hl.tag.post" . "</mark>")
      ("sort" . ,(cdr sort-choice))))
  (define dest-url
    (format "http://localhost:8983/solr/~a/select?~a"
            wikiname
            (params->query solr-params)))

  ;; HTTP request to dest-url for search results, parsed from JSON
  (log-outgoing dest-url)
  (define res (easy:get dest-url #:timeouts timeouts))
  (define json (easy:response-json res))

  ;; highlighted snippets are looked up per document, keyed by the document id as a symbol
  (define highlighting (jp "/highlighting" json))

  ;; build one result^ from one entry of /response/docs
  (define (doc->result doc)
    (let* ([id (jp "/id" doc)]
           [len (jp "/len" doc)]
           [title (jp "/title" doc)]
           [page-path (page-title->path title)]
           ;; divide by 1000 and round to nearest 0.1
           [kb (exact->inexact (/ (round (/ len 100)) 10))]
           ;; divide by 6 and round to nearest 10
           [words (* (round (/ len 60)) 10)]
           [hl (hash-ref highlighting (string->symbol id))]
           ;; cdr keeps only the parsed child nodes (presumably dropping html->xexp's top-level wrapper)
           [hl-title (cdr (html->xexp (jp "/title/0" hl)))]
           [hl-body (cdr (html->xexp (string-trim (jp "/body/0" hl))))])
      (result^ hl-title hl-body kb words page-path)))

  (define results
    (for/list ([doc (jp "/response/docs" json)])
      (doc->result doc)))

  ;; QTime divided by 1000 and rounded to the nearest 0.01, shown as seconds
  (define qtime (exact->inexact (/ (round (/ (jp "/responseHeader/QTime" json) 10)) 100)))

  ;; attribute list for an <option>: always its value, plus `selected` when it is the current choice
  (define (option-attrs value current-value)
    (if (equal? value current-value)
        `((value ,value) (selected))
        `((value ,value))))

  ;; one <li> for one result^
  (define (result->li result)
    `(li (@ (class "my-result"))
         (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname (result^-page-path result)))) ; url
            ,@(result^-hl-title result)) ; title
         (p (@ (class "my-result__description")) ,@(result^-hl-body result)) ; result preview
         (div (@ (class "my-result__info")) ; line under the search result
              ,(format "~a words, ~a kb of readable stuff"
                       (result^-words result)
                       (result^-kb result)))))

  ;; generate content for display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        ;; form for changing op/sort; the hidden input carries the current query along
        (form (@ (class "my-result__filter"))
              (input (@ (type "hidden") (name "q") (value ,query)))
              (select (@ (name "op"))
                      (option (@ ,@(option-attrs "and" (car op-choice))) "All words must match")
                      (option (@ ,@(option-attrs "or" (car op-choice))) "Some words must match"))
              (select (@ (name "sort"))
                      (option (@ ,@(option-attrs "relevance" (car sort-choice))) "Relevant articles")
                      (option (@ ,@(option-attrs "len" (car sort-choice))) "Wordiest articles"))
              (button "Filter results"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results (~a seconds) found for " (jp "/response/numFound" json) qtime)
           (strong ,query))
        ;; *u*nordered *l*ist of matching search results
        (ul ,@(map result->li results))))
|  | @ -249,11 +249,24 @@ a.ext-audiobutton { /* see hearthstone/wiki/Diablo_(Duels_hero) */ | ||||||
| .my-result__link { | .my-result__link { | ||||||
|     font-size: 1.2em; |     font-size: 1.2em; | ||||||
| } | } | ||||||
|  | .my-result__description { | ||||||
|  |     font-size: 0.8em; | ||||||
|  |     white-space: pre-line; | ||||||
|  |     margin-left: 1.2em; | ||||||
|  | } | ||||||
|  | .my-result mark { | ||||||
|  |     background: rgba(255, 255, 0, 0.4); | ||||||
|  | } | ||||||
| .my-result__info { | .my-result__info { | ||||||
|     font-size: 0.8em; |     font-size: 0.8em; | ||||||
|     color: var(--theme-page-text-color--hover); |     color: var(--theme-page-text-color--hover); | ||||||
|     margin-left: 1.2em; |     margin-left: 1.2em; | ||||||
| } | } | ||||||
|  | .my-result__filter { | ||||||
|  |     display: grid; | ||||||
|  |     grid-template-columns: auto auto auto 1fr; | ||||||
|  |     grid-gap: 8px; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| /* (breezewiki) search suggestions */ | /* (breezewiki) search suggestions */ | ||||||
| .bw-search-form { | .bw-search-form { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue