forked from cadence/breezewiki
		
	Implement Solr search provider
This commit is contained in:
		
							parent
							
								
									76eaaa0a47
								
							
						
					
					
						commit
						61c304cf41
					
				
					 5 changed files with 192 additions and 54 deletions
				
			
		| 
						 | 
				
			
			@ -47,6 +47,7 @@
 | 
			
		|||
    (feature_offline::enabled . "false")
 | 
			
		||||
    (feature_offline::format . "json.gz")
 | 
			
		||||
    (feature_offline::only . "false")
 | 
			
		||||
    (feature_offline::search . "fandom")
 | 
			
		||||
 | 
			
		||||
    (access_log::enabled . "false")
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -13,6 +13,8 @@
 | 
			
		|||
         "application-globals.rkt"
 | 
			
		||||
         "config.rkt"
 | 
			
		||||
         "data.rkt"
 | 
			
		||||
         "search-provider-fandom.rkt"
 | 
			
		||||
         "search-provider-solr.rkt"
 | 
			
		||||
         "../lib/syntax.rkt"
 | 
			
		||||
         "../lib/thread-utils.rkt"
 | 
			
		||||
         "../lib/url-utils.rkt"
 | 
			
		||||
| 
						 | 
				
			
			@ -22,47 +24,22 @@
 | 
			
		|||
(provide
 | 
			
		||||
 page-search)
 | 
			
		||||
 | 
			
		||||
(module+ test
 | 
			
		||||
  (require rackunit
 | 
			
		||||
           "test-utils.rkt")
 | 
			
		||||
  (define search-json-data
 | 
			
		||||
    '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))
 | 
			
		||||
(define search-providers
 | 
			
		||||
  (hash "fandom" generate-results-content-fandom
 | 
			
		||||
        "solr" generate-results-content-solr))
 | 
			
		||||
 | 
			
		||||
;; this takes the info we gathered from fandom and makes the big fat x-expression page
 | 
			
		||||
(define (generate-results-page req dest-url wikiname query data #:siteinfo [siteinfo #f])
 | 
			
		||||
  (define search-results (jp "/query/search" data))
 | 
			
		||||
(define (generate-results-page req source-url wikiname query results-content #:siteinfo [siteinfo #f])
 | 
			
		||||
  ;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle
 | 
			
		||||
  (generate-wiki-page
 | 
			
		||||
   ;; so I provide my helper function with the necessary context...
 | 
			
		||||
   #:req req
 | 
			
		||||
   #:source-url dest-url
 | 
			
		||||
   #:source-url source-url
 | 
			
		||||
   #:wikiname wikiname
 | 
			
		||||
   #:title query
 | 
			
		||||
   #:siteinfo siteinfo
 | 
			
		||||
   ;; and here's the actual results to display in the wiki page layout
 | 
			
		||||
   `(div (@ (class "mw-parser-output"))
 | 
			
		||||
         ;; header before the search results showing how many we found
 | 
			
		||||
         (p ,(format "~a results found for " (length search-results))
 | 
			
		||||
            (strong ,query))
 | 
			
		||||
         ;; *u*nordered *l*ist of matching search results
 | 
			
		||||
         (ul ,@(map
 | 
			
		||||
                (λ (result) ;; for each result, run this code...
 | 
			
		||||
                  (let* ([title (jp "/title" result)]
 | 
			
		||||
                         [page-path (page-title->path title)]
 | 
			
		||||
                         [timestamp (jp "/timestamp" result)]
 | 
			
		||||
                         [wordcount (jp "/wordcount" result)]
 | 
			
		||||
                         [size (jp "/size" result)])
 | 
			
		||||
                    ;; and make this x-expression...
 | 
			
		||||
                    `(li (@ (class "my-result"))
 | 
			
		||||
                         (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
 | 
			
		||||
                            ,title) ; using unquote to insert the result page title
 | 
			
		||||
                         (div (@ (class "my-result__info")) ; constructing the line under the search result
 | 
			
		||||
                              "last edited "
 | 
			
		||||
                              (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
 | 
			
		||||
                              ,(format ", ~a words, ~a kb"
 | 
			
		||||
                                       wordcount
 | 
			
		||||
                                       (exact->inexact (/ (round (/ size 100)) 10)))))))
 | 
			
		||||
                search-results)))))
 | 
			
		||||
   results-content))
 | 
			
		||||
 | 
			
		||||
;; will be called when the web browser asks to load the page
 | 
			
		||||
(define (page-search req)
 | 
			
		||||
| 
						 | 
				
			
			@ -71,34 +48,31 @@
 | 
			
		|||
   ;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner"
 | 
			
		||||
   ;; grab the first part to use as the wikiname, in this case, "minecraft"
 | 
			
		||||
   (define wikiname (path/param-path (first (url-path (request-uri req)))))
 | 
			
		||||
   ;; grab a dict of url search params
 | 
			
		||||
   (define params (url-query (request-uri req)))
 | 
			
		||||
   ;; grab the part after ?q= which is the search terms
 | 
			
		||||
   (define query (dict-ref (url-query (request-uri req)) 'q #f))
 | 
			
		||||
   ;; constructing the URL where I want to get fandom data from...
 | 
			
		||||
   (define origin (format "https://~a.fandom.com" wikiname))
 | 
			
		||||
   ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
 | 
			
		||||
   (define dest-url
 | 
			
		||||
     (format "~a/api.php?~a"
 | 
			
		||||
             origin
 | 
			
		||||
             (params->query `(("action" . "query")
 | 
			
		||||
                              ("list" . "search")
 | 
			
		||||
                              ("srsearch" . ,query)
 | 
			
		||||
                              ("formatversion" . "2")
 | 
			
		||||
                              ("format" . "json")))))
 | 
			
		||||
   (define query (dict-ref params 'q #f))
 | 
			
		||||
   ;; figure out which search provider we're going to use
 | 
			
		||||
   (define search-provider (hash-ref search-providers (config-get 'feature_offline::search)))
 | 
			
		||||
 | 
			
		||||
   ;; simultaneously get the search results from the fandom API, as well as information about the wiki as a whole (its license, icon, name)
 | 
			
		||||
   (define-values (dest-res siteinfo)
 | 
			
		||||
   ;; external special:search url to link at the bottom of the page as the upstream source
 | 
			
		||||
   (define external-search-url
 | 
			
		||||
     (format "https://~a.fandom.com/wiki/Special:Search?~a"
 | 
			
		||||
             wikiname
 | 
			
		||||
             (params->query `(("query" . ,query)
 | 
			
		||||
                              ("search" . "internal")))))
 | 
			
		||||
 | 
			
		||||
   ;; simultaneously get the search results, as well as information about the wiki as a whole (its license, icon, name)
 | 
			
		||||
   (define-values (results-content siteinfo)
 | 
			
		||||
     (thread-values
 | 
			
		||||
      (λ ()
 | 
			
		||||
        (log-outgoing dest-url)
 | 
			
		||||
        (easy:get dest-url #:timeouts timeouts)) ;; HTTP request to dest-url for search results
 | 
			
		||||
        (search-provider wikiname query params)) ;; call the search provider (see file "search-provider-fandom.rkt")
 | 
			
		||||
      (λ ()
 | 
			
		||||
        (siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki
 | 
			
		||||
 | 
			
		||||
   ;; search results are a JSON string. parse JSON into racket data structures
 | 
			
		||||
   (define data (easy:response-json dest-res))
 | 
			
		||||
   ;; calling my generate-results-page function with the information so far in order to get a big fat x-expression
 | 
			
		||||
   ;; big fat x-expression goes into the body variable
 | 
			
		||||
   (define body (generate-results-page req dest-url wikiname query data #:siteinfo siteinfo))
 | 
			
		||||
   (define body (generate-results-page req external-search-url wikiname query results-content #:siteinfo siteinfo))
 | 
			
		||||
   ;; error checking
 | 
			
		||||
   (when (config-true? 'debug)
 | 
			
		||||
     ; used for its side effects
 | 
			
		||||
| 
						 | 
				
			
			@ -110,7 +84,4 @@
 | 
			
		|||
    #:headers (build-headers always-headers)
 | 
			
		||||
    (λ (out)
 | 
			
		||||
      (write-html body out)))))
 | 
			
		||||
(module+ test
 | 
			
		||||
  (parameterize ([(config-parameter 'feature_offline::only) "false"])
 | 
			
		||||
    (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
 | 
			
		||||
                                      (generate-results-page test-req "" "test" "Gacha" search-json-data))))))
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										63
									
								
								src/search-provider-fandom.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								src/search-provider-fandom.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,63 @@
 | 
			
		|||
#lang racket/base
 | 
			
		||||
(require racket/string
 | 
			
		||||
         (prefix-in easy: net/http-easy)
 | 
			
		||||
         "application-globals.rkt"
 | 
			
		||||
         "config.rkt"
 | 
			
		||||
         "../lib/url-utils.rkt"
 | 
			
		||||
         "whole-utils.rkt"
 | 
			
		||||
         "../lib/xexpr-utils.rkt")
 | 
			
		||||
 | 
			
		||||
(provide
 | 
			
		||||
 generate-results-content-fandom)
 | 
			
		||||
 | 
			
		||||
(module+ test
  (require rackunit
           "test-utils.rkt")
  ;; fixture: a parsed response in the shape the code below reads from the fandom search API
  ;; (a hash with /query/search holding a list of hits); it contains two hits
  (define search-json-data
    '#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))

;; Private helper: turns already-fetched search hits into the x-expression shown in the wiki page layout.
;; Does no I/O, so it can be tested against fixture data.
;;   wikiname       - wiki identifier, used to build the local "/<wikiname>/wiki/<page>" links
;;   query          - the search terms, echoed back in the header
;;   search-results - list of hits (the "/query/search" part of the API response); each hit is
;;                    read for its title, timestamp, wordcount and size
(define (search-results->content-fandom wikiname query search-results)
  `(div (@ (class "mw-parser-output"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results found for " (length search-results))
           (strong ,query))
        ;; *u*nordered *l*ist of matching search results
        (ul ,@(for/list ([result search-results])
                (let* ([title (jp "/title" result)]
                       [page-path (page-title->path title)]
                       [timestamp (jp "/timestamp" result)]
                       [wordcount (jp "/wordcount" result)]
                       [size (jp "/size" result)])
                  ;; and make this x-expression...
                  `(li (@ (class "my-result"))
                       (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path))) ; using unquote to insert the result page URL
                          ,title) ; using unquote to insert the result page title
                       (div (@ (class "my-result__info")) ; constructing the line under the search result
                            "last edited "
                            ;; only the date part of the timestamp (before the "T") is displayed
                            (time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
                            ,(format ", ~a words, ~a kb"
                                     wordcount
                                     ;; size / 1000, rounded to the nearest 0.1
                                     (exact->inexact (/ (round (/ size 100)) 10))))))))))

;; Search provider backed by the fandom API.
;; Fetches search results for `query` on the wiki `wikiname` and returns the x-expression to display.
;;   params - the request's url query params; unused here, accepted so that every search provider
;;            can be called with the same three arguments
(define (generate-results-content-fandom wikiname query params)
  ;; constructing the URL where I want to get fandom data from...
  (define origin (format "https://~a.fandom.com" wikiname))
  ;; the dest-URL will look something like https://minecraft.fandom.com/api.php?action=query&list=search&srsearch=Spawner&formatversion=2&format=json
  (define dest-url
    (format "~a/api.php?~a"
            origin
            (params->query `(("action" . "query")
                             ("list" . "search")
                             ("srsearch" . ,query)
                             ("formatversion" . "2")
                             ("format" . "json")))))
  ;; HTTP request to dest-url for search results
  (log-outgoing dest-url)
  (define res (easy:get dest-url #:timeouts timeouts))
  (define json (easy:response-json res))
  ;; rendering is delegated to the pure helper so it can be tested without network access
  (search-results->content-fandom wikiname query (jp "/query/search" json)))

(module+ test
  ;; exercise the pure renderer with the fixture: the first hit must be linked under its local wiki path
  (parameterize ([(config-parameter 'feature_offline::only) "false"])
    (check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
                                      (search-results->content-fandom "test" "Gacha" (jp "/query/search" search-json-data)))))))
 | 
			
		||||
							
								
								
									
										90
									
								
								src/search-provider-solr.rkt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								src/search-provider-solr.rkt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,90 @@
 | 
			
		|||
#lang racket/base
 | 
			
		||||
(require racket/dict
 | 
			
		||||
         racket/string
 | 
			
		||||
         (prefix-in easy: net/http-easy)
 | 
			
		||||
         "application-globals.rkt"
 | 
			
		||||
         "../lib/html-parsing/main.rkt"
 | 
			
		||||
         "../lib/url-utils.rkt"
 | 
			
		||||
         "whole-utils.rkt"
 | 
			
		||||
         "../lib/xexpr-utils.rkt")
 | 
			
		||||
 | 
			
		||||
(provide
 | 
			
		||||
 generate-results-content-solr)
 | 
			
		||||
 | 
			
		||||
;; one search hit, prepared for display:
;;   hl-title  - list of x-expression children for the title, parsed from the highlighted title HTML
;;   hl-body   - list of x-expression children for the preview, parsed from the highlighted body HTML
;;   kb        - document length divided by 1000, rounded to the nearest 0.1
;;   words     - estimated word count (length / 6, rounded to the nearest 10)
;;   page-path - path of the page, derived from its title with page-title->path
(struct result^ (hl-title hl-body kb words page-path) #:transparent)
 | 
			
		||||
 | 
			
		||||
;; Search provider backed by a Solr server on localhost:8983, with one Solr core per wiki.
;; Queries the core named after `wikiname` for `query` and returns the x-expression to display:
;; a filter form, a result count header, and the list of highlighted results.
;;   params - the request's url query params; 'op ("or", anything else means "and") and
;;            'sort ("len", anything else means relevance) adjust the search
(define (generate-results-content-solr wikiname query params)
  ;; wikiname comes from the request URL, so it must be escaped before it becomes part of the
  ;; Solr URL path. Otherwise a name containing "/", "?" or "#" could point the request at a
  ;; different Solr endpoint and cut off the query string built below.
  (local-require (only-in net/uri-codec uri-path-segment-encode))

  ;; grab things from params that would modify the search
  ;; each choice is a pair: (value used in our own form . value sent to Solr)
  (define op (if (equal? (dict-ref params 'op #f) "or") '("or" . "OR") '("and" . "AND")))
  (define sort-order (if (equal? (dict-ref params 'sort #f) "len") '("len" . "len desc") '("relevance" . "score desc")))

  ;; the dest-URL will look something like http://localhost:8983/solr/bloons/select?defType=edismax&fl=id%2Clen&hl.defaultSummary=true&hl.encoder=html&hl.fl=title%2Cbody&hl.method=unified&hl.tag.post=%3C%2Fmark%3E&hl.tag.pre=%3Cmark%3E&hl=true&indent=true&q.op=AND&q=blo&qf=title_prefix%20title%5E2.0%20body%20table%5E0.3&useParams=
  (define dest-url
    (format "http://localhost:8983/solr/~a/select?~a"
            (uri-path-segment-encode wikiname)
            (params->query `(("defType" . "edismax")
                             ("q" . ,query)
                             ("q.op" . ,(cdr op))
                             ("qf" . "title_prefix title^2.0 body table^0.3")
                             ("hl" . "true")
                             ("hl.method" . "unified")
                             ("hl.defaultSummary" . "true")
                             ("hl.fl" . "title,body")
                             ("fl" . "id,len,title")
                             ("hl.encoder" . "html")
                             ("hl.tag.pre" . "<mark>")
                             ("hl.tag.post" . "</mark>")
                             ("sort" . ,(cdr sort-order))))))
  ;; HTTP request to dest-url for search results
  (log-outgoing dest-url)
  (define res (easy:get dest-url #:timeouts timeouts))
  (define json (easy:response-json res))

  ;; build result objects
  ;; highlighting is looked up by document id (as a symbol) for each doc below
  (define highlighting (jp "/highlighting" json))
  (define results
    (for/list ([doc (jp "/response/docs" json)])
      (define id (jp "/id" doc))
      (define len (jp "/len" doc))
      (define title (jp "/title" doc))
      (define page-path (page-title->path title))
      (define kb (exact->inexact (/ (round (/ len 100)) 10))) ; divide by 1000 and round to nearest 0.1
      (define words (* (round (/ len 60)) 10)) ; divide by 6 and round to nearest 10
      (define hl (hash-ref highlighting (string->symbol id)))
      ;; the highlighted snippets are HTML strings (containing the <mark> tags requested above);
      ;; parse them and keep only the children of the parse result (cdr drops its head element)
      (define hl-title (cdr (html->xexp (jp "/title/0" hl))))
      (define hl-body (cdr (html->xexp (string-trim (jp "/body/0" hl)))))
      (result^ hl-title hl-body kb words page-path)))

  ;; divide by 1000 and round to nearest 0.01 - displayed as seconds, so QTime is presumably in milliseconds
  (define qtime (exact->inexact (/ (round (/ (jp "/responseHeader/QTime" json) 10)) 100)))

  ;; attribute list for an <option>: always its value, plus `selected` when it is the current choice
  (define (option-attributes value current-value)
    (append
     `((value ,value))
     (if (equal? value current-value)
         `((selected))
         `())))

  ;; generate content for display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        ;; form for changing the search options; the hidden input carries the current search terms along
        (form (@ (class "my-result__filter"))
              (input (@ (type "hidden") (name "q") (value ,query)))
              (select (@ (name "op"))
                      (option (@ ,@(option-attributes "and" (car op))) "All words must match")
                      (option (@ ,@(option-attributes "or" (car op))) "Some words must match"))
              (select (@ (name "sort"))
                      (option (@ ,@(option-attributes "relevance" (car sort-order))) "Relevant articles")
                      (option (@ ,@(option-attributes "len" (car sort-order))) "Wordiest articles"))
              (button "Filter results"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results (~a seconds) found for " (jp "/response/numFound" json) qtime)
           (strong ,query))
        ;; *u*nordered *l*ist of matching search results
        (ul ,@(for/list ([result results])
                `(li (@ (class "my-result"))
                     (a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname (result^-page-path result)))) ; url
                        ,@(result^-hl-title result)) ; title
                     (p (@ (class "my-result__description")) ,@(result^-hl-body result)) ; result preview
                     (div (@ (class "my-result__info")) ; line under the search result
                          ,(format "~a words, ~a kb of readable stuff"
                                   (result^-words result)
                                   (result^-kb result))))))))
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue