Split massive uploads in Solr indexer

This commit is contained in:
Cadence Ember 2024-01-11 22:33:59 +13:00
parent b02e2a4053
commit a52d131b93

View file

@ -8,6 +8,7 @@
racket/promise racket/promise
racket/port racket/port
racket/runtime-path racket/runtime-path
racket/sequence
racket/string racket/string
file/gunzip file/gunzip
db db
@ -156,8 +157,16 @@
(define data (define data
(cond (cond
[(and (read-from-cache?) (file-exists? "cache.rkt")) [(and (read-from-cache?) (file-exists? "cache.rkt"))
(displayln "Reading in...") (define size (file-size "cache.rkt"))
(with-input-from-file "cache.rkt" (λ () (read)))] (call-with-input-file "cache.rkt"
(λ (in)
(define quit (make-progress (λ () (progress^ (ceiling (/ (file-position in) 64 1024))
(ceiling (/ size 64 1024))
"Reading in..."))
2))
(begin0
(read in)
(quit))))]
[else [else
(define x (box (progress^ 0 1 "..."))) (define x (box (progress^ 0 1 "...")))
(define quit (make-progress (λ () (unbox x)))) (define quit (make-progress (λ () (unbox x))))
@ -183,18 +192,22 @@
(display "Converting... ") (display "Converting... ")
(flush-output) (flush-output)
(define ser (jsexpr->bytes data)) (define slice-size 30000)
(define ser-port (open-input-bytes ser)) (define slices (ceiling (/ (length data) slice-size)))
(define quit (make-progress (λ () (progress^ (ceiling (/ (file-position ser-port) 64 1024)) (for ([slice (in-slice slice-size data)]
(ceiling (/ (bytes-length ser) 64 1024)) [i (in-naturals 1)])
"Posting...")) (define ser (jsexpr->bytes slice))
2)) (define ser-port (open-input-bytes ser))
(define res (define quit (make-progress (λ () (progress^ (ceiling (/ (file-position ser-port) 64 1024))
(post (format "http://localhost:8983/solr/~a/update?commit=true" wikiname) (ceiling (/ (bytes-length ser) 64 1024))
#:data ser-port (format "Posting... (~a/~a)" i slices)))
#:headers '#hasheq((Content-Type . "application/json")) 2))
#:timeouts (make-timeout-config #:lease 5 #:connect 5 #:request 300))) (define res
(quit) (post (format "http://localhost:8983/solr/~a/update?commit=true" wikiname)
(displayln (response-status-line res))) #:data ser-port
#:headers '#hasheq((Content-Type . "application/json"))
#:timeouts (make-timeout-config #:lease 5 #:connect 5 #:request 300)))
(quit)
(displayln (response-status-line res))))
(run start) (run start)