Fix some special characters in titles like ? and ;

This commit is contained in:
Cadence Ember 2022-11-17 23:25:06 +13:00
parent 1c83c0b4d3
commit 92591a5eab
Signed by: cadence
GPG key ID: BC1C2C61CF521B17
4 changed files with 25 additions and 15 deletions

View file

@@ -52,7 +52,7 @@
,@(map ,@(map
(λ (result) (λ (result)
(define title (jp "/title" result)) (define title (jp "/title" result))
(define page-path (regexp-replace* #rx" " title "_")) (define page-path (page-title->path title))
`(li `(li
(a (@ (href ,(format "/~a/wiki/~a" wikiname page-path))) (a (@ (href ,(format "/~a/wiki/~a" wikiname page-path)))
,title))) ,title)))

View file

@@ -18,9 +18,9 @@
(define examples (define examples
'(("minecraft" "Bricks") '(("minecraft" "Bricks")
("crosscode" "CrossCode_Wiki") ("crosscode" "CrossCode Wiki")
("undertale" "Hot_Dog...%3F") ("undertale" "Hot Dog...?")
("tardis" "Eleanor_Blake") ("tardis" "Eleanor Blake")
("zelda" "Boomerang"))) ("zelda" "Boomerang")))
(define content (define content
@@ -45,7 +45,7 @@
(h2 "Example pages") (h2 "Example pages")
(ul (ul
,@(map (λ (x) ,@(map (λ (x)
`(li (a (@ (href ,(apply format "/~a/wiki/~a" x))) `(li (a (@ (href ,(format "/~a/wiki/~a" (car x) (page-title->path (cadr x)))))
,(apply format "~a: ~a" x)))) ,(apply format "~a: ~a" x))))
examples)) examples))
(h2 "Testimonials") (h2 "Testimonials")

View file

@@ -38,7 +38,7 @@
(ul ,@(map (ul ,@(map
(λ (result) (λ (result)
(let* ([title (jp "/title" result)] (let* ([title (jp "/title" result)]
[page-path (regexp-replace* #rx" " title "_")] [page-path (page-title->path title)]
[timestamp (jp "/timestamp" result)] [timestamp (jp "/timestamp" result)]
[wordcount (jp "/wordcount" result)] [wordcount (jp "/wordcount" result)]
[size (jp "/size" result)]) [size (jp "/size" result)])

View file

@@ -17,7 +17,9 @@
; prints "out: <url>" ; prints "out: <url>"
log-outgoing log-outgoing
; pass in a header, headers, or something useless. they'll all combine into a list ; pass in a header, headers, or something useless. they'll all combine into a list
build-headers) build-headers
; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url
page-title->path)
(module+ test (module+ test
(require "typed-rackunit.rkt")) (require "typed-rackunit.rkt"))
@@ -26,14 +28,18 @@
;; https://url.spec.whatwg.org/#urlencoded-serializing ;; https://url.spec.whatwg.org/#urlencoded-serializing
(define urlencoded-set '(#\! #\' #\( #\) #\~ ; urlencoded set (define path-set '(#\; ; semicolon is part of the userinfo set in the URL standard, but I'm putting it here
#\$ #\% #\& #\+ #\, ; component set #\? #\` #\{ #\} ; path set
#\/ #\: #\; #\= #\@ #\[ #\\ #\] #\^ #\| ; userinfo set #\ #\" #\# #\< #\> ; query set
#\? #\` #\{ #\} ; path set ; c0 controls included elsewhere
#\ #\" #\# #\< #\> ; query set ; higher ranges included elsewhere
; c0 controls included elsewhere ))
; higher ranges included elsewhere (define urlencoded-set (append
)) '(#\! #\' #\( #\) #\~ ; urlencoded set
#\$ #\% #\& #\+ #\, ; component set
#\/ #\: #\= #\@ #\[ #\\ #\] #\^ #\| ; userinfo set
)
path-set))
(: percent-encode (String (Listof Char) Boolean -> Bytes)) (: percent-encode (String (Listof Char) Boolean -> Bytes))
(define (percent-encode value set space-as-plus) (define (percent-encode value set space-as-plus)
@@ -98,3 +104,7 @@
[(header? f) (list f)] [(header? f) (list f)]
[(pair? f) f])) [(pair? f) f]))
fs))) fs)))
(: page-title->path (String -> Bytes))
(define (page-title->path title)
(percent-encode (regexp-replace* " " title "_") path-set #f))