Fix some special characters in titles like ? and ;

This commit is contained in:
Cadence Ember 2022-11-17 23:25:06 +13:00
parent 1c83c0b4d3
commit 92591a5eab
Signed by: cadence
GPG key ID: BC1C2C61CF521B17
4 changed files with 25 additions and 15 deletions

View file

@ -52,7 +52,7 @@
,@(map
(λ (result)
(define title (jp "/title" result))
(define page-path (regexp-replace* #rx" " title "_"))
(define page-path (page-title->path title))
`(li
(a (@ (href ,(format "/~a/wiki/~a" wikiname page-path)))
,title)))

View file

@ -18,9 +18,9 @@
(define examples
'(("minecraft" "Bricks")
("crosscode" "CrossCode_Wiki")
("undertale" "Hot_Dog...%3F")
("tardis" "Eleanor_Blake")
("crosscode" "CrossCode Wiki")
("undertale" "Hot Dog...?")
("tardis" "Eleanor Blake")
("zelda" "Boomerang")))
(define content
@ -45,7 +45,7 @@
(h2 "Example pages")
(ul
,@(map (λ (x)
`(li (a (@ (href ,(apply format "/~a/wiki/~a" x)))
`(li (a (@ (href ,(format "/~a/wiki/~a" (car x) (page-title->path (cadr x)))))
,(apply format "~a: ~a" x))))
examples))
(h2 "Testimonials")

View file

@ -38,7 +38,7 @@
(ul ,@(map
(λ (result)
(let* ([title (jp "/title" result)]
[page-path (regexp-replace* #rx" " title "_")]
[page-path (page-title->path title)]
[timestamp (jp "/timestamp" result)]
[wordcount (jp "/wordcount" result)]
[size (jp "/size" result)])

View file

@ -17,7 +17,9 @@
; prints "out: <url>"
log-outgoing
; pass in a header, headers, or something useless. they'll all combine into a list
build-headers)
build-headers
; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url
page-title->path)
(module+ test
(require "typed-rackunit.rkt"))
@ -26,14 +28,18 @@
;; https://url.spec.whatwg.org/#urlencoded-serializing
(define urlencoded-set '(#\! #\' #\( #\) #\~ ; urlencoded set
#\$ #\% #\& #\+ #\, ; component set
#\/ #\: #\; #\= #\@ #\[ #\\ #\] #\^ #\| ; userinfo set
#\? #\` #\{ #\} ; path set
#\ #\" #\# #\< #\> ; query set
; c0 controls included elsewhere
; higher ranges included elsewhere
))
;; Characters handed to percent-encode when building the path part of a URL.
;; C0 controls and the higher character ranges are handled elsewhere.
(define path-set
  (list #\;                       ; userinfo set in the URL standard, but deliberately kept here
        #\? #\` #\{ #\}           ; path set
        #\space #\" #\# #\< #\>)) ; query set
;; Full character set for application/x-www-form-urlencoded serializing:
;; the urlencoded, component and userinfo groups (minus the semicolon, which
;; lives in path-set), followed by everything in path-set.
(define urlencoded-set
  (let ([urlencoded-chars '(#\! #\' #\( #\) #\~)]
        [component-chars '(#\$ #\% #\& #\+ #\,)]
        [userinfo-chars '(#\/ #\: #\= #\@ #\[ #\\ #\] #\^ #\|)])
    (append urlencoded-chars component-chars userinfo-chars path-set)))
(: percent-encode (String (Listof Char) Boolean -> Bytes))
(define (percent-encode value set space-as-plus)
@ -98,3 +104,7 @@
[(header? f) (list f)]
[(pair? f) f]))
fs)))
;; Turn a raw wiki page title (e.g. "Hot Dog...?") into the path segment used
;; after /<wikiname>/wiki/ in a URL.
;;
;; title: the human-readable page title, not yet encoded in any way.
;; Returns the bytes produced by percent-encode.
;;
;; Steps:
;;  1. every space is replaced by an underscore, as wiki URLs do;
;;  2. the result is passed to percent-encode with path-set plus a literal #\%
;;     as the set of characters to encode, and space-as-plus disabled.
;;
;; The #\% is added here because the input is a raw title: a bare percent sign
;; left in the path (e.g. "100%" or "50%AB") would be read by the server as the
;; start of an escape sequence. MediaWiki itself emits %25 for it.
(: page-title->path (String -> Bytes))
(define (page-title->path title)
  (define underscored (regexp-replace* #rx" " title "_"))
  (percent-encode underscored (cons #\% path-set) #f))