2022-08-23 09:57:42 +00:00
|
|
|
#lang typed/racket/base
|
|
|
|
(require racket/string
|
2023-05-27 11:37:45 +00:00
|
|
|
typed/net/url-structs
|
2022-08-23 09:57:42 +00:00
|
|
|
"pure-utils.rkt")
|
2022-10-09 10:43:21 +00:00
|
|
|
(require/typed web-server/http/request-structs
|
|
|
|
[#:opaque Header header?])
|
2022-08-23 09:57:42 +00:00
|
|
|
|
|
|
|
(provide
|
2022-10-02 09:44:44 +00:00
|
|
|
; regex to match wiki names
|
2022-10-04 08:18:30 +00:00
|
|
|
px-wikiname
|
2022-08-23 09:57:42 +00:00
|
|
|
; make a query string from an association list of strings
|
|
|
|
params->query
|
2023-02-05 04:56:15 +00:00
|
|
|
; custom percent encoding (you probably want params->query instead)
|
|
|
|
percent-encode
|
|
|
|
; sets for custom percent encoding
|
|
|
|
path-set urlencoded-set filename-set
|
2022-08-23 09:57:42 +00:00
|
|
|
; make a proxied version of a fandom url
|
|
|
|
u-proxy-url
|
|
|
|
; check whether a url is on a domain controlled by fandom
|
2022-10-04 08:13:07 +00:00
|
|
|
is-fandom-url?
|
2022-10-09 10:43:21 +00:00
|
|
|
; pass in any mix of single headers, lists of headers, #f, or void; they are all flattened into one list of headers
|
2022-11-17 10:25:06 +00:00
|
|
|
build-headers
|
|
|
|
; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url
|
2023-05-27 11:37:45 +00:00
|
|
|
page-title->path
|
|
|
|
; path/param eats semicolons into params, which need to be fixed back into semicolons
|
|
|
|
fix-semicolons-url-path
|
|
|
|
fix-semicolons-url)
|
2022-08-23 09:57:42 +00:00
|
|
|
|
|
|
|
(module+ test
|
|
|
|
(require "typed-rackunit.rkt"))
|
|
|
|
|
2022-10-13 09:54:46 +00:00
|
|
|
;; Regex source fragment (a string, not a compiled regexp) matching a wiki name:
;; 1 to 50 ASCII letters, digits, or hyphens. Meant to be spliced into larger patterns.
(define px-wikiname "[a-zA-Z0-9-]{1,50}")
|
2022-10-02 09:44:44 +00:00
|
|
|
|
2022-08-23 09:57:42 +00:00
|
|
|
;; https://url.spec.whatwg.org/#urlencoded-serializing
|
|
|
|
|
2022-11-17 10:25:06 +00:00
|
|
|
;; Characters that percent-encode escapes when encoding a URL path.
;; C0 controls and bytes above #x7E are not listed here because percent-encode
;; always escapes them regardless of the set it is given.
(define path-set
  '(#\;                            ; userinfo set in the URL standard, deliberately included here
    #\? #\` #\{ #\}                ; path set
    #\space #\" #\# #\< #\>        ; query set
    ))
|
|
|
|
;; Characters escaped for application/x-www-form-urlencoded output:
;; the urlencoded, component and userinfo sets, followed by everything in path-set.
(define urlencoded-set
  (append
   '(#\! #\' #\( #\) #\~)                       ; urlencoded set
   '(#\$ #\% #\& #\+ #\,)                       ; component set
   '(#\/ #\: #\= #\@ #\[ #\\ #\] #\^ #\|)       ; userinfo set
   path-set))
|
2022-08-23 09:57:42 +00:00
|
|
|
|
2023-02-05 04:56:15 +00:00
|
|
|
;; Character set for use as the `set` argument of percent-encode.
;; NOTE(review): presumably the characters that are unsafe in file names — confirm against callers.
(define filename-set '(#\< #\> #\: #\" #\/ #\\ #\| #\? #\* #\# #\~ #\&))
|
|
|
|
|
2022-08-23 09:57:42 +00:00
|
|
|
;; Percent-encode `value` and return the result as a byte string.
;;
;; value          - the string to encode; it is converted to UTF-8 first and
;;                  encoded byte by byte.
;; set            - extra characters to escape. Independently of this set, every
;;                  byte <= #x1F and every byte > #x7E is always escaped.
;; space-as-plus  - when true, a space byte (32) becomes "+"; this check runs
;;                  before the set lookup, so it wins even if space is in `set`.
;;
;; Escapes are always "%" followed by exactly two uppercase hex digits.
(: percent-encode (String (Listof Char) Boolean -> Bytes))
(define (percent-encode value set space-as-plus)
  (define b (string->bytes/utf-8 value))
  (apply bytes-append
         (for/list ([char b]) : (Listof Bytes)
           (cond
             [(and space-as-plus (eq? char 32))
              #"+"]
             [(or (member (integer->char char) set)
                  (char . > . #x7E)
                  (char . <= . #x1F))
              (let ([hex (string-upcase (number->string char 16))])
                ;; number->string yields a single digit for bytes below #x10
                ;; (e.g. newline -> "A"); pad so the escape is "%0A", not "%A".
                (bytes-append (if (char . < . #x10) #"%0" #"%")
                              (string->bytes/latin-1 hex)))]
             [else
              (bytes char)]))))
|
|
|
|
|
|
|
|
;; Build a query string from an association list of string pairs.
;; Each key and value is percent-encoded with urlencoded-set (spaces become "+"),
;; written as key=value, and the pairs are joined with "&".
(: params->query ((Listof (Pair String String)) -> String))
(define (params->query params)
  (: encode-pair ((Pair String String) -> String))
  (define (encode-pair kv)
    (format "~a=~a"
            (percent-encode (car kv) urlencoded-set #t)
            (percent-encode (cdr kv) urlencoded-set #t)))
  (string-join (map encode-pair params) "&"))

(module+ test
  (check-equal? (params->query '(("hello" . "world")))
                "hello=world")
  (check-equal? (params->query '(("a" . "hello world''") ("utf8" . "✓")))
                "a=hello+world%27%27&utf8=%E2%9C%93"))
|
|
|
|
|
|
|
|
;; Pattern for URLs on fandom-controlled hosts: the static asset host, or any
;; <wikiname>.fandom.com wiki. Only https URLs with a "/" after the host match.
;; Compiled once here rather than on every call to is-fandom-url?.
(define px-fandom-url
  (pregexp (format "^https://static\\.wikia\\.nocookie\\.net/|^https://~a\\.fandom\\.com/" px-wikiname)))

;; Return #t when `url` starts with a fandom-controlled https origin, #f otherwise.
(: is-fandom-url? (String -> Boolean))
(define (is-fandom-url? url)
  (regexp-match? px-fandom-url url))

(module+ test
  (check-true (is-fandom-url? "https://static.wikia.nocookie.net/wikiname/images/2/2f/SomeImage.jpg/revision/latest?cb=20110210094136"))
  (check-true (is-fandom-url? "https://test.fandom.com/wiki/Some_Page"))
  (check-false (is-fandom-url? "https://cadence.moe")))
|
|
|
|
|
|
|
|
;; Make a proxied version of a fandom url: "/proxy?dest=<encoded url>".
;; NOTE(review): `u` comes from pure-utils.rkt and is not visible here; presumably it
;; applies the transform only when the predicate accepts the value and otherwise
;; returns the value unchanged — confirm against its definition.
(: u-proxy-url (String -> String))
(define (u-proxy-url url)
  (u is-fandom-url?
     ;; the argument handed over by `u` is ignored; the closed-over `url` is encoded
     (λ ([_target : String])
       (string-append "/proxy?" (params->query (list (cons "dest" url)))))
     url))
|
2022-10-04 08:13:07 +00:00
|
|
|
|
2022-10-09 10:43:21 +00:00
|
|
|
;; Combine any number of arguments into one flat list of headers.
;; Each argument may be a single header, a list of headers, #f, or void;
;; #f, void and the empty list contribute nothing.
(: build-headers ((U Header (Listof Header) False Void) * -> (Listof Header)))
(define (build-headers . fs)
  (: ->header-list ((U Header (Listof Header) False Void) -> (Listof Header)))
  (define (->header-list item)
    (cond
      [(header? item) (list item)]   ; single header: wrap it
      [(pair? item) item]            ; non-empty list of headers: use as is
      [else null]))                  ; #f, void, or '()
  (apply append (map ->header-list fs)))
|
2022-11-17 10:25:06 +00:00
|
|
|
|
|
|
|
;; Turn a page title into the byte string used for it in a URL path:
;; every space becomes an underscore, then the result is percent-encoded
;; with path-set (spaces are not turned into "+").
(: page-title->path (String -> Bytes))
(define (page-title->path title)
  (define underscored (string-replace title " " "_"))
  (percent-encode underscored path-set #f))
|
2023-05-27 11:37:45 +00:00
|
|
|
|
|
|
|
;; Undo the path/param split on semicolons for every segment of a URL path.
;; A segment with no params, or whose path is a symbol, is returned unchanged.
;; Otherwise its params are joined back onto the path text with ";" and the
;; params list is emptied.
(: fix-semicolons-url-path ((Listof Path/Param) -> (Listof Path/Param)))
(define (fix-semicolons-url-path pps)
  (map (λ ([pp : Path/Param])
         (define segment (path/param-path pp))
         (define params (path/param-param pp))
         (cond
           [(null? params) pp]
           [(symbol? segment) pp]
           [else
            (path/param (string-append segment ";" (string-join params ";"))
                        null)]))
       pps))
|
|
|
|
|
|
|
|
;; Return a copy of `orig-url` whose path has been run through
;; fix-semicolons-url-path; every other field is carried over unchanged.
(: fix-semicolons-url (URL -> URL))
(define (fix-semicolons-url orig-url)
  (define repaired-path (fix-semicolons-url-path (url-path orig-url)))
  (struct-copy url orig-url [path repaired-path]))
|
|
|
|
|