Compare commits
1 commit
main
...
file-names
Author | SHA1 | Date | |
---|---|---|---|
e9748d774b |
112 changed files with 817 additions and 15878 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -16,5 +16,3 @@ compiled
|
|||
|
||||
# Personal
|
||||
/config.ini
|
||||
misc
|
||||
storage
|
||||
|
|
|
@ -1,71 +0,0 @@
|
|||
#lang cli
|
||||
(require charterm
|
||||
"archiver.rkt")
|
||||
|
||||
(help (usage "Downloads a single Fandom wiki in BreezeWiki offline format."
|
||||
""
|
||||
"Downloaded pages go into `archive/` next to the executable."
|
||||
"Database goes into `archiver.db*` next to the executable."
|
||||
"The database is necessary to store your download progress and resume where you left off if the process is interrupted."))
|
||||
|
||||
(flag (output-quiet?)
|
||||
("-q" "--output-quiet" "disable progress output")
|
||||
(output-quiet? #t))
|
||||
|
||||
(flag (output-progress?)
|
||||
("-p" "--output-progress" "progress output for terminals (default in a tty)")
|
||||
(output-progress? #t))
|
||||
|
||||
(flag (output-lines?)
|
||||
("-l" "--output-lines" "output the name of each file downloaded (default outside of a tty)")
|
||||
(output-lines? #t))
|
||||
|
||||
(constraint (one-of output-quiet? output-lines? output-progress?))
|
||||
|
||||
|
||||
|
||||
(program
 (start [wikiname "wikiname to download"])
 ;; Entry point: runs every stage in `all-stages` for WIKINAME, reporting
 ;; progress in the output mode selected by the flags (or auto-detected).

 ;; set up arguments
 (define width 80) ; columns available for the progress line
 ;; No explicit mode was requested: use the progress display only when the
 ;; port we print to is a terminal. `terminal-port?` needs an actual port, so
 ;; the parameter must be called rather than passed as a procedure.
 (when (not (or (output-quiet?) (output-lines?) (output-progress?)))
   (cond [(terminal-port? (current-output-port))
          (output-progress? #t)]
         [else
          (output-lines? #t)]))
 ;; Refresh `width` from the terminal; only attempted on Linux.
 (define (update-width)
   (when (output-progress?)
     (case (system-type 'os)
       [(linux)
        (with-charterm
          (call-with-values (λ () (charterm-screen-size))
                            (λ (cols rows)
                              ;; keep the previous width if the size is unknown
                              (when (exact-positive-integer? cols)
                                (set! width cols)))))]
       ;; NOTE(review): the original branch evaluated to a discarded `100`;
       ;; other platforms therefore keep the default width. Confirm intent.
       [else (void)])))
 (update-width)
 ;; check
 (when (or (not wikiname) (equal? wikiname ""))
   (raise-user-error "Please specify the wikiname to download on the command line."))
 ;; progress reporting based on selected mode
 ;; a = items done, b = total items, c = basename of the item just handled
 (define (report-progress a b c)
   (define basename (basename->name-for-query c))
   (cond
     [(output-lines?)
      (displayln basename)]
     [(output-progress?)
      ;; re-measure the terminal every 20 items, off the download thread
      (when (eq? (modulo a 20) 0)
        (thread (λ () (update-width))))
      (define prefix (format "[~a] [~a/~a] " wikiname a b))
      ;; clamp at 0 so a prefix wider than the terminal cannot make
      ;; `substring` fail with a negative end index
      (define available (max 0 (- width (string-length prefix))))
      (define real-width (min (string-length basename) available))
      (define name-display (substring basename 0 real-width))
      ;; ESC[2K clears the line, \r returns the cursor to column 0
      (printf "\e[2K\r~a~a" prefix name-display)
      (flush-output)]))
 ;; download all stages
 (for ([stage all-stages]
       [i (in-naturals 1)])
   (printf "> Stage ~a/~a~n" i (length all-stages))
   (stage wikiname report-progress)
   (displayln "")))
|
||||
|
||||
(run start)
|
|
@ -1,82 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/file
|
||||
racket/list
|
||||
racket/path
|
||||
racket/string
|
||||
json
|
||||
json-pointer
|
||||
db
|
||||
"../lib/syntax.rkt")
|
||||
|
||||
(provide
|
||||
get-slc
|
||||
query-exec*
|
||||
query-rows*
|
||||
query-list*
|
||||
query-value*
|
||||
query-maybe-value*
|
||||
query-maybe-row*)
|
||||
|
||||
(define storage-path (anytime-path ".." "storage"))
|
||||
(define database-file (build-path storage-path "archiver.db"))
|
||||
|
||||
(define slc (box #f))
|
||||
;; Return the shared SQLite connection. On first use this creates the storage
;; directory, opens (or creates) the database file, switches it to WAL mode,
;; applies any pending schema migrations and caches the connection in `slc`.
(define (get-slc)
  (define existing (unbox slc))
  (cond
    [existing existing]
    [else
     (make-directory* storage-path)
     (define slc* (sqlite3-connect #:database database-file #:mode 'create))
     (query-exec slc* "PRAGMA journal_mode=WAL")
     ;; A failing version query is taken to mean a brand-new database.
     (define database-version
       (with-handlers ([exn:fail:sql?
                        (λ (exn)
                          ; need to set up the database
                          (query-exec slc* "create table database_version (version integer, primary key (version))")
                          (query-exec slc* "insert into database_version values (0)")
                          0)])
         (query-value slc* "select version from database_version")))

     ;; One entry per schema version; entry N upgrades version N to N+1.
     (define migrations
       (wrap-sql
        ((query-exec slc* "create table page (wikiname TEXT NOT NULL, basename TEXT NOT NULL, progress INTEGER NOT NULL, PRIMARY KEY (wikiname, basename))")
         (query-exec slc* "create table wiki (wikiname TEXT NOT NULL, progress INTEGER, PRIMARY KEY (wikiname))"))
        ((query-exec slc* "create table special_page (wikiname TEXT NOT NULL, key TEXT NOT NULL, basename TEXT NOT NULL, PRIMARY KEY (wikiname, key))"))
        ((query-exec slc* "update wiki set progress = 2 where wikiname in (select wikiname from wiki inner join page using (wikiname) group by wikiname having min(page.progress) = 1)"))
        ;; `hash` was declared "TEXT NTO NULL" (typo), which SQLite accepts as a
        ;; nullable column with an odd type name. Databases that already ran
        ;; this migration keep the old declaration.
        ((query-exec slc* "create table image (wikiname TEXT NOT NULL, hash TEXT NOT NULL, url TEXT NOT NULL, ext TEXT, source INTEGER NOT NULL, progress INTEGER NOT NULL, PRIMARY KEY (wikiname, hash))"))
        ((query-exec slc* "alter table wiki add column sitename TEXT")
         (query-exec slc* "alter table wiki add column basepage TEXT")
         (query-exec slc* "alter table wiki add column license_text TEXT")
         (query-exec slc* "alter table wiki add column license_url TEXT"))
        ((query-exec slc* "alter table page add column redirect"))))

     ;; Apply pending migrations in order. The version bump is committed in the
     ;; same transaction as the migration itself, so an interruption can never
     ;; leave a migration applied but unrecorded (which would re-run it).
     (let do-migrate-step ([version database-version])
       (when (version . < . (length migrations))
         (define next-version (add1 version))
         (call-with-transaction
          slc*
          (λ ()
            ((list-ref migrations version))
            (query-exec slc* "update database_version set version = $1" next-version)))
         (do-migrate-step next-version)))

     (set-box! slc slc*)
     slc*]))
|
||||
|
||||
;; Each starred helper forwards its arguments to the `db` function of the same
;; name, supplying the shared connection from (get-slc) as the first argument.
(define-syntax-rule (define-slc-queries [starred plain] ...)
  (begin
    (define (starred . args)
      (apply plain (get-slc) args))
    ...))

(define-slc-queries
  [query-exec* query-exec]
  [query-rows* query-rows]
  [query-list* query-list]
  [query-value* query-value]
  [query-maybe-value* query-maybe-value]
  [query-maybe-row* query-maybe-row])
|
|
@ -1,390 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/class
|
||||
racket/draw
|
||||
racket/format
|
||||
racket/function
|
||||
racket/list
|
||||
racket/math
|
||||
racket/port
|
||||
racket/set
|
||||
racket/splicing
|
||||
racket/string
|
||||
(except-in pict text table)
|
||||
db
|
||||
net/http-easy
|
||||
memo
|
||||
(only-in racket/gui timer%)
|
||||
racket/gui/easy
|
||||
racket/gui/easy/operator
|
||||
(only-in pict bitmap)
|
||||
images/icons/arrow
|
||||
images/icons/control
|
||||
images/icons/stickman
|
||||
images/icons/style
|
||||
images/icons/symbol
|
||||
"archiver-database.rkt"
|
||||
"archiver.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
|
||||
(default-icon-material rubber-icon-material)
|
||||
|
||||
(require (for-syntax racket/base racket/match racket/set racket/string))
|
||||
|
||||
;; (@> expr)              -- every `@name` symbol inside EXPR is treated as an
;;                           observable; EXPR is recomputed from their values.
;; (@> "fmt" arg ...)     -- like `format`, where `@name` arguments are
;;                           observables and other arguments are used as-is.
;; Both forms expand to a call to `obs-combine`.
(define-syntax (@> stx)
  ;; `@foo` => (cons '@foo 'foo); anything else => #f
  (define (observable-pair v)
    (and (symbol? v)
         (let ([name (symbol->string v)])
           (and (string-prefix? name "@")
                (cons v (string->symbol (substring name 1)))))))
  (match (cdr (syntax->datum stx))
    [(list body) ; (@> (fn @obs))
     ;; walk the form, collecting observables and swapping in the bare names
     (define seen (mutable-set))
     (define rewritten
       (let walk ([node body])
         (cond
           [(observable-pair node)
            => (λ (found)
                 (set-add! seen found)
                 (cdr found))]
           [(pair? node) (cons (walk (car node)) (walk (cdr node)))]
           [else node])))
     (define found-l (set->list seen))
     (datum->syntax stx `(obs-combine (λ ,(map cdr found-l) ,rewritten) ,@(map car found-l)))]
    [(list (? string? template) args ...) ; (@> "Blah: ~a/~a" @arg1 arg2)
     ;; pair every argument with its observable (or #f for plain arguments)
     (define arg-pairs
       (for/list ([arg (in-list args)])
         (or (observable-pair arg) (cons #f arg))))
     (define observed (filter car arg-pairs))
     (datum->syntax stx `(obs-combine (λ ,(map cdr observed) (format ,template ,@(map cdr arg-pairs))) ,@(map car observed)))]))
|
||||
|
||||
(define/obs @auto-retry #f)
|
||||
|
||||
(define-struct qi^ (wikiname st stage progress max-progress ticks eta th) #:transparent) ;; queue item
|
||||
|
||||
(define rows (query-rows* "select wikiname, progress from wiki where progress < 4"))
|
||||
(define/obs @queue null)
|
||||
;; Append a fresh queue item for WIKINAME (with status ST and stage STAGE) to
;; @queue, unless an item with the same wikiname is already present.
(define (add-wikiname-to-queue wikiname st stage)
  (define (same-wiki? qi)
    (equal? (qi^-wikiname qi) wikiname))
  (define (enqueue queue)
    (cond
      [(findf same-wiki? queue) queue]
      [else (append queue (list (qi^ wikiname st stage 0 1 0 "..." #f)))]))
  (@queue . <~ . enqueue))
|
||||
(for ([row rows])
|
||||
(add-wikiname-to-queue (vector-ref row 0)
|
||||
(if (= (vector-ref row 1) 4)
|
||||
'complete
|
||||
'queued)
|
||||
(vector-ref row 1)))
|
||||
|
||||
(define status-icon-size 32)
|
||||
(define status-icon-min-width 36)
|
||||
(define button-icon-size 12)
|
||||
|
||||
(define color-green (make-color 90 212 68))
|
||||
|
||||
;; Scale a list of (x . y) pairs by FRACTION, returning a new list of pairs.
(define (resize coords fraction)
  (map (λ (point)
         (cons (* fraction (car point))
               (* fraction (cdr point))))
       coords))
|
||||
|
||||
;; Build a HEIGHT x HEIGHT pict of a flat right-pointing arrow, optionally
;; colorized with COLOR. The outline is designed on a 32-unit grid and scaled.
(define (flat-right-arrow #:height [height 32] #:color [color #f])
  (define outline
    (resize (list '(0 . 9) '(15 . 9) '(14 . 0)
                  '(31 . 15.5)
                  '(14 . 31) '(15 . 22) '(0 . 22))
            (/ height 32)))
  ;; NOTE(review): the draw procedure ignores the dx/dy offsets it receives,
  ;; so the polygon is always drawn at the dc's origin -- confirm intended.
  (define arrow
    (dc (λ (drawing-ctx dx dy)
          (send drawing-ctx draw-polygon outline))
        height height))
  (if color
      (colorize arrow color)
      arrow))
|
||||
|
||||
;; Render the "reset everything" button icon as a bitmap fitted to HEIGHT:
;; the stock left-over arrow with a second, rotated red arrow pinned under it.
(define (double-left-arrow-icon #:height [height 32])
  (define shift (/ height 48))
  (define front-arrow
    (bitmap
     (left-over-arrow-icon #:color halt-icon-color #:height height
                           #:material rubber-icon-material)))
  (define pin-x (- (* -20 shift) 2))
  (define pin-y (+ (* 6 shift) 1))
  (define back-arrow
    (bitmap
     (bitmap-render-icon
      (pict->bitmap
       (rotate
        (flat-right-arrow #:color (make-object color% 255 64 64) #:height (/ height 1.26))
        (* pi 1.23))))))
  ;; panorama grows the bounding box to include the arrow pinned outside it
  (define combined
    (panorama (pin-under front-arrow pin-x pin-y back-arrow)))
  (pict->bitmap
   (scale-to-fit combined height height #:mode 'preserve/max)))
|
||||
|
||||
(splicing-let ([frame-count 20])
|
||||
(define stickman-frames
|
||||
(for/vector ([s (in-range 0 1 (/ 1 frame-count))])
|
||||
(running-stickman-icon
|
||||
s
|
||||
#:height status-icon-size
|
||||
#:material (default-icon-material)))))
|
||||
|
||||
(define (stick n)
|
||||
(vector-ref stickman-frames (modulo n (vector-length stickman-frames))))
|
||||
|
||||
(define status-icons
|
||||
(hasheq 'queued (stop-icon #:color syntax-icon-color #:height status-icon-size)
|
||||
'paused (continue-forward-icon #:color syntax-icon-color #:height status-icon-size)
|
||||
'running (stick 0)
|
||||
'error (x-icon #:height status-icon-size)
|
||||
'complete (check-icon #:color color-green #:height status-icon-size)))
|
||||
|
||||
(define action-icons
|
||||
(hasheq 'pause (pause-icon #:color syntax-icon-color #:height button-icon-size)
|
||||
'resume (play-icon #:color color-green #:height button-icon-size)
|
||||
'reset (left-over-arrow-icon #:color halt-icon-color #:height button-icon-size)
|
||||
'reseter (double-left-arrow-icon #:height button-icon-size)))
|
||||
|
||||
(define (bitmap-view @the-bitmap [min-width 1])
|
||||
(pict-canvas #:min-size (@> (list (max min-width (send @the-bitmap get-width)) (send @the-bitmap get-height))) #;(if min-size (list min-size min-size) #f)
|
||||
#:stretch '(#f #f)
|
||||
#:style '(transparent)
|
||||
@the-bitmap
|
||||
bitmap))
|
||||
|
||||
;; Format exception E as a multi-line string: its message, then a "context:"
;; section with one line per stack-context entry (source location, then the
;; procedure name when one is known).
(define (exn->string e)
  (with-output-to-string
    (λ ()
      (displayln (exn-message e))
      (displayln "context:")
      (for ([item (continuation-mark-set->context (exn-continuation-marks e))])
        (define name (car item))
        (define loc (cdr item))
        ;; Either half of a context entry may be #f. `srcloc->string` rejects
        ;; #f (and may itself return #f), so fall back to a placeholder rather
        ;; than raising while reporting another error.
        (printf " ~a" (or (and loc (srcloc->string loc)) "(unknown location)"))
        (when name
          (printf ": ~a" name))
        (displayln "")))))
|
||||
|
||||
(define ((handle-graphical-exn @qi) e)
|
||||
(displayln (exn->string e) (current-error-port))
|
||||
(cond
|
||||
[(obs-peek @auto-retry)
|
||||
(void) ;; TODO
|
||||
#;(do-retry-end wikiname)]
|
||||
[#t
|
||||
(update-qi @qi [st 'error])
|
||||
(do-try-unpause-next-entry)
|
||||
(thread
|
||||
(λ ()
|
||||
(define/obs @visible? #t)
|
||||
(render
|
||||
(dialog #:title "Download Error"
|
||||
#:style '(resize-border)
|
||||
#:mixin (λ (%) (class % (super-new)
|
||||
(obs-observe! @visible? (λ (visible?) (send this show visible?)))))
|
||||
(vpanel #:margin '(15 15)
|
||||
(text (format "Encountered this error while downloading ~a:" (qi^-wikiname (obs-peek @qi))))
|
||||
(input #:style '(multiple hscroll)
|
||||
#:min-size '(#f 200)
|
||||
(exn->string e))
|
||||
;; TODO
|
||||
#;(button "Retry Now" (λ () (:= @visible? #f) (do-retry-now wikiname)))
|
||||
#;(button "Retry Round-Robin" (λ () (:= @visible? #f) (do-retry-end wikiname)))
|
||||
#;(button "Skip Wiki" (λ () (:= @visible? #f) (do-continue)))
|
||||
#;(button "Use Auto-Retry" (λ ()
|
||||
(:= @auto-retry #t)
|
||||
(:= @visible? #f)
|
||||
(do-retry-end wikiname)))
|
||||
#;(text "Be careful not to auto-retry an infinite loop!")))
|
||||
main-window)))
|
||||
(sleep)
|
||||
; make sure the broken thread is gone
|
||||
(define th (qi^-th (obs-peek @qi)))
|
||||
(when th (kill-thread th))]))
|
||||
|
||||
(define segments
|
||||
(list
|
||||
(list 5/100 (make-color 0 223 217))
|
||||
(list 88/100 color-green)
|
||||
(list 2/100 (make-color 0 223 217))
|
||||
(list 5/100 color-green)))
|
||||
(define segment-spacing 2)
|
||||
(unless (= (apply + (map car segments)) 1)
|
||||
(error 'segments "segments add up to ~a, not 1" (apply + (map car segments))))
|
||||
|
||||
;; return the new bitmap, which can be drawn on a dc<%>
|
||||
(define/memoize (ray-trace width height stage progress max-progress)
|
||||
;; (printf "rendering ~a ~a/~a at ~a~n" stage progress max-progress (current-inexact-milliseconds))
|
||||
(define bm (make-object bitmap% width height #f #t))
|
||||
(define dc (make-object bitmap-dc% bm))
|
||||
(define width-available (- width (* (length segments) segment-spacing)))
|
||||
(send dc set-smoothing 'unsmoothed)
|
||||
(send dc set-pen "black" 0 'transparent)
|
||||
(for/fold ([offset 0])
|
||||
([segment segments]
|
||||
[i (in-naturals 0)]) ;; zero indexed stages?
|
||||
;; calculate start and end locations of grey bar
|
||||
(define-values (segment-proportion segment-color) (apply values segment))
|
||||
(define segment-start (if (= offset 0) 0 (+ offset segment-spacing)))
|
||||
(define segment-width (* width-available segment-proportion))
|
||||
;; draw grey bar
|
||||
(send dc set-brush (make-color 180 180 180 0.4) 'solid)
|
||||
(send dc draw-rectangle segment-start 0 segment-width height)
|
||||
;; draw solid bar according to the current item's progress
|
||||
(define proportion
|
||||
(cond [(stage . < . i) 0]
|
||||
[(stage . > . i) 1]
|
||||
[(max-progress . <= . 0) 0]
|
||||
[(progress . < . 0) 0]
|
||||
[(progress . >= . max-progress) 1]
|
||||
[else (progress . / . max-progress)]))
|
||||
(send dc set-brush segment-color 'solid)
|
||||
(send dc draw-rectangle segment-start 0 (* proportion segment-width) height)
|
||||
(+ segment-start segment-width))
|
||||
(bitmap-render-icon bm 6/8))
|
||||
|
||||
;; get ray traced bitmap (possibly from cache) and draw on dc<%>
|
||||
(define (draw-bar orig-dc qi)
|
||||
;; (println ray-traced)
|
||||
(define-values (width height) (send orig-dc get-size))
|
||||
(send orig-dc draw-bitmap (ray-trace width height (qi^-stage qi) (qi^-progress qi) (qi^-max-progress qi)) 0 0))
|
||||
|
||||
(define ((make-progress-updater @qi) a b c)
|
||||
;; (printf "~a: ~a/~a ~a~n" (qi^-wikiname (obs-peek @qi)) a b c)
|
||||
(update-qi @qi [progress a] [max-progress b] [ticks (add1 (qi^-ticks (obs-peek @qi)))]))
|
||||
|
||||
(define/obs @input "")
|
||||
|
||||
;; Queue the wikiname currently typed into @input (ignoring surrounding
;; whitespace and empty input), then clear the input field.
(define (do-add-to-queue)
  (define wikiname (string-trim (obs-peek @input)))
  (unless (string=? wikiname "")
    (add-wikiname-to-queue wikiname 'queued 0)) ;; TODO: automatically start?
  (:= @input ""))
|
||||
|
||||
(define-syntax-rule (update-qi @qi args ...)
|
||||
(let ([wikiname (qi^-wikiname (obs-peek @qi))])
|
||||
(@queue . <~ . (λ (queue)
|
||||
(for/list ([qi queue])
|
||||
(if (equal? (qi^-wikiname qi) wikiname)
|
||||
(struct-copy qi^ qi args ...)
|
||||
qi))))))
|
||||
|
||||
(define (do-start-qi @qi)
|
||||
(define th
|
||||
(thread (λ ()
|
||||
(with-handlers ([exn? (handle-graphical-exn @qi)])
|
||||
(define last-stage
|
||||
(for/last ([stage all-stages]
|
||||
[i (in-naturals)])
|
||||
(update-qi @qi [stage i])
|
||||
(stage (qi^-wikiname (obs-peek @qi)) (make-progress-updater @qi))
|
||||
i))
|
||||
(update-qi @qi [st 'complete] [stage (add1 last-stage)])
|
||||
(do-try-unpause-next-entry)))))
|
||||
(update-qi @qi [st 'running] [th th]))
|
||||
|
||||
(define (do-stop-qi @qi)
|
||||
(define th (qi^-th (obs-peek @qi)))
|
||||
(when th (kill-thread th))
|
||||
(update-qi @qi [th #f] [st 'paused]))
|
||||
|
||||
(define (do-reset-qi @qi)
|
||||
(define reset-progress-to 0)
|
||||
(define th (qi^-th (obs-peek @qi)))
|
||||
(when th (kill-thread th))
|
||||
(update-qi @qi [th #f] [st 'queued] [stage reset-progress-to] [progress 0] [max-progress 0])
|
||||
(query-exec* "update wiki set progress = ? where wikiname = ?" reset-progress-to (qi^-wikiname (obs-peek @qi))))
|
||||
|
||||
(define (do-reseter-qi @qi)
|
||||
(do-reset-qi @qi)
|
||||
(query-exec* "delete from page where wikiname = ?" (qi^-wikiname (obs-peek @qi))))
|
||||
|
||||
(define (do-try-unpause-next-entry)
|
||||
(define queue (obs-peek @queue))
|
||||
(define next-qi (for/last ([qi queue]
|
||||
#:when (memq (qi^-st qi) '(paused queued)))
|
||||
qi))
|
||||
(when next-qi
|
||||
(define @qi (@queue . ~> . (λ (queue) (findf (λ (qi) (equal? (qi^-wikiname qi) (qi^-wikiname next-qi))) queue))))
|
||||
(do-start-qi @qi)))
|
||||
|
||||
(define main-window
|
||||
(render
|
||||
(window
|
||||
#:title "Fandom Archiver"
|
||||
#:size '(400 300)
|
||||
#:mixin (λ (%) (class %
|
||||
(super-new)
|
||||
(define/augment (on-close)
|
||||
(for ([qi (obs-peek @queue)])
|
||||
(when (qi^-th qi)
|
||||
(kill-thread (qi^-th qi))))
|
||||
#;(disconnect*))))
|
||||
(vpanel
|
||||
#:spacing 10
|
||||
#:margin '(5 5)
|
||||
(hpanel
|
||||
#:stretch '(#t #f)
|
||||
#:spacing 10
|
||||
(hpanel
|
||||
(text "https://")
|
||||
(input @input
|
||||
(λ (event data) (cond
|
||||
[(eq? event 'input) (:= @input data)]
|
||||
[(eq? event 'return) (do-add-to-queue)])))
|
||||
(text ".fandom.com"))
|
||||
(button "Download Wiki" do-add-to-queue))
|
||||
(list-view
|
||||
#:style '(vertical)
|
||||
@queue
|
||||
#:key qi^-wikiname
|
||||
(λ (k @qi)
|
||||
(define @status-icons
|
||||
(@> (case (qi^-st @qi)
|
||||
[(running) (stick (qi^-ticks @qi))]
|
||||
[else (hash-ref status-icons (qi^-st @qi))])))
|
||||
(define @is-running?
|
||||
(@> (memq (qi^-st @qi) '(running))))
|
||||
(define @is-complete?
|
||||
(@> (eq? (qi^-st @qi) 'complete)))
|
||||
;; state icon at the left side
|
||||
(hpanel #:stretch '(#t #f)
|
||||
#:alignment '(left center)
|
||||
#:spacing 8
|
||||
(bitmap-view @status-icons status-icon-min-width)
|
||||
(vpanel
|
||||
;; name and buttons (top half)
|
||||
(hpanel #:alignment '(left bottom)
|
||||
(text (@> (qi^-wikiname @qi)))
|
||||
(spacer)
|
||||
(hpanel
|
||||
#:stretch '(#f #f)
|
||||
|
||||
(if-view @is-running?
|
||||
(button (hash-ref action-icons 'pause)
|
||||
(λ () (do-stop-qi @qi)))
|
||||
(hpanel
|
||||
#:stretch '(#f #f)
|
||||
(button (hash-ref action-icons 'reseter)
|
||||
(λ () (do-reseter-qi @qi)))
|
||||
(button (hash-ref action-icons 'reset)
|
||||
(λ () (do-reset-qi @qi)))
|
||||
(button (hash-ref action-icons 'resume)
|
||||
(λ () (do-start-qi @qi)))))))
|
||||
;; progress bar (bottom half)
|
||||
(hpanel
|
||||
(canvas
|
||||
@qi
|
||||
#:style '(transparent)
|
||||
#:margin '(3 3)
|
||||
draw-bar)
|
||||
(hpanel #:min-size '(68 #f)
|
||||
#:stretch '(#f #f)
|
||||
#:alignment '(right center)
|
||||
(text (@> (format "eta ~a" (qi^-eta @qi))))))))))))))
|
|
@ -1,388 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/file
|
||||
racket/format
|
||||
racket/function
|
||||
racket/list
|
||||
racket/path
|
||||
racket/sequence
|
||||
racket/string
|
||||
net/url
|
||||
net/mime
|
||||
file/sha1
|
||||
net/http-easy
|
||||
db
|
||||
json
|
||||
"archiver-database.rkt"
|
||||
"../lib/html-parsing/main.rkt"
|
||||
"../lib/mime-types.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/tree-updater.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt"
|
||||
"../lib/archive-file-mappings.rkt")
|
||||
|
||||
(provide
|
||||
basename->name-for-query
|
||||
image-url->values
|
||||
hash->save-dir
|
||||
all-stages)
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
|
||||
(define archive-root (anytime-path ".." "storage/archive"))
|
||||
(make-directory* archive-root)
|
||||
|
||||
(define sources '#hasheq((style . 1) (page . 2)))
|
||||
|
||||
;; Origin (scheme + host) of the Fandom wiki called WIKINAME.
(define (get-origin wikiname)
  (string-append "https://" (~a wikiname) ".fandom.com"))
|
||||
|
||||
;; Fetch siteinfo for WIKINAME from the MediaWiki API and store the site name,
;; base page and license details in the `wiki` table (updating the row when it
;; exists, inserting it with progress 0 otherwise).
;; Returns two values: the article count reported by the site statistics, and
;; the sorted list of content namespace ids (below 2900).
(define (insert-wiki-entry wikiname)
  (define dest-url
    (format "https://~a.fandom.com/api.php?~a"
            wikiname
            (params->query '(("action" . "query")
                             ("meta" . "siteinfo")
                             ("siprop" . "general|rightsinfo|statistics|namespaces")
                             ("format" . "json")
                             ("formatversion" . "2")))))
  (define data (response-json (get dest-url)))
  (define content-nss
    (sort
     (for/list ([(k v) (in-hash (jp "/query/namespaces" data))]
                #:do [(define id (hash-ref v 'id))]
                #:when (and (id . < . 2900) ; exclude maps namespace
                            (hash-ref v 'content))) ; exclude non-content and talk namespaces
       id)
     <))
  ;; Extract each stored field once; both SQL branches use the same values.
  (define sitename (jp "/query/general/sitename" data))
  (define base-url (jp "/query/general/base" data))
  (define basepage
    (cond
      [(regexp-match #rx"/wiki/(.*)" base-url) => second]
      [else
       (error 'insert-wiki-entry
              "could not find the base page of ~a in siteinfo base URL ~v"
              wikiname base-url)]))
  (define license-text (jp "/query/rightsinfo/text" data))
  (define license-url (jp "/query/rightsinfo/url" data))
  ;; Ask for the row rather than the value: a row whose progress is NULL still
  ;; exists, and inserting it again would violate the primary key.
  (define existing-row (query-maybe-row* "select progress from wiki where wikiname = ?" wikiname))
  (if existing-row
      (query-exec* "update wiki set sitename = ?, basepage = ?, license_text = ?, license_url = ? where wikiname = ?"
                   sitename
                   basepage
                   license-text
                   license-url
                   wikiname)
      (query-exec* "insert into wiki (wikiname, progress, sitename, basepage, license_text, license_url) values (?, 0, ?, ?, ?, ?)"
                   wikiname
                   sitename
                   basepage
                   license-text
                   license-url))
  (values (jp "/query/statistics/articles" data)
          content-nss))
|
||||
|
||||
|
||||
;; Scan the CSS file at PATH for `url(...)` references and return them as a
;; list of absolute https URLs, skipping references that match the ignore
;; rules below. Raises a user error for a URL whose form is not recognised.
(define (check-style-for-images wikiname path)
  (define css (file->string path))
  (define candidates (regexp-match* #rx"url\\(\"?'?([^)]*)'?\"?\\)" css #:match-select cadr))
  (define ignored-fragments
    (list "/resources-ucp/"
          "/fonts/"
          "/drm_fonts/"
          "//db.onlinewebfonts.com/"
          "//bits.wikimedia.org/"
          "mygamercard.net/"
          "dropbox"
          "only=styles"
          "https://https://"))
  (define (ignored? url)
    (or (equal? url "")
        (equal? url "'")
        (string-suffix? url "\"")
        (for/or ([fragment (in-list ignored-fragments)])
          (string-contains? url fragment))
        (regexp-match? #rx"^%20" url)
        (regexp-match? #rx"^data:" url)
        (regexp-match? #rx"^file:" url)))
  ;; normalise a surviving reference to an absolute https URL
  (define (absolutize url)
    (cond
      [(string-prefix? url "https://") url]
      [(string-prefix? url "http://") (regexp-replace #rx"http:" url "https:")]
      [(string-prefix? url "httpshttps://") (regexp-replace #rx"httpshttps://" url "https://")]
      [(string-prefix? url "//") (string-append "https:" url)]
      [(string-prefix? url "/") (format "https://~a.fandom.com~a" wikiname url)]
      [else (raise-user-error "While calling check-style-for-images, this URL had an unknown format and couldn't be saved:" url path)]))
  (for/list ([url (in-list candidates)]
             #:unless (ignored? url))
    (absolutize url)))
|
||||
|
||||
;; Download the theme-variable stylesheets (default, light, dark) and the site
;; stylesheet for WIKINAME into <archive-root>/<wikiname>/styles, calling
;; (callback done total "styles...") before each download and once at the end.
;; Returns the list of (url . save-path) pairs that were downloaded.
(define (download-styles-for-wiki wikiname callback)
  (define save-dir (build-path archive-root wikiname "styles"))
  (make-directory* save-dir)
  (define (theme theme-name)
    (cons (format "https://~a.fandom.com/wikia.php?controller=ThemeApi&method=themeVariables&variant=~a" wikiname theme-name)
          (build-path save-dir (format "themeVariables-~a.css" theme-name))))
  (define site-styles
    (cons (format "https://~a.fandom.com/load.php?lang=en&modules=site.styles%7Cskin.fandomdesktop.styles%7Cext.fandom.PortableInfoboxFandomDesktop.css%7Cext.fandom.GlobalComponents.CommunityHeaderBackground.css%7Cext.gadget.site-styles%2Csound-styles&only=styles&skin=fandomdesktop" wikiname)
          (build-path save-dir "site.css")))
  ;; (Listof (Pair url save-path))
  (define styles
    (append (map theme '("default" "light" "dark"))
            (list site-styles)))
  (define style-count (length styles))
  (for ([style (in-list styles)]
        [index (in-naturals)])
    (callback index style-count "styles...")
    ;; XXX: stylesheets pulled in through @import are not followed or saved
    (display-to-file (response-body (get (car style)))
                     (cdr style)
                     #:exists 'replace))
  (callback style-count style-count "styles...")
  styles)
|
||||
|
||||
;; Directory for an image with the given HASH:
;; <archive-root>/<wikiname>/images/<first char>/<first two chars>
(define (hash->save-dir wikiname hash)
  (define first-char (substring hash 0 1))
  (define first-two (substring hash 0 2))
  (build-path archive-root wikiname "images" first-char first-two))
|
||||
|
||||
;; Normalise image URL I and return (cons key hash): KEY is the URL with its
;; `cb=<digits>` parameter and any dangling trailing separator removed, HASH is
;; the SHA-1 hex digest of KEY's UTF-8 bytes.
;; TODO: handle case where there is multiple broken cb parameter on minecraft wiki
;; TODO: ensure it still "works" with broken & on minecraft wiki
(define (image-url->values i)
  (let* ([without-cb (regexp-replace #rx"\\cb=[0-9]+&?" i "")] ; cb does nothing
         [key (regexp-replace #rx"[&?]$" without-cb "")])      ; drop leftover ? or &
    (cons key (sha1 (string->bytes/utf-8 key)))))
|
||||
|
||||
|
||||
;; 1. Download list of wiki pages and store in database, if not done yet for that wiki
|
||||
(define (if-necessary-download-list-of-pages wikiname callback)
|
||||
(define wiki-progress (query-maybe-value* "select progress from wiki where wikiname = ?" wikiname))
|
||||
;; done yet?
|
||||
(unless (and (real? wiki-progress) (wiki-progress . >= . 1))
|
||||
;; Count total pages
|
||||
(define-values (num-pages namespaces) (insert-wiki-entry wikiname))
|
||||
;; Download the entire index of pages
|
||||
(for*/fold ([total 0])
|
||||
([namespace namespaces]
|
||||
[redir-filter '("nonredirects" "redirects")])
|
||||
(let loop ([apcontinue ""]
|
||||
[basenames null])
|
||||
(cond
|
||||
[apcontinue
|
||||
(define url (format "https://~a.fandom.com/api.php?~a"
|
||||
wikiname
|
||||
(params->query `(("action" . "query")
|
||||
("list" . "allpages")
|
||||
("apnamespace" . ,(~a namespace))
|
||||
("apfilterredir" . ,redir-filter)
|
||||
("aplimit" . "500")
|
||||
("apcontinue" . ,apcontinue)
|
||||
("format" . "json")
|
||||
("formatversion" . "2")))))
|
||||
;; Download the current listing page
|
||||
(define res (get url))
|
||||
(define json (response-json res))
|
||||
;; Content from this page
|
||||
(define current-basenames
|
||||
(for/list ([page (jp "/query/allpages" json)])
|
||||
(title->basename (jp "/title" page))))
|
||||
(when ((length current-basenames) . > . 0)
|
||||
;; Report
|
||||
(if (equal? redir-filter "nonredirects")
|
||||
(callback (+ (length basenames) (length current-basenames) total) num-pages (last current-basenames))
|
||||
(callback total num-pages (last current-basenames))))
|
||||
;; Loop
|
||||
(loop (jp "/continue/apcontinue" json #f) (append basenames current-basenames))]
|
||||
[else
|
||||
;; All done with this (loop)! Save those pages into the database
|
||||
;; SQLite can have a maximum of 32766 parameters in a single query
|
||||
(begin0
|
||||
;; next for*/fold
|
||||
(if (equal? redir-filter "nonredirects")
|
||||
(+ (length basenames) total)
|
||||
total) ; redirects don't count for the site statistics total
|
||||
(call-with-transaction
|
||||
(get-slc)
|
||||
(λ ()
|
||||
(for ([slice (in-slice 32760 basenames)])
|
||||
(define query-template
|
||||
(string-join #:before-first "insert or ignore into page (wikiname, redirect, basename, progress) values "
|
||||
(make-list (length slice) "(?1, ?2, ?, 0)") ", "))
|
||||
(apply query-exec* query-template wikiname (if (equal? redir-filter "redirects") 1 sql-null) slice)))))])))
|
||||
;; Record that we have the complete list of pages
|
||||
(query-exec* "update wiki set progress = 1 where wikiname = ?" wikiname)))
|
||||
|
||||
|
||||
;; 2. Download each page via API and:
|
||||
;; * Save API response to file
|
||||
(define max-page-progress 1)
|
||||
;; Download every not-yet-complete, non-redirect page of WIKINAME through the
;; parse API and save each response body as <archive-root>/<wikiname>/<name>.json,
;; marking the page complete in the database as it goes. Redirects are then
;; handled by `save-redirects`, and the wiki's progress is raised to 2.
;; CALLBACK is called as (callback done total basename) to report progress.
(define (save-each-page wikiname callback)
  ;; prepare destination folder
  (define save-dir (build-path archive-root wikiname))
  (make-directory* save-dir)
  ;; gather list of basenames to download (that aren't yet complete)
  (define basenames (query-list* "select basename from page where wikiname = ? and progress < ? and redirect is null"
                                 wikiname max-page-progress))
  ;; counter of complete/incomplete basenames
  (define already-done-count
    (query-value* "select count(*) from page where wikiname = ? and progress = ?"
                  wikiname max-page-progress))
  (define not-done-count
    (query-value* "select count(*) from page where wikiname = ? and progress < ?"
                  wikiname max-page-progress))
  (define total-count (+ already-done-count not-done-count))
  ;; set initial progress
  (callback already-done-count total-count "")
  ;; loop through basenames and download
  (for ([basename basenames]
        [i (in-naturals (add1 already-done-count))])
    (define name-for-query (basename->name-for-query basename))
    (define dest-url
      (format "https://~a.fandom.com/api.php?~a"
              wikiname
              (params->query `(("action" . "parse")
                               ("page" . ,name-for-query)
                               ("prop" . "text|headhtml|langlinks")
                               ("formatversion" . "2")
                               ("format" . "json")))))
    (define r (get dest-url))
    (define body (response-body r))
    (define save-path
      (cond [((string-length basename) . > . 240)
             ;; Basenames over 240 characters are stored under a SHA-1 key,
             ;; with the key -> basename mapping kept in special_page.
             ;; `string->bytes/latin-1` raises on characters above U+00FF, so
             ;; only such names fall back to UTF-8; every name that hashed
             ;; successfully before keeps the same key.
             (define key-bytes
               (if (for/and ([ch (in-string basename)]) (char<=? ch #\uFF))
                   (string->bytes/latin-1 basename)
                   (string->bytes/utf-8 basename)))
             (define key (sha1 key-bytes))
             ;; "or ignore": the mapping may already exist when a download is
             ;; resumed or repeated; a plain insert would then fail on the
             ;; (wikiname, key) primary key and abort the whole stage.
             (query-exec* "insert or ignore into special_page (wikiname, key, basename) values (?, ?, ?)"
                          wikiname key basename)
             (build-path save-dir (string-append key ".json"))]
            [else
             (build-path save-dir (string-append basename ".json"))]))
    (display-to-file body save-path #:exists 'replace)
    (query-exec* "update page set progress = 1 where wikiname = ? and basename = ?"
                 wikiname basename)
    (callback i total-count basename))
  ;; save redirects as well
  (save-redirects wikiname callback (+ already-done-count (length basenames)) total-count)
  ;; saved all pages, register that fact in the database
  (query-exec* "update wiki set progress = 2 where wikiname = ? and progress <= 2" wikiname))
|
||||
|
||||
|
||||
;; 2.5. Download each redirect-target via API and save mapping in database
|
||||
;; For every unfinished redirect page of WIKINAME, ask the API for the page's
;; links and record the first link's basename as the redirect target; a
;; redirect with no link is deleted from the page table.
;; Progress is reported as (callback position total-count basename), counting
;; on from ALREADY-DONE-COUNT.
(define (save-redirects wikiname callback already-done-count total-count)
  (define pending
    (query-list* "select basename from page where wikiname = ? and progress < ? and redirect = 1"
                 wikiname max-page-progress))
  ;; one API request per redirect
  (for ([redirect-basename (in-list pending)]
        [position (in-naturals (add1 already-done-count))])
    (define api-query
      (params->query `(("action" . "query")
                       ("prop" . "links")
                       ("titles" . ,(basename->name-for-query redirect-basename))
                       ("format" . "json")
                       ("formatversion" . "2"))))
    (define api-url (format "https://~a.fandom.com/api.php?~a" wikiname api-query))
    (define reply (response-json (get api-url)))
    (define target-title (jp "/query/pages/0/links/0/title" reply #f))
    (callback position total-count redirect-basename)
    (if target-title
        ;; store it
        (query-exec* "update page set progress = 1, redirect = ? where wikiname = ? and basename = ?"
                     (title->basename target-title) wikiname redirect-basename)
        ;; the page just doesn't exist
        (query-exec* "delete from page where wikiname = ? and basename = ?" wikiname redirect-basename))))
|
||||
|
||||
|
||||
;; 3. Download CSS and:
|
||||
;; * Save CSS to file
|
||||
;; * Record style images to database
|
||||
;; Unless WIKINAME's progress is already 3 or more: download its stylesheets,
;; record every distinct image URL they reference in the `image` table
;; (source 1, progress 0), and set the wiki's progress to 3.
(define (if-necessary-download-and-check-styles wikiname callback)
  (define wiki-progress (query-maybe-value* "select progress from wiki where wikiname = ?" wikiname))
  (define styles-done? (and (number? wiki-progress) (wiki-progress . >= . 3)))
  (when (not styles-done?)
    (define styles (download-styles-for-wiki wikiname callback))
    ;; every url(...) reference from every downloaded stylesheet
    (define style-image-urls
      (append-map (λ (style) (check-style-for-images wikiname (cdr style)))
                  styles))
    ;; (key . hash) pairs, de-duplicated by hash
    (define unique-image-urls
      (remove-duplicates (map image-url->values style-image-urls)
                         #:key cdr))
    (for ([key+hash (in-list unique-image-urls)])
      (query-exec* "insert or ignore into image (wikiname, url, hash, ext, source, progress) values (?, ?, ?, NULL, 1, 0)" wikiname (car key+hash) (cdr key+hash)))
    (query-exec* "update wiki set progress = 3 where wikiname = ?" wikiname)))
|
||||
|
||||
|
||||
;; 4: From downloaded pages, record URLs of image sources and inline style images to database
|
||||
(define (check-json-for-images wikiname path)
  ;; Reads the saved page JSON at `path`, parses its "/parse/text" HTML and runs it
  ;; through update-tree-wiki. The image extraction itself is currently disabled,
  ;; so the result is always the empty list.
  (define data (with-input-from-file path read-json))
  (define page-html (preprocess-html-wiki (jp "/parse/text" data)))
  (define page (html->xexp page-html))
  (define tree (update-tree-wiki page wikiname))
  ;; disabled: collect the (deduplicated) values for every <img src=...> in the tree
  #;(remove-duplicates
     (for/list ([element (in-producer
                          (query-selector
                           (λ (t a c)
                             (and (eq? t 'img)
                                  (get-attribute 'src a)))
                           tree)
                          #f)])
       (image-url->values (get-attribute 'src (bits->attributes element)))))
  null)
|
||||
|
||||
|
||||
;; 5. Download image sources and style images according to database
|
||||
(define (save-each-image wikiname callback)
  ;; Downloads every image row of `wikiname` that is not yet complete, writes it to disk
  ;; and marks it done. `callback` is called as (callback done total label): once before
  ;; the loop with an empty label, then once after each saved image.
  (define source (hash-ref sources 'style)) ;; TODO: download entire wiki images instead?
  ;; rows (url, hash) still waiting to be downloaded
  (define pending-rows
    (query-rows* "select url, hash from image where wikiname = ? and source <= ? and progress < 1"
                 wikiname source))
  ;; counters used for progress reporting
  (define already-done-count
    (query-value* "select count(*) from image where wikiname = ? and source <= ? and progress = 1"
                  wikiname source))
  (define not-done-count
    (query-value* "select count(*) from image where wikiname = ? and source <= ? and progress < 1"
                  wikiname source))
  (define total-count (+ already-done-count not-done-count))
  ;; initial progress report
  (callback already-done-count total-count "")
  (for ([row pending-rows]
        [i (in-naturals 1)])
    (define url (vector-ref row 0))
    (define image-hash (vector-ref row 1))
    #;(printf "~a -> ~a~n" url image-hash)
    (define response (get url #:timeouts (make-timeout-config #:connect 15)))
    (define declared-type (response-headers-ref response 'content-type))
    ;; a generic octet-stream declaration is replaced by the type mime-analyze reports for the body
    (define final-type
      (cond
        [(equal? declared-type #"application/octet-stream")
         (define sniff-entity (message-entity (mime-analyze (response-body response))))
         (string->bytes/latin-1
          (format "~a/~a" (entity-type sniff-entity) (entity-subtype sniff-entity)))]
        [else declared-type]))
    ;; contract failures from the lookup are re-raised with the offending type and URL
    (define ext
      (with-handlers ([exn:fail:contract?
                       (λ _ (error 'save-each-image "no ext found for mime type `~a` in file ~a" final-type url))])
        (bytes->string/latin-1 (mime-type->ext final-type))))
    ;; write the body to <save-dir>/<hash>.<ext>
    (define save-dir (hash->save-dir wikiname image-hash))
    (make-directory* save-dir)
    (define save-path (build-path save-dir (string-append image-hash "." ext)))
    (display-to-file (response-body response) save-path #:exists 'replace)
    (query-exec* "update image set progress = 1, ext = ? where wikiname = ? and hash = ?"
                 ext wikiname image-hash)
    (callback (+ already-done-count i)
              total-count
              (string-append (substring image-hash 0 6) "..." ext)))
  ;; saved all images, register that fact in the database
  (query-exec* "update wiki set progress = 4 where wikiname = ?" wikiname))
|
||||
|
||||
;; The archiver stages, in the order they are listed here.
;; check-json-for-images is left out of the list on purpose (see the disabled entry).
(define all-stages
  (list if-necessary-download-list-of-pages
        save-each-page
        if-necessary-download-and-check-styles
        ;; check-json-for-images
        save-each-image))
|
||||
|
||||
(module+ test
  ;; html->xexp must leave a bare `&` inside an attribute value untouched
  (let ([markup "<img src=\"https://example.com/images?src=Blah.jpg&width=150\">"]
        [expected '(*TOP* (img (@ (src "https://example.com/images?src=Blah.jpg&width=150"))))])
    (check-equal? (html->xexp markup) expected))

  ;; Manual experiments, disabled with datum comments.
  ;; NOTE(review): some of these look stale (do-step-3, and save-each-image called with
  ;; three arguments while its definition takes two) -- confirm before re-enabling.
  #;(download-list-of-pages "minecraft" values)
  #;(save-each-page "minecraft" values)
  #;(check-json-for-images "chiki" (build-path archive-root "chiki" "Fiona.json"))
  #;(do-step-3 "gallowmere")
  #;(save-each-image "gallowmere" (hash-ref sources 'style) (λ (a b c) (printf "~a/~a ~a~n" a b c)))

  #;(for ([wikiname (query-list* "select wikiname from wiki")])
      (println wikiname)
      (insert-wiki-entry wikiname))

  #;(for ([wikiname (query-list* "select wikiname from wiki")])
      (println wikiname)
      (do-step-3 wikiname)
      (save-each-image wikiname (hash-ref sources 'style) (λ (a b c) (printf "~a/~a ~a~n" a b c)))))
|
||||
|
||||
; (for ([stage all-stages]) (stage "create" (λ (a b c) (printf "~a/~a ~a~n" a b c))))
|
213
archiver/fts.rkt
213
archiver/fts.rkt
|
@ -1,213 +0,0 @@
|
|||
#lang cli
|
||||
(require (for-syntax racket/base))
|
||||
(require racket/format
|
||||
racket/function
|
||||
racket/future
|
||||
racket/match
|
||||
racket/path
|
||||
racket/promise
|
||||
racket/port
|
||||
racket/runtime-path
|
||||
racket/sequence
|
||||
racket/string
|
||||
file/gunzip
|
||||
db
|
||||
db/unsafe/sqlite3
|
||||
net/http-easy
|
||||
json
|
||||
json-pointer
|
||||
"../lib/html-parsing/main.rkt"
|
||||
"../lib/xexpr-utils.rkt"
|
||||
"../lib/tree-updater.rkt")
|
||||
|
||||
;; Command-line flag -c / --read-from-cache. When given, (read-from-cache?) is set to #t,
;; and the program reads documents from "cache.rkt" (if that file exists) instead of
;; rebuilding them.
(flag (read-from-cache?)
|
||||
("-c" "--read-from-cache" "read from last run cache instead of rebuilding documents")
|
||||
(read-from-cache? #t))
|
||||
|
||||
;; Root folder of the archive; the program appends the wikiname to it to find that wiki's files.
(define-runtime-path storage-path "../storage/archive")
|
||||
|
||||
;; ***************************************************************************************************
|
||||
;; Progress bar display
|
||||
;; ***************************************************************************************************
|
||||
|
||||
;; Snapshot consumed by make-progress: `n` units done out of `max`, plus the `title`
;; text printed after the bar.
(struct progress^ (n max title) #:transparent)
|
||||
|
||||
;; Formats an integer number of seconds as "M:SS" (the seconds part is zero-padded to two characters).
(define (make-m-s seconds)
  (let-values ([(minutes leftover) (quotient/remainder seconds 60)])
    (string-append (~a minutes)
                   ":"
                   (~a leftover #:width 2 #:align 'right #:pad-string "0"))))
|
||||
|
||||
;; Starts a background thread that redraws a one-line progress bar on the current
;; output port every `update-sleep` seconds.
;;
;; get-p        : thunk returning the current progress^ snapshot
;; history-size : number of past samples kept for the rate / ETA estimate
;;
;; Returns a thunk that kills the thread, draws the bar one final time and ends the line.
(define (make-progress get-p [history-size 20])
  (define update-sleep 1)
  (define name-width 30)
  (define max-width 105)
  ;; ring buffer of the `n` values seen on the last `history-size` redraws
  (define history (make-vector history-size 0))
  (define history-pointer 0)
  ;; number of redraws so far (one per `update-sleep` while the thread runs)
  (define elapsed 0)
  (define (report-progress)
    (define p (get-p))
    (define n (progress^-n p))
    (define total (progress^-max p))
    ;; Completed fraction, clamped to [0, 1]. A job with total = 0 counts as complete;
    ;; without this guard the percentage and bar width divide by zero.
    (define fraction
      (if (total . > . 0)
          (max 0 (min 1 (/ n total)))
          1))
    ;; the sample about to be overwritten is the one taken `history-size` redraws ago
    (define history-cycle (vector-ref history history-pointer))
    (vector-set! history history-pointer n)
    (set! history-pointer (modulo (add1 history-pointer) history-size))
    (set! elapsed (add1 elapsed))
    (define-values (eta-display diff-per-second)
      (cond
        ;; finished: show total elapsed time and the overall average rate
        [(n . >= . total)
         (values (format "~a **" (make-m-s elapsed))
                 (format "** ~a" (quotient total (max elapsed 1))))]
        ;; a stored 0 is treated as "no sample yet"
        [(= history-cycle 0) (values "-:--" "--")]
        [else
         (define diff-per-second (/ (- n history-cycle) (* history-size update-sleep)))
         (define eta-total
           (if (diff-per-second . > . 0)
               (floor (round (/ (- total n) diff-per-second)))
               0))
         (values (make-m-s eta-total)
                 (round diff-per-second))]))
    (define left (format "~a/~a ~a/s ~a ~a%"
                         (~a n #:width (string-length (~a total)) #:align 'right #:pad-string " ")
                         total
                         diff-per-second
                         eta-display
                         (floor (* 100 fraction))))
    (define name-display (~a (progress^-title p) #:max-width name-width #:limit-marker "..."))
    ;; keep at least one column so make-string never receives a negative count
    (define remaining-space (max 1 (- max-width name-width (string-length left) 2)))
    (define bar-width (floor (* (sub1 remaining-space) fraction)))
    (define bar (string-append (make-string bar-width #\=)
                               ">"
                               (make-string (- remaining-space bar-width) #\space)))
    ;; ESC[2K clears the current terminal line, \r returns to column 0
    (printf "\e[2K\r~a~a~a" left bar name-display)
    (flush-output))
  (define (report-progress-loop)
    (sleep update-sleep)
    (report-progress)
    (report-progress-loop))
  (define t (thread report-progress-loop))
  (define (quit)
    (kill-thread t)
    (report-progress)
    (displayln ""))
  quit)
|
||||
|
||||
;; ***************************************************************************************************
|
||||
;; Page text extractor
|
||||
;; ***************************************************************************************************
|
||||
|
||||
;; True when the element's class attribute contains at least one of `substrs` as a substring.
;; A missing class attribute is treated as the empty string.
(define (class-has? attributes substrs)
  (define class-string
    (let ([found (get-attribute 'class attributes)])
      (if found found "")))
  (for/or ([needle (in-list substrs)])
    (string-contains? class-string needle)))
|
||||
|
||||
;; Tree-update callback: elements whose class matches one of the listed fragments are
;; replaced by an empty div; every other element is passed through unchanged.
(define (updater element element-type attributes children)
  (if (class-has? attributes '("collapsed" "selflink" "label" "toc" "editsection" "reviews"))
      (list 'div '() '())
      (list element-type attributes children)))
|
||||
|
||||
;; Prints the text content of the x-expression `page` to the current output port.
;; In tables mode the subtrees of h1/h2/h3/p/blockquote/q are skipped; otherwise the
;; subtrees of ul/ol/dl/table are skipped. Comment and attribute nodes are always skipped.
;; The tag symbols div/p/li/td/dd/dt/br each produce a newline.
(define (writer tables-mode? page)
  (define line-break-tags '(div p li td dd dt br))
  ;; heads of the subtrees that must not be descended into
  (define skipped-heads
    (append (if tables-mode?
                '(h1 h2 h3 p blockquote q)
                '(ul ol dl table))
            '(*COMMENT* @)))
  (define (emit-all nodes)
    (for ([node nodes])
      (emit node)))
  (define (emit node)
    (cond
      [(string? node) (display node)]
      [(symbol? node)
       (when (memq node line-break-tags)
         (displayln ""))]
      [(pair? node)
       (unless (memq (car node) skipped-heads)
         (emit-all node))]))
  (emit-all page))
|
||||
|
||||
;; Renders `page` with `writer` into a string, then collapses every run of line breaks
;; (together with the spaces/tabs directly before them) into a single "\n".
(define (write-and-post-process tables-mode? page)
  (define raw-text
    (with-output-to-string
      (λ () (writer tables-mode? page))))
  ;; (define text-no-numbers (regexp-replace* #px"(?:-|[+$£€¥] *)?[0-9,.]{2,}%?\\s*" text ""))
  (regexp-replace* #px"([ \t]*\r?\n+)+" raw-text "\n"))
|
||||
|
||||
;; (extract f) returns a thunk; calling the thunk loads the page stored in file `f`
;; (plain ".json" or gzip-compressed ".gz") and returns (list title body table pageid),
;; where `body` and `table` are the two text renderings of the page.
;; Any exn:fail raised on the way is printed and the thunk returns void instead.
(define ((extract f)) ; f - filename
  (with-handlers
    ([exn:fail? (λ (err) (printf "extract: ~a: ~v~n" f err))])
    (define j
      (case (path-get-extension f)
        [(#".json")
         (with-input-from-file f (λ () (read-json)))]
        [(#".gz")
         (define-values (in out) (make-pipe))
         (with-input-from-file f (λ () (gunzip-through-ports (current-input-port) out)))
         ;; Close the write end so the reader sees EOF: incomplete JSON then raises
         ;; an error instead of blocking forever on the pipe.
         (close-output-port out)
         (read-json in)]
        ;; unsupported extension: the lookups below fail and the handler reports the file
        [else #f]))
    (define title (json-pointer-value "/parse/title" j))
    (define pageid (json-pointer-value "/parse/pageid" j))
    (define page-html (preprocess-html-wiki (json-pointer-value "/parse/text" j)))
    (define page (update-tree updater (html->xexp page-html)))
    ;; body: rendering without tables mode; table: rendering with tables mode
    (define body (write-and-post-process #f page))
    (define table (write-and-post-process #t page))
    (list title body table pageid)))
|
||||
|
||||
;; ***************************************************************************************************
|
||||
;; Program, loop, Solr APIs
|
||||
;; ***************************************************************************************************
|
||||
|
||||
;; Entry point: builds one document per archived page of `wikiname` (or reloads the
;; documents from "cache.rkt" when -c is given and the file exists) and posts them in
;; slices to the Solr update endpoint for that wiki.
(program
 (start [wikiname "wikiname to download"])

 ;; one extraction thunk per .gz file in the wiki's archive folder
 (define results
   (for/list ([f (directory-list (build-path storage-path wikiname) #:build? #t)]
              #:when (member (path-get-extension f) '(#".gz")))
     (extract f)))
 ;; computed once: calling `length` inside the loop below would walk the list on every page
 (define result-count (length results))

 (define data
   (cond
     [(and (read-from-cache?) (file-exists? "cache.rkt"))
      (define size (file-size "cache.rkt"))
      (call-with-input-file "cache.rkt"
        (λ (in)
          ;; progress is measured in 64 KiB units of the cache file
          (define quit (make-progress (λ () (progress^ (ceiling (/ (file-position in) 64 1024))
                                                       (ceiling (/ size 64 1024))
                                                       "Reading in..."))
                                      2))
          (begin0
            (read in)
            (quit))))]
     [else
      (define x (box (progress^ 0 1 "...")))
      (define quit (make-progress (λ () (unbox x))))
      (define data
        (for/list ([fut results]
                   [i (in-naturals 1)]
                   #:do [(define page (fut))]
                   ;; a failed extraction returns void and is skipped
                   #:when (not (void? page)))
          (match-define (list title body table pageid) page)
          (define len (string-length body))
          (set-box! x (progress^ i result-count title))
          `#hasheq((id . ,(number->string pageid))
                   (title . ,title)
                   (body . ,body)
                   (table . ,table)
                   (len . ,len))))
      (quit)

      (display "Writing out... ")
      (flush-output)
      (with-output-to-file "cache.rkt" (λ () (write data)) #:exists 'truncate/replace)
      data]))

 (display "Converting... ")
 (flush-output)
 (define slice-size 30000)
 (define slices (ceiling (/ (length data) slice-size)))
 (for ([slice (in-slice slice-size data)]
       [i (in-naturals 1)])
   (define ser (jsexpr->bytes slice))
   (define ser-port (open-input-bytes ser))
   ;; upload progress is measured in 64 KiB units read from the serialized slice
   (define quit (make-progress (λ () (progress^ (ceiling (/ (file-position ser-port) 64 1024))
                                                (ceiling (/ (bytes-length ser) 64 1024))
                                                (format "Posting... (~a/~a)" i slices)))
                               2))
   (define res
     (post (format "http://localhost:8983/solr/~a/update?commit=true" wikiname)
           #:data ser-port
           #:headers '#hasheq((Content-Type . "application/json"))
           #:timeouts (make-timeout-config #:lease 5 #:connect 5 #:request 300)))
   (quit)
   (displayln (response-status-line res))
   ;; the body is never read, so release the response explicitly
   (response-close! res)))

(run start)
|
|
@ -1,3 +0,0 @@
|
|||
#lang info
|
||||
|
||||
;; Packages this project declares as build dependencies.
(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "ini-lib" "memo" "net-cookies-lib" "gui-easy-lib" "sql" "charterm" "cli"))
|
|
@ -1 +0,0 @@
|
|||
((local (".")))
|
|
@ -1,8 +0,0 @@
|
|||
# Set of Catalan contractions for ElisionFilter
|
||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
|
||||
d
|
||||
l
|
||||
m
|
||||
n
|
||||
s
|
||||
t
|
|
@ -1,15 +0,0 @@
|
|||
# Set of French contractions for ElisionFilter
|
||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
|
||||
l
|
||||
m
|
||||
t
|
||||
qu
|
||||
n
|
||||
s
|
||||
j
|
||||
d
|
||||
c
|
||||
jusqu
|
||||
quoiqu
|
||||
lorsqu
|
||||
puisqu
|
|
@ -1,5 +0,0 @@
|
|||
# Set of Irish contractions for ElisionFilter
|
||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
|
||||
d
|
||||
m
|
||||
b
|
|
@ -1,23 +0,0 @@
|
|||
# Set of Italian contractions for ElisionFilter
|
||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
|
||||
c
|
||||
l
|
||||
all
|
||||
dall
|
||||
dell
|
||||
nell
|
||||
sull
|
||||
coll
|
||||
pell
|
||||
gl
|
||||
agl
|
||||
dagl
|
||||
degl
|
||||
negl
|
||||
sugl
|
||||
un
|
||||
m
|
||||
t
|
||||
s
|
||||
v
|
||||
d
|
|
@ -1,5 +0,0 @@
|
|||
# Set of Irish hyphenations for StopFilter
|
||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
|
||||
h
|
||||
n
|
||||
t
|
|
@ -1,6 +0,0 @@
|
|||
# Set of overrides for the Dutch stemmer
|
||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
|
||||
fiets fiets
|
||||
bromfiets bromfiets
|
||||
ei eier
|
||||
kind kinder
|
|
@ -1,420 +0,0 @@
|
|||
#
|
||||
# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
|
||||
#
|
||||
# Any token with a part-of-speech tag that exactly matches those defined in this
|
||||
# file is removed from the token stream.
|
||||
#
|
||||
# Set your own stoptags by uncommenting the lines below. Note that comments are
|
||||
# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
|
||||
# etc. that can be useful for building your own stoptag set.
|
||||
#
|
||||
# The entire possible tagset is provided below for convenience.
|
||||
#
|
||||
#####
|
||||
# noun: unclassified nouns
|
||||
#名詞
|
||||
#
|
||||
# noun-common: Common nouns or nouns where the sub-classification is undefined
|
||||
#名詞-一般
|
||||
#
|
||||
# noun-proper: Proper nouns where the sub-classification is undefined
|
||||
#名詞-固有名詞
|
||||
#
|
||||
# noun-proper-misc: miscellaneous proper nouns
|
||||
#名詞-固有名詞-一般
|
||||
#
|
||||
# noun-proper-person: Personal names where the sub-classification is undefined
|
||||
#名詞-固有名詞-人名
|
||||
#
|
||||
# noun-proper-person-misc: names that cannot be divided into surname and
|
||||
# given name; foreign names; names where the surname or given name is unknown.
|
||||
# e.g. お市の方
|
||||
#名詞-固有名詞-人名-一般
|
||||
#
|
||||
# noun-proper-person-surname: Mainly Japanese surnames.
|
||||
# e.g. 山田
|
||||
#名詞-固有名詞-人名-姓
|
||||
#
|
||||
# noun-proper-person-given_name: Mainly Japanese given names.
|
||||
# e.g. 太郎
|
||||
#名詞-固有名詞-人名-名
|
||||
#
|
||||
# noun-proper-organization: Names representing organizations.
|
||||
# e.g. 通産省, NHK
|
||||
#名詞-固有名詞-組織
|
||||
#
|
||||
# noun-proper-place: Place names where the sub-classification is undefined
|
||||
#名詞-固有名詞-地域
|
||||
#
|
||||
# noun-proper-place-misc: Place names excluding countries.
|
||||
# e.g. アジア, バルセロナ, 京都
|
||||
#名詞-固有名詞-地域-一般
|
||||
#
|
||||
# noun-proper-place-country: Country names.
|
||||
# e.g. 日本, オーストラリア
|
||||
#名詞-固有名詞-地域-国
|
||||
#
|
||||
# noun-pronoun: Pronouns where the sub-classification is undefined
|
||||
#名詞-代名詞
|
||||
#
|
||||
# noun-pronoun-misc: miscellaneous pronouns:
|
||||
# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
|
||||
#名詞-代名詞-一般
|
||||
#
|
||||
# noun-pronoun-contraction: Spoken language contraction made by combining a
|
||||
# pronoun and the particle 'wa'.
|
||||
# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
|
||||
#名詞-代名詞-縮約
|
||||
#
|
||||
# noun-adverbial: Temporal nouns such as names of days or months that behave
|
||||
# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
|
||||
# e.g. 金曜, 一月, 午後, 少量
|
||||
#名詞-副詞可能
|
||||
#
|
||||
# noun-verbal: Nouns that take arguments with case and can appear followed by
|
||||
# 'suru' and related verbs (する, できる, なさる, くださる)
|
||||
# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
|
||||
#名詞-サ変接続
|
||||
#
|
||||
# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
|
||||
# e.g. 健康, 安易, 駄目, だめ
|
||||
#名詞-形容動詞語幹
|
||||
#
|
||||
# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
|
||||
# e.g. 0, 1, 2, 何, 数, 幾
|
||||
#名詞-数
|
||||
#
|
||||
# noun-affix: noun affixes where the sub-classification is undefined
|
||||
#名詞-非自立
|
||||
#
|
||||
# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
|
||||
# attach to the base form of inflectional words, words that cannot be classified
|
||||
# into any of the other categories below. This category includes indefinite nouns.
|
||||
# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
|
||||
# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
|
||||
# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
|
||||
# わり, 割り, 割, ん-口語/, もん-口語/
|
||||
#名詞-非自立-一般
|
||||
#
|
||||
#  noun-affix-adverbial: noun affixes that can behave as adverbs.
|
||||
# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
|
||||
# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
|
||||
# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
|
||||
# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
|
||||
# 儘, 侭, みぎり, 矢先
|
||||
#名詞-非自立-副詞可能
|
||||
#
|
||||
# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
|
||||
# with the stem よう(だ) ("you(da)").
|
||||
# e.g. よう, やう, 様 (よう)
|
||||
#名詞-非自立-助動詞語幹
|
||||
#
|
||||
# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
|
||||
# connection form な (aux "da").
|
||||
# e.g. みたい, ふう
|
||||
#名詞-非自立-形容動詞語幹
|
||||
#
|
||||
# noun-special: special nouns where the sub-classification is undefined.
|
||||
#名詞-特殊
|
||||
#
|
||||
# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
|
||||
# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
|
||||
# form of inflectional words.
|
||||
# e.g. そう
|
||||
#名詞-特殊-助動詞語幹
|
||||
#
|
||||
# noun-suffix: noun suffixes where the sub-classification is undefined.
|
||||
#名詞-接尾
|
||||
#
|
||||
# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
|
||||
# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
|
||||
# any of the other categories below. In general, this category is more inclusive than
|
||||
# 接尾語 ("suffix") and is usually the last element in a compound noun.
|
||||
# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
|
||||
# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
|
||||
#名詞-接尾-一般
|
||||
#
|
||||
# noun-suffix-person: Suffixes that form nouns and attach to person names more often
|
||||
# than other nouns.
|
||||
# e.g. 君, 様, 著
|
||||
#名詞-接尾-人名
|
||||
#
|
||||
# noun-suffix-place: Suffixes that form nouns and attach to place names more often
|
||||
# than other nouns.
|
||||
# e.g. 町, 市, 県
|
||||
#名詞-接尾-地域
|
||||
#
|
||||
# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
|
||||
# can appear before スル ("suru").
|
||||
# e.g. 化, 視, 分け, 入り, 落ち, 買い
|
||||
#名詞-接尾-サ変接続
|
||||
#
|
||||
# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
|
||||
# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
|
||||
# conjunctive form of inflectional words.
|
||||
# e.g. そう
|
||||
#名詞-接尾-助動詞語幹
|
||||
#
|
||||
# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
|
||||
# form of inflectional words and appear before the copula だ ("da").
|
||||
# e.g. 的, げ, がち
|
||||
#名詞-接尾-形容動詞語幹
|
||||
#
|
||||
# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
|
||||
# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
|
||||
#名詞-接尾-副詞可能
|
||||
#
|
||||
# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
|
||||
# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
|
||||
# to numbers.
|
||||
# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
|
||||
#名詞-接尾-助数詞
|
||||
#
|
||||
# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
|
||||
# e.g. (楽し) さ, (考え) 方
|
||||
#名詞-接尾-特殊
|
||||
#
|
||||
# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
|
||||
# together.
|
||||
# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
|
||||
#名詞-接続詞的
|
||||
#
|
||||
# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
|
||||
# semantically verb-like.
|
||||
# e.g. ごらん, ご覧, 御覧, 頂戴
|
||||
#名詞-動詞非自立的
|
||||
#
|
||||
# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
|
||||
# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
|
||||
# is いわく ("iwaku").
|
||||
#名詞-引用文字列
|
||||
#
|
||||
# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
|
||||
# behave like an adjective.
|
||||
# e.g. 申し訳, 仕方, とんでも, 違い
|
||||
#名詞-ナイ形容詞語幹
|
||||
#
|
||||
#####
|
||||
# prefix: unclassified prefixes
|
||||
#接頭詞
|
||||
#
|
||||
# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
|
||||
# excluding numerical expressions.
|
||||
# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
|
||||
#接頭詞-名詞接続
|
||||
#
|
||||
# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
|
||||
# in conjunctive form followed by なる/なさる/くださる.
|
||||
# e.g. お (読みなさい), お (座り)
|
||||
#接頭詞-動詞接続
|
||||
#
|
||||
# prefix-adjectival: Prefixes that attach to adjectives.
|
||||
# e.g. お (寒いですねえ), バカ (でかい)
|
||||
#接頭詞-形容詞接続
|
||||
#
|
||||
# prefix-numerical: Prefixes that attach to numerical expressions.
|
||||
# e.g. 約, およそ, 毎時
|
||||
#接頭詞-数接続
|
||||
#
|
||||
#####
|
||||
# verb: unclassified verbs
|
||||
#動詞
|
||||
#
|
||||
# verb-main:
|
||||
#動詞-自立
|
||||
#
|
||||
# verb-auxiliary:
|
||||
#動詞-非自立
|
||||
#
|
||||
# verb-suffix:
|
||||
#動詞-接尾
|
||||
#
|
||||
#####
|
||||
# adjective: unclassified adjectives
|
||||
#形容詞
|
||||
#
|
||||
# adjective-main:
|
||||
#形容詞-自立
|
||||
#
|
||||
# adjective-auxiliary:
|
||||
#形容詞-非自立
|
||||
#
|
||||
# adjective-suffix:
|
||||
#形容詞-接尾
|
||||
#
|
||||
#####
|
||||
# adverb: unclassified adverbs
|
||||
#副詞
|
||||
#
|
||||
# adverb-misc: Words that can be segmented into one unit and where adnominal
|
||||
# modification is not possible.
|
||||
# e.g. あいかわらず, 多分
|
||||
#副詞-一般
|
||||
#
|
||||
# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
|
||||
# な, する, だ, etc.
|
||||
# e.g. こんなに, そんなに, あんなに, なにか, なんでも
|
||||
#副詞-助詞類接続
|
||||
#
|
||||
#####
|
||||
# adnominal: Words that only have noun-modifying forms.
|
||||
# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
|
||||
# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
|
||||
# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
|
||||
#連体詞
|
||||
#
|
||||
#####
|
||||
# conjunction: Conjunctions that can occur independently.
|
||||
# e.g. が, けれども, そして, じゃあ, それどころか
|
||||
接続詞
|
||||
#
|
||||
#####
|
||||
# particle: unclassified particles.
|
||||
助詞
|
||||
#
|
||||
# particle-case: case particles where the subclassification is undefined.
|
||||
助詞-格助詞
|
||||
#
|
||||
# particle-case-misc: Case particles.
|
||||
# e.g. から, が, で, と, に, へ, より, を, の, にて
|
||||
助詞-格助詞-一般
|
||||
#
|
||||
# particle-case-quote: the "to" that appears after nouns, a person’s speech,
|
||||
# quotation marks, expressions of decisions from a meeting, reasons, judgements,
|
||||
# conjectures, etc.
|
||||
# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
|
||||
助詞-格助詞-引用
|
||||
#
|
||||
# particle-case-compound: Compounds of particles and verbs that mainly behave
|
||||
# like case particles.
|
||||
# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
|
||||
# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
|
||||
# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
|
||||
# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
|
||||
# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
|
||||
# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
|
||||
# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
|
||||
# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
|
||||
助詞-格助詞-連語
|
||||
#
|
||||
# particle-conjunctive:
|
||||
# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
|
||||
# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
|
||||
# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
|
||||
助詞-接続助詞
|
||||
#
|
||||
# particle-dependency:
|
||||
# e.g. こそ, さえ, しか, すら, は, も, ぞ
|
||||
助詞-係助詞
|
||||
#
|
||||
# particle-adverbial:
|
||||
# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
|
||||
# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
|
||||
# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
|
||||
# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
|
||||
# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
|
||||
助詞-副助詞
|
||||
#
|
||||
# particle-interjective: particles with interjective grammatical roles.
|
||||
# e.g. (松島) や
|
||||
助詞-間投助詞
|
||||
#
|
||||
# particle-coordinate:
|
||||
# e.g. と, たり, だの, だり, とか, なり, や, やら
|
||||
助詞-並立助詞
|
||||
#
|
||||
# particle-final:
|
||||
# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
|
||||
# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
|
||||
助詞-終助詞
|
||||
#
|
||||
# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
|
||||
# adverbial, conjunctive, or sentence final. For example:
|
||||
# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
|
||||
# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
|
||||
# 「(祈りが届いたせい) か (, 試験に合格した.)」
|
||||
# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
|
||||
# e.g. か
|
||||
助詞-副助詞/並立助詞/終助詞
|
||||
#
|
||||
# particle-adnominalizer: The "no" that attaches to nouns and modifies
|
||||
# non-inflectional words.
|
||||
助詞-連体化
|
||||
#
|
||||
#  particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
|
||||
# that are giongo, giseigo, or gitaigo.
|
||||
# e.g. に, と
|
||||
助詞-副詞化
|
||||
#
|
||||
# particle-special: A particle that does not fit into one of the above classifications.
|
||||
# This includes particles that are used in Tanka, Haiku, and other poetry.
|
||||
# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
|
||||
助詞-特殊
|
||||
#
|
||||
#####
|
||||
# auxiliary-verb:
|
||||
助動詞
|
||||
#
|
||||
#####
|
||||
# interjection: Greetings and other exclamations.
|
||||
# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
|
||||
# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
|
||||
#感動詞
|
||||
#
|
||||
#####
|
||||
# symbol: unclassified Symbols.
|
||||
記号
|
||||
#
|
||||
# symbol-misc: A general symbol not in one of the categories below.
|
||||
# e.g. [○◎@$〒→+]
|
||||
記号-一般
|
||||
#
|
||||
# symbol-comma: Commas
|
||||
# e.g. [,、]
|
||||
記号-読点
|
||||
#
|
||||
# symbol-period: Periods and full stops.
|
||||
# e.g. [..。]
|
||||
記号-句点
|
||||
#
|
||||
# symbol-space: Full-width whitespace.
|
||||
記号-空白
|
||||
#
|
||||
# symbol-open_bracket:
|
||||
# e.g. [({‘“『【]
|
||||
記号-括弧開
|
||||
#
|
||||
# symbol-close_bracket:
|
||||
# e.g. [)}’”』」】]
|
||||
記号-括弧閉
|
||||
#
|
||||
# symbol-alphabetic:
|
||||
#記号-アルファベット
|
||||
#
|
||||
#####
|
||||
# other: unclassified other
|
||||
#その他
|
||||
#
|
||||
# other-interjection: Words that are hard to classify as noun-suffixes or
|
||||
# sentence-final particles.
|
||||
# e.g. (だ)ァ
|
||||
その他-間投
|
||||
#
|
||||
#####
|
||||
# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
|
||||
# e.g. あの, うんと, えと
|
||||
フィラー
|
||||
#
|
||||
#####
|
||||
# non-verbal: non-verbal sound.
|
||||
非言語音
|
||||
#
|
||||
#####
|
||||
# fragment:
|
||||
#語断片
|
||||
#
|
||||
#####
|
||||
# unknown: unknown part of speech.
|
||||
#未知語
|
||||
#
|
||||
##### End of file
|
|
@ -1,125 +0,0 @@
|
|||
# This file was created by Jacques Savoy and is distributed under the BSD license.
|
||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
|
||||
# Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
# Cleaned on October 11, 2009 (not normalized, so use before normalization)
|
||||
# This means that when modifying this list, you might need to add some
|
||||
# redundant entries, for example containing forms with both أ and ا
|
||||
من
|
||||
ومن
|
||||
منها
|
||||
منه
|
||||
في
|
||||
وفي
|
||||
فيها
|
||||
فيه
|
||||
و
|
||||
ف
|
||||
ثم
|
||||
او
|
||||
أو
|
||||
ب
|
||||
بها
|
||||
به
|
||||
ا
|
||||
أ
|
||||
اى
|
||||
اي
|
||||
أي
|
||||
أى
|
||||
لا
|
||||
ولا
|
||||
الا
|
||||
ألا
|
||||
إلا
|
||||
لكن
|
||||
ما
|
||||
وما
|
||||
كما
|
||||
فما
|
||||
عن
|
||||
مع
|
||||
اذا
|
||||
إذا
|
||||
ان
|
||||
أن
|
||||
إن
|
||||
انها
|
||||
أنها
|
||||
إنها
|
||||
انه
|
||||
أنه
|
||||
إنه
|
||||
بان
|
||||
بأن
|
||||
فان
|
||||
فأن
|
||||
وان
|
||||
وأن
|
||||
وإن
|
||||
التى
|
||||
التي
|
||||
الذى
|
||||
الذي
|
||||
الذين
|
||||
الى
|
||||
الي
|
||||
إلى
|
||||
إلي
|
||||
على
|
||||
عليها
|
||||
عليه
|
||||
اما
|
||||
أما
|
||||
إما
|
||||
ايضا
|
||||
أيضا
|
||||
كل
|
||||
وكل
|
||||
لم
|
||||
ولم
|
||||
لن
|
||||
ولن
|
||||
هى
|
||||
هي
|
||||
هو
|
||||
وهى
|
||||
وهي
|
||||
وهو
|
||||
فهى
|
||||
فهي
|
||||
فهو
|
||||
انت
|
||||
أنت
|
||||
لك
|
||||
لها
|
||||
له
|
||||
هذه
|
||||
هذا
|
||||
تلك
|
||||
ذلك
|
||||
هناك
|
||||
كانت
|
||||
كان
|
||||
يكون
|
||||
تكون
|
||||
وكانت
|
||||
وكان
|
||||
غير
|
||||
بعض
|
||||
قد
|
||||
نحو
|
||||
بين
|
||||
بينما
|
||||
منذ
|
||||
ضمن
|
||||
حيث
|
||||
الان
|
||||
الآن
|
||||
خلال
|
||||
بعد
|
||||
قبل
|
||||
حتى
|
||||
عند
|
||||
عندما
|
||||
لدى
|
||||
جميع
|
|
@ -1,193 +0,0 @@
|
|||
# This file was created by Jacques Savoy and is distributed under the BSD license.
|
||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
|
||||
# Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
а
|
||||
аз
|
||||
ако
|
||||
ала
|
||||
бе
|
||||
без
|
||||
беше
|
||||
би
|
||||
бил
|
||||
била
|
||||
били
|
||||
било
|
||||
близо
|
||||
бъдат
|
||||
бъде
|
||||
бяха
|
||||
в
|
||||
вас
|
||||
ваш
|
||||
ваша
|
||||
вероятно
|
||||
вече
|
||||
взема
|
||||
ви
|
||||
вие
|
||||
винаги
|
||||
все
|
||||
всеки
|
||||
всички
|
||||
всичко
|
||||
всяка
|
||||
във
|
||||
въпреки
|
||||
върху
|
||||
г
|
||||
ги
|
||||
главно
|
||||
го
|
||||
д
|
||||
да
|
||||
дали
|
||||
до
|
||||
докато
|
||||
докога
|
||||
дори
|
||||
досега
|
||||
доста
|
||||
е
|
||||
едва
|
||||
един
|
||||
ето
|
||||
за
|
||||
зад
|
||||
заедно
|
||||
заради
|
||||
засега
|
||||
затова
|
||||
защо
|
||||
защото
|
||||
и
|
||||
из
|
||||
или
|
||||
им
|
||||
има
|
||||
имат
|
||||
иска
|
||||
й
|
||||
каза
|
||||
как
|
||||
каква
|
||||
какво
|
||||
както
|
||||
какъв
|
||||
като
|
||||
кога
|
||||
когато
|
||||
което
|
||||
които
|
||||
кой
|
||||
който
|
||||
колко
|
||||
която
|
||||
къде
|
||||
където
|
||||
към
|
||||
ли
|
||||
м
|
||||
ме
|
||||
между
|
||||
мен
|
||||
ми
|
||||
мнозина
|
||||
мога
|
||||
могат
|
||||
може
|
||||
моля
|
||||
момента
|
||||
му
|
||||
н
|
||||
на
|
||||
над
|
||||
назад
|
||||
най
|
||||
направи
|
||||
напред
|
||||
например
|
||||
нас
|
||||
не
|
||||
него
|
||||
нея
|
||||
ни
|
||||
ние
|
||||
никой
|
||||
нито
|
||||
но
|
||||
някои
|
||||
някой
|
||||
няма
|
||||
обаче
|
||||
около
|
||||
освен
|
||||
особено
|
||||
от
|
||||
отгоре
|
||||
отново
|
||||
още
|
||||
пак
|
||||
по
|
||||
повече
|
||||
повечето
|
||||
под
|
||||
поне
|
||||
поради
|
||||
после
|
||||
почти
|
||||
прави
|
||||
пред
|
||||
преди
|
||||
през
|
||||
при
|
||||
пък
|
||||
първо
|
||||
с
|
||||
са
|
||||
само
|
||||
се
|
||||
сега
|
||||
си
|
||||
скоро
|
||||
след
|
||||
сме
|
||||
според
|
||||
сред
|
||||
срещу
|
||||
сте
|
||||
съм
|
||||
със
|
||||
също
|
||||
т
|
||||
тази
|
||||
така
|
||||
такива
|
||||
такъв
|
||||
там
|
||||
твой
|
||||
те
|
||||
тези
|
||||
ти
|
||||
тн
|
||||
то
|
||||
това
|
||||
тогава
|
||||
този
|
||||
той
|
||||
толкова
|
||||
точно
|
||||
трябва
|
||||
тук
|
||||
тъй
|
||||
тя
|
||||
тях
|
||||
у
|
||||
харесва
|
||||
ч
|
||||
че
|
||||
често
|
||||
чрез
|
||||
ще
|
||||
щом
|
||||
я
|
|
@ -1,220 +0,0 @@
|
|||
# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
|
||||
a
|
||||
abans
|
||||
ací
|
||||
ah
|
||||
així
|
||||
això
|
||||
al
|
||||
als
|
||||
aleshores
|
||||
algun
|
||||
alguna
|
||||
algunes
|
||||
alguns
|
||||
alhora
|
||||
allà
|
||||
allí
|
||||
allò
|
||||
altra
|
||||
altre
|
||||
altres
|
||||
amb
|
||||
ambdós
|
||||
ambdues
|
||||
apa
|
||||
aquell
|
||||
aquella
|
||||
aquelles
|
||||
aquells
|
||||
aquest
|
||||
aquesta
|
||||
aquestes
|
||||
aquests
|
||||
aquí
|
||||
baix
|
||||
cada
|
||||
cadascú
|
||||
cadascuna
|
||||
cadascunes
|
||||
cadascuns
|
||||
com
|
||||
contra
|
||||
d'un
|
||||
d'una
|
||||
d'unes
|
||||
d'uns
|
||||
dalt
|
||||
de
|
||||
del
|
||||
dels
|
||||
des
|
||||
després
|
||||
dins
|
||||
dintre
|
||||
donat
|
||||
doncs
|
||||
durant
|
||||
e
|
||||
eh
|
||||
el
|
||||
els
|
||||
em
|
||||
en
|
||||
encara
|
||||
ens
|
||||
entre
|
||||
érem
|
||||
eren
|
||||
éreu
|
||||
es
|
||||
és
|
||||
esta
|
||||
està
|
||||
estàvem
|
||||
estaven
|
||||
estàveu
|
||||
esteu
|
||||
et
|
||||
etc
|
||||
ets
|
||||
fins
|
||||
fora
|
||||
gairebé
|
||||
ha
|
||||
han
|
||||
has
|
||||
havia
|
||||
he
|
||||
hem
|
||||
heu
|
||||
hi
|
||||
ho
|
||||
i
|
||||
igual
|
||||
iguals
|
||||
ja
|
||||
l'hi
|
||||
la
|
||||
les
|
||||
li
|
||||
li'n
|
||||
llavors
|
||||
m'he
|
||||
ma
|
||||
mal
|
||||
malgrat
|
||||
mateix
|
||||
mateixa
|
||||
mateixes
|
||||
mateixos
|
||||
me
|
||||
mentre
|
||||
més
|
||||
meu
|
||||
meus
|
||||
meva
|
||||
meves
|
||||
molt
|
||||
molta
|
||||
moltes
|
||||
molts
|
||||
mon
|
||||
mons
|
||||
n'he
|
||||
n'hi
|
||||
ne
|
||||
ni
|
||||
no
|
||||
nogensmenys
|
||||
només
|
||||
nosaltres
|
||||
nostra
|
||||
nostre
|
||||
nostres
|
||||
o
|
||||
oh
|
||||
oi
|
||||
on
|
||||
pas
|
||||
pel
|
||||
pels
|
||||
per
|
||||
però
|
||||
perquè
|
||||
poc
|
||||
poca
|
||||
pocs
|
||||
poques
|
||||
potser
|
||||
propi
|
||||
qual
|
||||
quals
|
||||
quan
|
||||
quant
|
||||
que
|
||||
què
|
||||
quelcom
|
||||
qui
|
||||
quin
|
||||
quina
|
||||
quines
|
||||
quins
|
||||
s'ha
|
||||
s'han
|
||||
sa
|
||||
semblant
|
||||
semblants
|
||||
ses
|
||||
seu
|
||||
seus
|
||||
seva
|
||||
seva
|
||||
seves
|
||||
si
|
||||
sobre
|
||||
sobretot
|
||||
sóc
|
||||
solament
|
||||
sols
|
||||
son
|
||||
són
|
||||
sons
|
||||
sota
|
||||
sou
|
||||
t'ha
|
||||
t'han
|
||||
t'he
|
||||
ta
|
||||
tal
|
||||
també
|
||||
tampoc
|
||||
tan
|
||||
tant
|
||||
tanta
|
||||
tantes
|
||||
teu
|
||||
teus
|
||||
teva
|
||||
teves
|
||||
ton
|
||||
tons
|
||||
tot
|
||||
tota
|
||||
totes
|
||||
tots
|
||||
un
|
||||
una
|
||||
unes
|
||||
uns
|
||||
us
|
||||
va
|
||||
vaig
|
||||
vam
|
||||
van
|
||||
vas
|
||||
veu
|
||||
vosaltres
|
||||
vostra
|
||||
vostre
|
||||
vostres
|
|
@ -1,172 +0,0 @@
|
|||
a
|
||||
s
|
||||
k
|
||||
o
|
||||
i
|
||||
u
|
||||
v
|
||||
z
|
||||
dnes
|
||||
cz
|
||||
tímto
|
||||
budeš
|
||||
budem
|
||||
byli
|
||||
jseš
|
||||
můj
|
||||
svým
|
||||
ta
|
||||
tomto
|
||||
tohle
|
||||
tuto
|
||||
tyto
|
||||
jej
|
||||
zda
|
||||
proč
|
||||
máte
|
||||
tato
|
||||
kam
|
||||
tohoto
|
||||
kdo
|
||||
kteří
|
||||
mi
|
||||
nám
|
||||
tom
|
||||
tomuto
|
||||
mít
|
||||
nic
|
||||
proto
|
||||
kterou
|
||||
byla
|
||||
toho
|
||||
protože
|
||||
asi
|
||||
ho
|
||||
naši
|
||||
napište
|
||||
re
|
||||
což
|
||||
tím
|
||||
takže
|
||||
svých
|
||||
její
|
||||
svými
|
||||
jste
|
||||
aj
|
||||
tu
|
||||
tedy
|
||||
teto
|
||||
bylo
|
||||
kde
|
||||
ke
|
||||
pravé
|
||||
ji
|
||||
nad
|
||||
nejsou
|
||||
či
|
||||
pod
|
||||
téma
|
||||
mezi
|
||||
přes
|
||||
ty
|
||||
pak
|
||||
vám
|
||||
ani
|
||||
když
|
||||
však
|
||||
neg
|
||||
jsem
|
||||
tento
|
||||
článku
|
||||
články
|
||||
aby
|
||||
jsme
|
||||
před
|
||||
pta
|
||||
jejich
|
||||
byl
|
||||
ještě
|
||||
až
|
||||
bez
|
||||
také
|
||||
pouze
|
||||
první
|
||||
vaše
|
||||
která
|
||||
nás
|
||||
nový
|
||||
tipy
|
||||
pokud
|
||||
může
|
||||
strana
|
||||
jeho
|
||||
své
|
||||
jiné
|
||||
zprávy
|
||||
nové
|
||||
není
|
||||
vás
|
||||
jen
|
||||
podle
|
||||
zde
|
||||
už
|
||||
být
|
||||
více
|
||||
bude
|
||||
již
|
||||
než
|
||||
který
|
||||
by
|
||||
které
|
||||
co
|
||||
nebo
|
||||
ten
|
||||
tak
|
||||
má
|
||||
při
|
||||
od
|
||||
po
|
||||
jsou
|
||||
jak
|
||||
další
|
||||
ale
|
||||
si
|
||||
se
|
||||
ve
|
||||
to
|
||||
jako
|
||||
za
|
||||
zpět
|
||||
ze
|
||||
do
|
||||
pro
|
||||
je
|
||||
na
|
||||
atd
|
||||
atp
|
||||
jakmile
|
||||
přičemž
|
||||
já
|
||||
on
|
||||
ona
|
||||
ono
|
||||
oni
|
||||
ony
|
||||
my
|
||||
vy
|
||||
jí
|
||||
ji
|
||||
mě
|
||||
mne
|
||||
jemu
|
||||
tomu
|
||||
těm
|
||||
těmu
|
||||
němu
|
||||
němuž
|
||||
jehož
|
||||
jíž
|
||||
jelikož
|
||||
jež
|
||||
jakož
|
||||
načež
|
|
@ -1,110 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| A Danish stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
| This is a ranked list (commonest to rarest) of stopwords derived from
|
||||
| a large text sample.
|
||||
|
||||
|
||||
og | and
|
||||
i | in
|
||||
jeg | I
|
||||
det | that (dem. pronoun)/it (pers. pronoun)
|
||||
at | that (in front of a sentence)/to (with infinitive)
|
||||
en | a/an
|
||||
den | it (pers. pronoun)/that (dem. pronoun)
|
||||
til | to/at/for/until/against/by/of/into, more
|
||||
er | present tense of "to be"
|
||||
som | who, as
|
||||
på | on/upon/in/on/at/to/after/of/with/for, on
|
||||
de | they
|
||||
med | with/by/in, along
|
||||
han | he
|
||||
af | of/by/from/off/for/in/with/on, off
|
||||
for | at/for/to/from/by/of/ago, in front/before, because
|
||||
ikke | not
|
||||
der | who/which, there/those
|
||||
var | past tense of "to be"
|
||||
mig | me/myself
|
||||
sig | oneself/himself/herself/itself/themselves
|
||||
men | but
|
||||
et | a/an/one, one (number), someone/somebody/one
|
||||
har | present tense of "to have"
|
||||
om | round/about/for/in/a, about/around/down, if
|
||||
vi | we
|
||||
min | my
|
||||
havde | past tense of "to have"
|
||||
ham | him
|
||||
hun | she
|
||||
nu | now
|
||||
over | over/above/across/by/beyond/past/on/about, over/past
|
||||
da | then, when/as/since
|
||||
fra | from/off/since, off, since
|
||||
du | you
|
||||
ud | out
|
||||
sin | his/her/its/one's
|
||||
dem | them
|
||||
os | us/ourselves
|
||||
op | up
|
||||
man | you/one
|
||||
hans | his
|
||||
hvor | where
|
||||
eller | or
|
||||
hvad | what
|
||||
skal | must/shall etc.
|
||||
selv | myself/youself/herself/ourselves etc., even
|
||||
her | here
|
||||
alle | all/everyone/everybody etc.
|
||||
vil | will (verb)
|
||||
blev | past tense of "to stay/to remain/to get/to become"
|
||||
kunne | could
|
||||
ind | in
|
||||
når | when
|
||||
være | present tense of "to be"
|
||||
dog | however/yet/after all
|
||||
noget | something
|
||||
ville | would
|
||||
jo | you know/you see (adv), yes
|
||||
deres | their/theirs
|
||||
efter | after/behind/according to/for/by/from, later/afterwards
|
||||
ned | down
|
||||
skulle | should
|
||||
denne | this
|
||||
end | than
|
||||
dette | this
|
||||
mit | my/mine
|
||||
også | also
|
||||
under | under/beneath/below/during, below/underneath
|
||||
have | have
|
||||
dig | you
|
||||
anden | other
|
||||
hende | her
|
||||
mine | my
|
||||
alt | everything
|
||||
meget | much/very, plenty of
|
||||
sit | his, her, its, one's
|
||||
sine | his, her, its, one's
|
||||
vor | our
|
||||
mod | against
|
||||
disse | these
|
||||
hvis | if
|
||||
din | your/yours
|
||||
nogle | some
|
||||
hos | by/at
|
||||
blive | be/become
|
||||
mange | many
|
||||
ad | by/through
|
||||
bliver | present tense of "to be/to become"
|
||||
hendes | her/hers
|
||||
været | be
|
||||
thi | for (conj)
|
||||
jer | you
|
||||
sådan | such, like this/like that
|
|
@ -1,294 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| A German stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
| The number of forms in this list is reduced significantly by passing it
|
||||
| through the German stemmer.
|
||||
|
||||
|
||||
aber | but
|
||||
|
||||
alle | all
|
||||
allem
|
||||
allen
|
||||
aller
|
||||
alles
|
||||
|
||||
als | than, as
|
||||
also | so
|
||||
am | an + dem
|
||||
an | at
|
||||
|
||||
ander | other
|
||||
andere
|
||||
anderem
|
||||
anderen
|
||||
anderer
|
||||
anderes
|
||||
anderm
|
||||
andern
|
||||
anderr
|
||||
anders
|
||||
|
||||
auch | also
|
||||
auf | on
|
||||
aus | out of
|
||||
bei | by
|
||||
bin | am
|
||||
bis | until
|
||||
bist | art
|
||||
da | there
|
||||
damit | with it
|
||||
dann | then
|
||||
|
||||
der | the
|
||||
den
|
||||
des
|
||||
dem
|
||||
die
|
||||
das
|
||||
|
||||
daß | that
|
||||
|
||||
derselbe | the same
|
||||
derselben
|
||||
denselben
|
||||
desselben
|
||||
demselben
|
||||
dieselbe
|
||||
dieselben
|
||||
dasselbe
|
||||
|
||||
dazu | to that
|
||||
|
||||
dein | thy
|
||||
deine
|
||||
deinem
|
||||
deinen
|
||||
deiner
|
||||
deines
|
||||
|
||||
denn | because
|
||||
|
||||
derer | of those
|
||||
dessen | of him
|
||||
|
||||
dich | thee
|
||||
dir | to thee
|
||||
du | thou
|
||||
|
||||
dies | this
|
||||
diese
|
||||
diesem
|
||||
diesen
|
||||
dieser
|
||||
dieses
|
||||
|
||||
|
||||
doch | (several meanings)
|
||||
dort | (over) there
|
||||
|
||||
|
||||
durch | through
|
||||
|
||||
ein | a
|
||||
eine
|
||||
einem
|
||||
einen
|
||||
einer
|
||||
eines
|
||||
|
||||
einig | some
|
||||
einige
|
||||
einigem
|
||||
einigen
|
||||
einiger
|
||||
einiges
|
||||
|
||||
einmal | once
|
||||
|
||||
er | he
|
||||
ihn | him
|
||||
ihm | to him
|
||||
|
||||
es | it
|
||||
etwas | something
|
||||
|
||||
euer | your
|
||||
eure
|
||||
eurem
|
||||
euren
|
||||
eurer
|
||||
eures
|
||||
|
||||
für | for
|
||||
gegen | towards
|
||||
gewesen | p.p. of sein
|
||||
hab | have
|
||||
habe | have
|
||||
haben | have
|
||||
hat | has
|
||||
hatte | had
|
||||
hatten | had
|
||||
hier | here
|
||||
hin | there
|
||||
hinter | behind
|
||||
|
||||
ich | I
|
||||
mich | me
|
||||
mir | to me
|
||||
|
||||
|
||||
ihr | you, to her
|
||||
ihre
|
||||
ihrem
|
||||
ihren
|
||||
ihrer
|
||||
ihres
|
||||
euch | to you
|
||||
|
||||
im | in + dem
|
||||
in | in
|
||||
indem | while
|
||||
ins | in + das
|
||||
ist | is
|
||||
|
||||
jede | each, every
|
||||
jedem
|
||||
jeden
|
||||
jeder
|
||||
jedes
|
||||
|
||||
jene | that
|
||||
jenem
|
||||
jenen
|
||||
jener
|
||||
jenes
|
||||
|
||||
jetzt | now
|
||||
kann | can
|
||||
|
||||
kein | no
|
||||
keine
|
||||
keinem
|
||||
keinen
|
||||
keiner
|
||||
keines
|
||||
|
||||
können | can
|
||||
könnte | could
|
||||
machen | do
|
||||
man | one
|
||||
|
||||
manche | some, many a
|
||||
manchem
|
||||
manchen
|
||||
mancher
|
||||
manches
|
||||
|
||||
mein | my
|
||||
meine
|
||||
meinem
|
||||
meinen
|
||||
meiner
|
||||
meines
|
||||
|
||||
mit | with
|
||||
muss | must
|
||||
musste | had to
|
||||
nach | to(wards)
|
||||
nicht | not
|
||||
nichts | nothing
|
||||
noch | still, yet
|
||||
nun | now
|
||||
nur | only
|
||||
ob | whether
|
||||
oder | or
|
||||
ohne | without
|
||||
sehr | very
|
||||
|
||||
sein | his
|
||||
seine
|
||||
seinem
|
||||
seinen
|
||||
seiner
|
||||
seines
|
||||
|
||||
selbst | self
|
||||
sich | herself
|
||||
|
||||
sie | they, she
|
||||
ihnen | to them
|
||||
|
||||
sind | are
|
||||
so | so
|
||||
|
||||
solche | such
|
||||
solchem
|
||||
solchen
|
||||
solcher
|
||||
solches
|
||||
|
||||
soll | shall
|
||||
sollte | should
|
||||
sondern | but
|
||||
sonst | else
|
||||
über | over
|
||||
um | about, around
|
||||
und | and
|
||||
|
||||
uns | us
|
||||
unse
|
||||
unsem
|
||||
unsen
|
||||
unser
|
||||
unses
|
||||
|
||||
unter | under
|
||||
viel | much
|
||||
vom | von + dem
|
||||
von | from
|
||||
vor | before
|
||||
während | while
|
||||
war | was
|
||||
waren | were
|
||||
warst | wast
|
||||
was | what
|
||||
weg | away, off
|
||||
weil | because
|
||||
weiter | further
|
||||
|
||||
welche | which
|
||||
welchem
|
||||
welchen
|
||||
welcher
|
||||
welches
|
||||
|
||||
wenn | when
|
||||
werde | will
|
||||
werden | will
|
||||
wie | how
|
||||
wieder | again
|
||||
will | want
|
||||
wir | we
|
||||
wird | will
|
||||
wirst | willst
|
||||
wo | where
|
||||
wollen | want
|
||||
wollte | wanted
|
||||
würde | would
|
||||
würden | would
|
||||
zu | to
|
||||
zum | zu + dem
|
||||
zur | zu + der
|
||||
zwar | indeed
|
||||
zwischen | between
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
# Lucene Greek Stopwords list
|
||||
# Note: by default this file is used after GreekLowerCaseFilter,
|
||||
# so when modifying this file use 'σ' instead of 'ς'
|
||||
ο
|
||||
η
|
||||
το
|
||||
οι
|
||||
τα
|
||||
του
|
||||
τησ
|
||||
των
|
||||
τον
|
||||
την
|
||||
και
|
||||
κι
|
||||
κ
|
||||
ειμαι
|
||||
εισαι
|
||||
ειναι
|
||||
ειμαστε
|
||||
ειστε
|
||||
στο
|
||||
στον
|
||||
στη
|
||||
στην
|
||||
μα
|
||||
αλλα
|
||||
απο
|
||||
για
|
||||
προσ
|
||||
με
|
||||
σε
|
||||
ωσ
|
||||
παρα
|
||||
αντι
|
||||
κατα
|
||||
μετα
|
||||
θα
|
||||
να
|
||||
δε
|
||||
δεν
|
||||
μη
|
||||
μην
|
||||
επι
|
||||
ενω
|
||||
εαν
|
||||
αν
|
||||
τοτε
|
||||
που
|
||||
πωσ
|
||||
ποιοσ
|
||||
ποια
|
||||
ποιο
|
||||
ποιοι
|
||||
ποιεσ
|
||||
ποιων
|
||||
ποιουσ
|
||||
αυτοσ
|
||||
αυτη
|
||||
αυτο
|
||||
αυτοι
|
||||
αυτων
|
||||
αυτουσ
|
||||
αυτεσ
|
||||
αυτα
|
||||
εκεινοσ
|
||||
εκεινη
|
||||
εκεινο
|
||||
εκεινοι
|
||||
εκεινεσ
|
||||
εκεινα
|
||||
εκεινων
|
||||
εκεινουσ
|
||||
οπωσ
|
||||
ομωσ
|
||||
ισωσ
|
||||
οσο
|
||||
οτι
|
|
@ -1,54 +0,0 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# a couple of test stopwords to test that the words are really being
|
||||
# configured from this file:
|
||||
stopworda
|
||||
stopwordb
|
||||
|
||||
# Standard english stop words taken from Lucene's StopAnalyzer
|
||||
a
|
||||
an
|
||||
and
|
||||
are
|
||||
as
|
||||
at
|
||||
be
|
||||
but
|
||||
by
|
||||
for
|
||||
if
|
||||
in
|
||||
into
|
||||
is
|
||||
it
|
||||
no
|
||||
not
|
||||
of
|
||||
on
|
||||
or
|
||||
such
|
||||
that
|
||||
the
|
||||
their
|
||||
then
|
||||
there
|
||||
these
|
||||
they
|
||||
this
|
||||
to
|
||||
was
|
||||
will
|
||||
with
|
|
@ -1,356 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| A Spanish stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
|
||||
| The following is a ranked list (commonest to rarest) of stopwords
|
||||
| deriving from a large sample of text.
|
||||
|
||||
| Extra words have been added at the end.
|
||||
|
||||
de | from, of
|
||||
la | the, her
|
||||
que | who, that
|
||||
el | the
|
||||
en | in
|
||||
y | and
|
||||
a | to
|
||||
los | the, them
|
||||
del | de + el
|
||||
se | himself, from him etc
|
||||
las | the, them
|
||||
por | for, by, etc
|
||||
un | a
|
||||
para | for
|
||||
con | with
|
||||
no | no
|
||||
una | a
|
||||
su | his, her
|
||||
al | a + el
|
||||
| es from SER
|
||||
lo | him
|
||||
como | how
|
||||
más | more
|
||||
pero | pero
|
||||
sus | su plural
|
||||
le | to him, her
|
||||
ya | already
|
||||
o | or
|
||||
| fue from SER
|
||||
este | this
|
||||
| ha from HABER
|
||||
sí | himself etc
|
||||
porque | because
|
||||
esta | this
|
||||
| son from SER
|
||||
entre | between
|
||||
| está from ESTAR
|
||||
cuando | when
|
||||
muy | very
|
||||
sin | without
|
||||
sobre | on
|
||||
| ser from SER
|
||||
| tiene from TENER
|
||||
también | also
|
||||
me | me
|
||||
hasta | until
|
||||
hay | there is/are
|
||||
donde | where
|
||||
| han from HABER
|
||||
quien | whom, that
|
||||
| están from ESTAR
|
||||
| estado from ESTAR
|
||||
desde | from
|
||||
todo | all
|
||||
nos | us
|
||||
durante | during
|
||||
| estados from ESTAR
|
||||
todos | all
|
||||
uno | a
|
||||
les | to them
|
||||
ni | nor
|
||||
contra | against
|
||||
otros | other
|
||||
| fueron from SER
|
||||
ese | that
|
||||
eso | that
|
||||
| había from HABER
|
||||
ante | before
|
||||
ellos | they
|
||||
e | and (variant of y)
|
||||
esto | this
|
||||
mí | me
|
||||
antes | before
|
||||
algunos | some
|
||||
qué | what?
|
||||
unos | a
|
||||
yo | I
|
||||
otro | other
|
||||
otras | other
|
||||
otra | other
|
||||
él | he
|
||||
tanto | so much, many
|
||||
esa | that
|
||||
estos | these
|
||||
mucho | much, many
|
||||
quienes | who
|
||||
nada | nothing
|
||||
muchos | many
|
||||
cual | who
|
||||
| sea from SER
|
||||
poco | few
|
||||
ella | she
|
||||
estar | to be
|
||||
| haber from HABER
|
||||
estas | these
|
||||
| estaba from ESTAR
|
||||
| estamos from ESTAR
|
||||
algunas | some
|
||||
algo | something
|
||||
nosotros | we
|
||||
|
||||
| other forms
|
||||
|
||||
mi | me
|
||||
mis | mi plural
|
||||
tú | thou
|
||||
te | thee
|
||||
ti | thee
|
||||
tu | thy
|
||||
tus | tu plural
|
||||
ellas | they
|
||||
nosotras | we
|
||||
vosotros | you
|
||||
vosotras | you
|
||||
os | you
|
||||
mío | mine
|
||||
mía |
|
||||
míos |
|
||||
mías |
|
||||
tuyo | thine
|
||||
tuya |
|
||||
tuyos |
|
||||
tuyas |
|
||||
suyo | his, hers, theirs
|
||||
suya |
|
||||
suyos |
|
||||
suyas |
|
||||
nuestro | ours
|
||||
nuestra |
|
||||
nuestros |
|
||||
nuestras |
|
||||
vuestro | yours
|
||||
vuestra |
|
||||
vuestros |
|
||||
vuestras |
|
||||
esos | those
|
||||
esas | those
|
||||
|
||||
| forms of estar, to be (not including the infinitive):
|
||||
estoy
|
||||
estás
|
||||
está
|
||||
estamos
|
||||
estáis
|
||||
están
|
||||
esté
|
||||
estés
|
||||
estemos
|
||||
estéis
|
||||
estén
|
||||
estaré
|
||||
estarás
|
||||
estará
|
||||
estaremos
|
||||
estaréis
|
||||
estarán
|
||||
estaría
|
||||
estarías
|
||||
estaríamos
|
||||
estaríais
|
||||
estarían
|
||||
estaba
|
||||
estabas
|
||||
estábamos
|
||||
estabais
|
||||
estaban
|
||||
estuve
|
||||
estuviste
|
||||
estuvo
|
||||
estuvimos
|
||||
estuvisteis
|
||||
estuvieron
|
||||
estuviera
|
||||
estuvieras
|
||||
estuviéramos
|
||||
estuvierais
|
||||
estuvieran
|
||||
estuviese
|
||||
estuvieses
|
||||
estuviésemos
|
||||
estuvieseis
|
||||
estuviesen
|
||||
estando
|
||||
estado
|
||||
estada
|
||||
estados
|
||||
estadas
|
||||
estad
|
||||
|
||||
| forms of haber, to have (not including the infinitive):
|
||||
he
|
||||
has
|
||||
ha
|
||||
hemos
|
||||
habéis
|
||||
han
|
||||
haya
|
||||
hayas
|
||||
hayamos
|
||||
hayáis
|
||||
hayan
|
||||
habré
|
||||
habrás
|
||||
habrá
|
||||
habremos
|
||||
habréis
|
||||
habrán
|
||||
habría
|
||||
habrías
|
||||
habríamos
|
||||
habríais
|
||||
habrían
|
||||
había
|
||||
habías
|
||||
habíamos
|
||||
habíais
|
||||
habían
|
||||
hube
|
||||
hubiste
|
||||
hubo
|
||||
hubimos
|
||||
hubisteis
|
||||
hubieron
|
||||
hubiera
|
||||
hubieras
|
||||
hubiéramos
|
||||
hubierais
|
||||
hubieran
|
||||
hubiese
|
||||
hubieses
|
||||
hubiésemos
|
||||
hubieseis
|
||||
hubiesen
|
||||
habiendo
|
||||
habido
|
||||
habida
|
||||
habidos
|
||||
habidas
|
||||
|
||||
| forms of ser, to be (not including the infinitive):
|
||||
soy
|
||||
eres
|
||||
es
|
||||
somos
|
||||
sois
|
||||
son
|
||||
sea
|
||||
seas
|
||||
seamos
|
||||
seáis
|
||||
sean
|
||||
seré
|
||||
serás
|
||||
será
|
||||
seremos
|
||||
seréis
|
||||
serán
|
||||
sería
|
||||
serías
|
||||
seríamos
|
||||
seríais
|
||||
serían
|
||||
era
|
||||
eras
|
||||
éramos
|
||||
erais
|
||||
eran
|
||||
fui
|
||||
fuiste
|
||||
fue
|
||||
fuimos
|
||||
fuisteis
|
||||
fueron
|
||||
fuera
|
||||
fueras
|
||||
fuéramos
|
||||
fuerais
|
||||
fueran
|
||||
fuese
|
||||
fueses
|
||||
fuésemos
|
||||
fueseis
|
||||
fuesen
|
||||
siendo
|
||||
sido
|
||||
| sed also means 'thirst'
|
||||
|
||||
| forms of tener, to have (not including the infinitive):
|
||||
tengo
|
||||
tienes
|
||||
tiene
|
||||
tenemos
|
||||
tenéis
|
||||
tienen
|
||||
tenga
|
||||
tengas
|
||||
tengamos
|
||||
tengáis
|
||||
tengan
|
||||
tendré
|
||||
tendrás
|
||||
tendrá
|
||||
tendremos
|
||||
tendréis
|
||||
tendrán
|
||||
tendría
|
||||
tendrías
|
||||
tendríamos
|
||||
tendríais
|
||||
tendrían
|
||||
tenía
|
||||
tenías
|
||||
teníamos
|
||||
teníais
|
||||
tenían
|
||||
tuve
|
||||
tuviste
|
||||
tuvo
|
||||
tuvimos
|
||||
tuvisteis
|
||||
tuvieron
|
||||
tuviera
|
||||
tuvieras
|
||||
tuviéramos
|
||||
tuvierais
|
||||
tuvieran
|
||||
tuviese
|
||||
tuvieses
|
||||
tuviésemos
|
||||
tuvieseis
|
||||
tuviesen
|
||||
teniendo
|
||||
tenido
|
||||
tenida
|
||||
tenidos
|
||||
tenidas
|
||||
tened
|
||||
|
File diff suppressed because it is too large
Load diff
|
@ -1,99 +0,0 @@
|
|||
# example set of basque stopwords
|
||||
al
|
||||
anitz
|
||||
arabera
|
||||
asko
|
||||
baina
|
||||
bat
|
||||
batean
|
||||
batek
|
||||
bati
|
||||
batzuei
|
||||
batzuek
|
||||
batzuetan
|
||||
batzuk
|
||||
bera
|
||||
beraiek
|
||||
berau
|
||||
berauek
|
||||
bere
|
||||
berori
|
||||
beroriek
|
||||
beste
|
||||
bezala
|
||||
da
|
||||
dago
|
||||
dira
|
||||
ditu
|
||||
du
|
||||
dute
|
||||
edo
|
||||
egin
|
||||
ere
|
||||
eta
|
||||
eurak
|
||||
ez
|
||||
gainera
|
||||
gu
|
||||
gutxi
|
||||
guzti
|
||||
haiei
|
||||
haiek
|
||||
haietan
|
||||
hainbeste
|
||||
hala
|
||||
han
|
||||
handik
|
||||
hango
|
||||
hara
|
||||
hari
|
||||
hark
|
||||
hartan
|
||||
hau
|
||||
hauei
|
||||
hauek
|
||||
hauetan
|
||||
hemen
|
||||
hemendik
|
||||
hemengo
|
||||
hi
|
||||
hona
|
||||
honek
|
||||
honela
|
||||
honetan
|
||||
honi
|
||||
hor
|
||||
hori
|
||||
horiei
|
||||
horiek
|
||||
horietan
|
||||
horko
|
||||
horra
|
||||
horrek
|
||||
horrela
|
||||
horretan
|
||||
horri
|
||||
hortik
|
||||
hura
|
||||
izan
|
||||
ni
|
||||
noiz
|
||||
nola
|
||||
non
|
||||
nondik
|
||||
nongo
|
||||
nor
|
||||
nora
|
||||
ze
|
||||
zein
|
||||
zen
|
||||
zenbait
|
||||
zenbat
|
||||
zer
|
||||
zergatik
|
||||
ziren
|
||||
zituen
|
||||
zu
|
||||
zuek
|
||||
zuen
|
||||
zuten
|
|
@ -1,313 +0,0 @@
|
|||
# This file was created by Jacques Savoy and is distributed under the BSD license.
|
||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
|
||||
# Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
# Note: by default this file is used after normalization, so when adding entries
|
||||
# to this file, use the arabic 'ي' instead of 'ی'
|
||||
انان
|
||||
نداشته
|
||||
سراسر
|
||||
خياه
|
||||
ايشان
|
||||
وي
|
||||
تاكنون
|
||||
بيشتري
|
||||
دوم
|
||||
پس
|
||||
ناشي
|
||||
وگو
|
||||
يا
|
||||
داشتند
|
||||
سپس
|
||||
هنگام
|
||||
هرگز
|
||||
پنج
|
||||
نشان
|
||||
امسال
|
||||
ديگر
|
||||
گروهي
|
||||
شدند
|
||||
چطور
|
||||
ده
|
||||
و
|
||||
دو
|
||||
نخستين
|
||||
ولي
|
||||
چرا
|
||||
چه
|
||||
وسط
|
||||
ه
|
||||
كدام
|
||||
قابل
|
||||
يك
|
||||
رفت
|
||||
هفت
|
||||
همچنين
|
||||
در
|
||||
هزار
|
||||
بله
|
||||
بلي
|
||||
شايد
|
||||
اما
|
||||
شناسي
|
||||
گرفته
|
||||
دهد
|
||||
داشته
|
||||
دانست
|
||||
داشتن
|
||||
خواهيم
|
||||
ميليارد
|
||||
وقتيكه
|
||||
امد
|
||||
خواهد
|
||||
جز
|
||||
اورده
|
||||
شده
|
||||
بلكه
|
||||
خدمات
|
||||
شدن
|
||||
برخي
|
||||
نبود
|
||||
بسياري
|
||||
جلوگيري
|
||||
حق
|
||||
كردند
|
||||
نوعي
|
||||
بعري
|
||||
نكرده
|
||||
نظير
|
||||
نبايد
|
||||
بوده
|
||||
بودن
|
||||
داد
|
||||
اورد
|
||||
هست
|
||||
جايي
|
||||
شود
|
||||
دنبال
|
||||
داده
|
||||
بايد
|
||||
سابق
|
||||
هيچ
|
||||
همان
|
||||
انجا
|
||||
كمتر
|
||||
كجاست
|
||||
گردد
|
||||
كسي
|
||||
تر
|
||||
مردم
|
||||
تان
|
||||
دادن
|
||||
بودند
|
||||
سري
|
||||
جدا
|
||||
ندارند
|
||||
مگر
|
||||
يكديگر
|
||||
دارد
|
||||
دهند
|
||||
بنابراين
|
||||
هنگامي
|
||||
سمت
|
||||
جا
|
||||
انچه
|
||||
خود
|
||||
دادند
|
||||
زياد
|
||||
دارند
|
||||
اثر
|
||||
بدون
|
||||
بهترين
|
||||
بيشتر
|
||||
البته
|
||||
به
|
||||
براساس
|
||||
بيرون
|
||||
كرد
|
||||
بعضي
|
||||
گرفت
|
||||
توي
|
||||
اي
|
||||
ميليون
|
||||
او
|
||||
جريان
|
||||
تول
|
||||
بر
|
||||
مانند
|
||||
برابر
|
||||
باشيم
|
||||
مدتي
|
||||
گويند
|
||||
اكنون
|
||||
تا
|
||||
تنها
|
||||
جديد
|
||||
چند
|
||||
بي
|
||||
نشده
|
||||
كردن
|
||||
كردم
|
||||
گويد
|
||||
كرده
|
||||
كنيم
|
||||
نمي
|
||||
نزد
|
||||
روي
|
||||
قصد
|
||||
فقط
|
||||
بالاي
|
||||
ديگران
|
||||
اين
|
||||
ديروز
|
||||
توسط
|
||||
سوم
|
||||
ايم
|
||||
دانند
|
||||
سوي
|
||||
استفاده
|
||||
شما
|
||||
كنار
|
||||
داريم
|
||||
ساخته
|
||||
طور
|
||||
امده
|
||||
رفته
|
||||
نخست
|
||||
بيست
|
||||
نزديك
|
||||
طي
|
||||
كنيد
|
||||
از
|
||||
انها
|
||||
تمامي
|
||||
داشت
|
||||
يكي
|
||||
طريق
|
||||
اش
|
||||
چيست
|
||||
روب
|
||||
نمايد
|
||||
گفت
|
||||
چندين
|
||||
چيزي
|
||||
تواند
|
||||
ام
|
||||
ايا
|
||||
با
|
||||
ان
|
||||
ايد
|
||||
ترين
|
||||
اينكه
|
||||
ديگري
|
||||
راه
|
||||
هايي
|
||||
بروز
|
||||
همچنان
|
||||
پاعين
|
||||
كس
|
||||
حدود
|
||||
مختلف
|
||||
مقابل
|
||||
چيز
|
||||
گيرد
|
||||
ندارد
|
||||
ضد
|
||||
همچون
|
||||
سازي
|
||||
شان
|
||||
مورد
|
||||
باره
|
||||
مرسي
|
||||
خويش
|
||||
برخوردار
|
||||
چون
|
||||
خارج
|
||||
شش
|
||||
هنوز
|
||||
تحت
|
||||
ضمن
|
||||
هستيم
|
||||
گفته
|
||||
فكر
|
||||
بسيار
|
||||
پيش
|
||||
براي
|
||||
روزهاي
|
||||
انكه
|
||||
نخواهد
|
||||
بالا
|
||||
كل
|
||||
وقتي
|
||||
كي
|
||||
چنين
|
||||
كه
|
||||
گيري
|
||||
نيست
|
||||
است
|
||||
كجا
|
||||
كند
|
||||
نيز
|
||||
يابد
|
||||
بندي
|
||||
حتي
|
||||
توانند
|
||||
عقب
|
||||
خواست
|
||||
كنند
|
||||
بين
|
||||
تمام
|
||||
همه
|
||||
ما
|
||||
باشند
|
||||
مثل
|
||||
شد
|
||||
اري
|
||||
باشد
|
||||
اره
|
||||
طبق
|
||||
بعد
|
||||
اگر
|
||||
صورت
|
||||
غير
|
||||
جاي
|
||||
بيش
|
||||
ريزي
|
||||
اند
|
||||
زيرا
|
||||
چگونه
|
||||
بار
|
||||
لطفا
|
||||
مي
|
||||
درباره
|
||||
من
|
||||
ديده
|
||||
همين
|
||||
گذاري
|
||||
برداري
|
||||
علت
|
||||
گذاشته
|
||||
هم
|
||||
فوق
|
||||
نه
|
||||
ها
|
||||
شوند
|
||||
اباد
|
||||
همواره
|
||||
هر
|
||||
اول
|
||||
خواهند
|
||||
چهار
|
||||
نام
|
||||
امروز
|
||||
مان
|
||||
هاي
|
||||
قبل
|
||||
كنم
|
||||
سعي
|
||||
تازه
|
||||
را
|
||||
هستند
|
||||
زير
|
||||
جلوي
|
||||
عنوان
|
||||
بود
|
|
@ -1,97 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| forms of BE
|
||||
|
||||
olla
|
||||
olen
|
||||
olet
|
||||
on
|
||||
olemme
|
||||
olette
|
||||
ovat
|
||||
ole | negative form
|
||||
|
||||
oli
|
||||
olisi
|
||||
olisit
|
||||
olisin
|
||||
olisimme
|
||||
olisitte
|
||||
olisivat
|
||||
olit
|
||||
olin
|
||||
olimme
|
||||
olitte
|
||||
olivat
|
||||
ollut
|
||||
olleet
|
||||
|
||||
en | negation
|
||||
et
|
||||
ei
|
||||
emme
|
||||
ette
|
||||
eivät
|
||||
|
||||
|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
|
||||
minä minun minut minua minussa minusta minuun minulla minulta minulle | I
|
||||
sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
|
||||
hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
|
||||
me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
|
||||
te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
|
||||
he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
|
||||
|
||||
tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
|
||||
tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
|
||||
se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
|
||||
nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
|
||||
nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
|
||||
ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
|
||||
|
||||
kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
|
||||
ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
|
||||
mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
|
||||
mitkä | (pl)
|
||||
|
||||
joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
|
||||
jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
|
||||
|
||||
| conjunctions
|
||||
|
||||
että | that
|
||||
ja | and
|
||||
jos | if
|
||||
koska | because
|
||||
kuin | than
|
||||
mutta | but
|
||||
niin | so
|
||||
sekä | and
|
||||
sillä | for
|
||||
tai | or
|
||||
vaan | but
|
||||
vai | or
|
||||
vaikka | although
|
||||
|
||||
|
||||
| prepositions
|
||||
|
||||
kanssa | with
|
||||
mukaan | according to
|
||||
noin | about
|
||||
poikki | across
|
||||
yli | over, across
|
||||
|
||||
| other
|
||||
|
||||
kun | when
|
||||
niin | so
|
||||
nyt | now
|
||||
itse | self
|
||||
|
|
@ -1,186 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| A French stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
au | a + le
|
||||
aux | a + les
|
||||
avec | with
|
||||
ce | this
|
||||
ces | these
|
||||
dans | in
|
||||
de | of
|
||||
des | de + les
|
||||
du | de + le
|
||||
elle | she
|
||||
en | `of them' etc
|
||||
et | and
|
||||
eux | them
|
||||
il | he
|
||||
je | I
|
||||
la | the
|
||||
le | the
|
||||
leur | their
|
||||
lui | him
|
||||
ma | my (fem)
|
||||
mais | but
|
||||
me | me
|
||||
même | same; as in moi-même (myself) etc
|
||||
mes | my (pl)
|
||||
moi | me
|
||||
mon | my (masc)
|
||||
ne | not
|
||||
nos | our (pl)
|
||||
notre | our
|
||||
nous | we
|
||||
on | one
|
||||
ou | or
|
||||
par | by
|
||||
pas | not
|
||||
pour | for
|
||||
qu | que before vowel
|
||||
que | that
|
||||
qui | who
|
||||
sa | his, her (fem)
|
||||
se | oneself
|
||||
ses | his (pl)
|
||||
son | his, her (masc)
|
||||
sur | on
|
||||
ta | thy (fem)
|
||||
te | thee
|
||||
tes | thy (pl)
|
||||
toi | thee
|
||||
ton | thy (masc)
|
||||
tu | thou
|
||||
un | a
|
||||
une | a
|
||||
vos | your (pl)
|
||||
votre | your
|
||||
vous | you
|
||||
|
||||
| single letter forms
|
||||
|
||||
c | c'
|
||||
d | d'
|
||||
j | j'
|
||||
l | l'
|
||||
à | to, at
|
||||
m | m'
|
||||
n | n'
|
||||
s | s'
|
||||
t | t'
|
||||
y | there
|
||||
|
||||
| forms of être (not including the infinitive):
|
||||
été
|
||||
étée
|
||||
étées
|
||||
étés
|
||||
étant
|
||||
suis
|
||||
es
|
||||
est
|
||||
sommes
|
||||
êtes
|
||||
sont
|
||||
serai
|
||||
seras
|
||||
sera
|
||||
serons
|
||||
serez
|
||||
seront
|
||||
serais
|
||||
serait
|
||||
serions
|
||||
seriez
|
||||
seraient
|
||||
étais
|
||||
était
|
||||
étions
|
||||
étiez
|
||||
étaient
|
||||
fus
|
||||
fut
|
||||
fûmes
|
||||
fûtes
|
||||
furent
|
||||
sois
|
||||
soit
|
||||
soyons
|
||||
soyez
|
||||
soient
|
||||
fusse
|
||||
fusses
|
||||
fût
|
||||
fussions
|
||||
fussiez
|
||||
fussent
|
||||
|
||||
| forms of avoir (not including the infinitive):
|
||||
ayant
|
||||
eu
|
||||
eue
|
||||
eues
|
||||
eus
|
||||
ai
|
||||
as
|
||||
avons
|
||||
avez
|
||||
ont
|
||||
aurai
|
||||
auras
|
||||
aura
|
||||
aurons
|
||||
aurez
|
||||
auront
|
||||
aurais
|
||||
aurait
|
||||
aurions
|
||||
auriez
|
||||
auraient
|
||||
avais
|
||||
avait
|
||||
avions
|
||||
aviez
|
||||
avaient
|
||||
eut
|
||||
eûmes
|
||||
eûtes
|
||||
eurent
|
||||
aie
|
||||
aies
|
||||
ait
|
||||
ayons
|
||||
ayez
|
||||
aient
|
||||
eusse
|
||||
eusses
|
||||
eût
|
||||
eussions
|
||||
eussiez
|
||||
eussent
|
||||
|
||||
| Later additions (from Jean-Christophe Deschamps)
|
||||
ceci | this
|
||||
cela | that
|
||||
celà | that
|
||||
cet | this
|
||||
cette | this
|
||||
ici | here
|
||||
ils | they
|
||||
les | the (pl)
|
||||
leurs | their (pl)
|
||||
quel | which
|
||||
quels | which
|
||||
quelle | which
|
||||
quelles | which
|
||||
sans | without
|
||||
soi | oneself
|
||||
|
|
@ -1,110 +0,0 @@
|
|||
|
||||
a
|
||||
ach
|
||||
ag
|
||||
agus
|
||||
an
|
||||
aon
|
||||
ar
|
||||
arna
|
||||
as
|
||||
b'
|
||||
ba
|
||||
beirt
|
||||
bhúr
|
||||
caoga
|
||||
ceathair
|
||||
ceathrar
|
||||
chomh
|
||||
chtó
|
||||
chuig
|
||||
chun
|
||||
cois
|
||||
céad
|
||||
cúig
|
||||
cúigear
|
||||
d'
|
||||
daichead
|
||||
dar
|
||||
de
|
||||
deich
|
||||
deichniúr
|
||||
den
|
||||
dhá
|
||||
do
|
||||
don
|
||||
dtí
|
||||
dá
|
||||
dár
|
||||
dó
|
||||
faoi
|
||||
faoin
|
||||
faoina
|
||||
faoinár
|
||||
fara
|
||||
fiche
|
||||
gach
|
||||
gan
|
||||
go
|
||||
gur
|
||||
haon
|
||||
hocht
|
||||
i
|
||||
iad
|
||||
idir
|
||||
in
|
||||
ina
|
||||
ins
|
||||
inár
|
||||
is
|
||||
le
|
||||
leis
|
||||
lena
|
||||
lenár
|
||||
m'
|
||||
mar
|
||||
mo
|
||||
mé
|
||||
na
|
||||
nach
|
||||
naoi
|
||||
naonúr
|
||||
ná
|
||||
ní
|
||||
níor
|
||||
nó
|
||||
nócha
|
||||
ocht
|
||||
ochtar
|
||||
os
|
||||
roimh
|
||||
sa
|
||||
seacht
|
||||
seachtar
|
||||
seachtó
|
||||
seasca
|
||||
seisear
|
||||
siad
|
||||
sibh
|
||||
sinn
|
||||
sna
|
||||
sé
|
||||
sí
|
||||
tar
|
||||
thar
|
||||
thú
|
||||
triúr
|
||||
trí
|
||||
trína
|
||||
trínár
|
||||
tríocha
|
||||
tú
|
||||
um
|
||||
ár
|
||||
é
|
||||
éis
|
||||
í
|
||||
ó
|
||||
ón
|
||||
óna
|
||||
ónár
|
|
@ -1,161 +0,0 @@
|
|||
# Galician stopwords
|
||||
a
|
||||
aínda
|
||||
alí
|
||||
aquel
|
||||
aquela
|
||||
aquelas
|
||||
aqueles
|
||||
aquilo
|
||||
aquí
|
||||
ao
|
||||
aos
|
||||
as
|
||||
así
|
||||
á
|
||||
ben
|
||||
cando
|
||||
che
|
||||
co
|
||||
coa
|
||||
comigo
|
||||
con
|
||||
connosco
|
||||
contigo
|
||||
convosco
|
||||
coas
|
||||
cos
|
||||
cun
|
||||
cuns
|
||||
cunha
|
||||
cunhas
|
||||
da
|
||||
dalgunha
|
||||
dalgunhas
|
||||
dalgún
|
||||
dalgúns
|
||||
das
|
||||
de
|
||||
del
|
||||
dela
|
||||
delas
|
||||
deles
|
||||
desde
|
||||
deste
|
||||
do
|
||||
dos
|
||||
dun
|
||||
duns
|
||||
dunha
|
||||
dunhas
|
||||
e
|
||||
el
|
||||
ela
|
||||
elas
|
||||
eles
|
||||
en
|
||||
era
|
||||
eran
|
||||
esa
|
||||
esas
|
||||
ese
|
||||
eses
|
||||
esta
|
||||
estar
|
||||
estaba
|
||||
está
|
||||
están
|
||||
este
|
||||
estes
|
||||
estiven
|
||||
estou
|
||||
eu
|
||||
é
|
||||
facer
|
||||
foi
|
||||
foron
|
||||
fun
|
||||
había
|
||||
hai
|
||||
iso
|
||||
isto
|
||||
la
|
||||
las
|
||||
lle
|
||||
lles
|
||||
lo
|
||||
los
|
||||
mais
|
||||
me
|
||||
meu
|
||||
meus
|
||||
min
|
||||
miña
|
||||
miñas
|
||||
moi
|
||||
na
|
||||
nas
|
||||
neste
|
||||
nin
|
||||
no
|
||||
non
|
||||
nos
|
||||
nosa
|
||||
nosas
|
||||
noso
|
||||
nosos
|
||||
nós
|
||||
nun
|
||||
nunha
|
||||
nuns
|
||||
nunhas
|
||||
o
|
||||
os
|
||||
ou
|
||||
ó
|
||||
ós
|
||||
para
|
||||
pero
|
||||
pode
|
||||
pois
|
||||
pola
|
||||
polas
|
||||
polo
|
||||
polos
|
||||
por
|
||||
que
|
||||
se
|
||||
senón
|
||||
ser
|
||||
seu
|
||||
seus
|
||||
sexa
|
||||
sido
|
||||
sobre
|
||||
súa
|
||||
súas
|
||||
tamén
|
||||
tan
|
||||
te
|
||||
ten
|
||||
teñen
|
||||
teño
|
||||
ter
|
||||
teu
|
||||
teus
|
||||
ti
|
||||
tido
|
||||
tiña
|
||||
tiven
|
||||
túa
|
||||
túas
|
||||
un
|
||||
unha
|
||||
unhas
|
||||
uns
|
||||
vos
|
||||
vosa
|
||||
vosas
|
||||
voso
|
||||
vosos
|
||||
vós
|
|
@ -1,235 +0,0 @@
|
|||
# Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
|
||||
# This file was created by Jacques Savoy and is distributed under the BSD license.
|
||||
# Note: by default this file also contains forms normalized by HindiNormalizer
|
||||
# for spelling variation (see section below), such that it can be used whether or
|
||||
# not you enable that feature. When adding additional entries to this list,
|
||||
# please add the normalized form as well.
|
||||
अंदर
|
||||
अत
|
||||
अपना
|
||||
अपनी
|
||||
अपने
|
||||
अभी
|
||||
आदि
|
||||
आप
|
||||
इत्यादि
|
||||
इन
|
||||
इनका
|
||||
इन्हीं
|
||||
इन्हें
|
||||
इन्हों
|
||||
इस
|
||||
इसका
|
||||
इसकी
|
||||
इसके
|
||||
इसमें
|
||||
इसी
|
||||
इसे
|
||||
उन
|
||||
उनका
|
||||
उनकी
|
||||
उनके
|
||||
उनको
|
||||
उन्हीं
|
||||
उन्हें
|
||||
उन्हों
|
||||
उस
|
||||
उसके
|
||||
उसी
|
||||
उसे
|
||||
एक
|
||||
एवं
|
||||
एस
|
||||
ऐसे
|
||||
और
|
||||
कई
|
||||
कर
|
||||
करता
|
||||
करते
|
||||
करना
|
||||
करने
|
||||
करें
|
||||
कहते
|
||||
कहा
|
||||
का
|
||||
काफ़ी
|
||||
कि
|
||||
कितना
|
||||
किन्हें
|
||||
किन्हों
|
||||
किया
|
||||
किर
|
||||
किस
|
||||
किसी
|
||||
किसे
|
||||
की
|
||||
कुछ
|
||||
कुल
|
||||
के
|
||||
को
|
||||
कोई
|
||||
कौन
|
||||
कौनसा
|
||||
गया
|
||||
घर
|
||||
जब
|
||||
जहाँ
|
||||
जा
|
||||
जितना
|
||||
जिन
|
||||
जिन्हें
|
||||
जिन्हों
|
||||
जिस
|
||||
जिसे
|
||||
जीधर
|
||||
जैसा
|
||||
जैसे
|
||||
जो
|
||||
तक
|
||||
तब
|
||||
तरह
|
||||
तिन
|
||||
तिन्हें
|
||||
तिन्हों
|
||||
तिस
|
||||
तिसे
|
||||
तो
|
||||
था
|
||||
थी
|
||||
थे
|
||||
दबारा
|
||||
दिया
|
||||
दुसरा
|
||||
दूसरे
|
||||
दो
|
||||
द्वारा
|
||||
न
|
||||
नहीं
|
||||
ना
|
||||
निहायत
|
||||
नीचे
|
||||
ने
|
||||
पर
|
||||
पर
|
||||
पहले
|
||||
पूरा
|
||||
पे
|
||||
फिर
|
||||
बनी
|
||||
बही
|
||||
बहुत
|
||||
बाद
|
||||
बाला
|
||||
बिलकुल
|
||||
भी
|
||||
भीतर
|
||||
मगर
|
||||
मानो
|
||||
मे
|
||||
में
|
||||
यदि
|
||||
यह
|
||||
यहाँ
|
||||
यही
|
||||
या
|
||||
यिह
|
||||
ये
|
||||
रखें
|
||||
रहा
|
||||
रहे
|
||||
ऱ्वासा
|
||||
लिए
|
||||
लिये
|
||||
लेकिन
|
||||
व
|
||||
वर्ग
|
||||
वह
|
||||
वह
|
||||
वहाँ
|
||||
वहीं
|
||||
वाले
|
||||
वुह
|
||||
वे
|
||||
वग़ैरह
|
||||
संग
|
||||
सकता
|
||||
सकते
|
||||
सबसे
|
||||
सभी
|
||||
साथ
|
||||
साबुत
|
||||
साभ
|
||||
सारा
|
||||
से
|
||||
सो
|
||||
ही
|
||||
हुआ
|
||||
हुई
|
||||
हुए
|
||||
है
|
||||
हैं
|
||||
हो
|
||||
होता
|
||||
होती
|
||||
होते
|
||||
होना
|
||||
होने
|
||||
# additional normalized forms of the above
|
||||
अपनि
|
||||
जेसे
|
||||
होति
|
||||
सभि
|
||||
तिंहों
|
||||
इंहों
|
||||
दवारा
|
||||
इसि
|
||||
किंहें
|
||||
थि
|
||||
उंहों
|
||||
ओर
|
||||
जिंहें
|
||||
वहिं
|
||||
अभि
|
||||
बनि
|
||||
हि
|
||||
उंहिं
|
||||
उंहें
|
||||
हें
|
||||
वगेरह
|
||||
एसे
|
||||
रवासा
|
||||
कोन
|
||||
निचे
|
||||
काफि
|
||||
उसि
|
||||
पुरा
|
||||
भितर
|
||||
हे
|
||||
बहि
|
||||
वहां
|
||||
कोइ
|
||||
यहां
|
||||
जिंहों
|
||||
तिंहें
|
||||
किसि
|
||||
कइ
|
||||
यहि
|
||||
इंहिं
|
||||
जिधर
|
||||
इंहें
|
||||
अदि
|
||||
इतयादि
|
||||
हुइ
|
||||
कोनसा
|
||||
इसकि
|
||||
दुसरे
|
||||
जहां
|
||||
अप
|
||||
किंहों
|
||||
उनकि
|
||||
भि
|
||||
वरग
|
||||
हुअ
|
||||
जेसा
|
||||
नहिं
|
|
@ -1,211 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| Hungarian stop word list
|
||||
| prepared by Anna Tordai
|
||||
|
||||
a
|
||||
ahogy
|
||||
ahol
|
||||
aki
|
||||
akik
|
||||
akkor
|
||||
alatt
|
||||
által
|
||||
általában
|
||||
amely
|
||||
amelyek
|
||||
amelyekben
|
||||
amelyeket
|
||||
amelyet
|
||||
amelynek
|
||||
ami
|
||||
amit
|
||||
amolyan
|
||||
amíg
|
||||
amikor
|
||||
át
|
||||
abban
|
||||
ahhoz
|
||||
annak
|
||||
arra
|
||||
arról
|
||||
az
|
||||
azok
|
||||
azon
|
||||
azt
|
||||
azzal
|
||||
azért
|
||||
aztán
|
||||
azután
|
||||
azonban
|
||||
bár
|
||||
be
|
||||
belül
|
||||
benne
|
||||
cikk
|
||||
cikkek
|
||||
cikkeket
|
||||
csak
|
||||
de
|
||||
e
|
||||
eddig
|
||||
egész
|
||||
egy
|
||||
egyes
|
||||
egyetlen
|
||||
egyéb
|
||||
egyik
|
||||
egyre
|
||||
ekkor
|
||||
el
|
||||
elég
|
||||
ellen
|
||||
elő
|
||||
először
|
||||
előtt
|
||||
első
|
||||
én
|
||||
éppen
|
||||
ebben
|
||||
ehhez
|
||||
emilyen
|
||||
ennek
|
||||
erre
|
||||
ez
|
||||
ezt
|
||||
ezek
|
||||
ezen
|
||||
ezzel
|
||||
ezért
|
||||
és
|
||||
fel
|
||||
felé
|
||||
hanem
|
||||
hiszen
|
||||
hogy
|
||||
hogyan
|
||||
igen
|
||||
így
|
||||
illetve
|
||||
ill.
|
||||
ill
|
||||
ilyen
|
||||
ilyenkor
|
||||
ison
|
||||
ismét
|
||||
itt
|
||||
jó
|
||||
jól
|
||||
jobban
|
||||
kell
|
||||
kellett
|
||||
keresztül
|
||||
keressünk
|
||||
ki
|
||||
kívül
|
||||
között
|
||||
közül
|
||||
legalább
|
||||
lehet
|
||||
lehetett
|
||||
legyen
|
||||
lenne
|
||||
lenni
|
||||
lesz
|
||||
lett
|
||||
maga
|
||||
magát
|
||||
majd
|
||||
majd
|
||||
már
|
||||
más
|
||||
másik
|
||||
meg
|
||||
még
|
||||
mellett
|
||||
mert
|
||||
mely
|
||||
melyek
|
||||
mi
|
||||
mit
|
||||
míg
|
||||
miért
|
||||
milyen
|
||||
mikor
|
||||
minden
|
||||
mindent
|
||||
mindenki
|
||||
mindig
|
||||
mint
|
||||
mintha
|
||||
mivel
|
||||
most
|
||||
nagy
|
||||
nagyobb
|
||||
nagyon
|
||||
ne
|
||||
néha
|
||||
nekem
|
||||
neki
|
||||
nem
|
||||
néhány
|
||||
nélkül
|
||||
nincs
|
||||
olyan
|
||||
ott
|
||||
össze
|
||||
ő
|
||||
ők
|
||||
őket
|
||||
pedig
|
||||
persze
|
||||
rá
|
||||
s
|
||||
saját
|
||||
sem
|
||||
semmi
|
||||
sok
|
||||
sokat
|
||||
sokkal
|
||||
számára
|
||||
szemben
|
||||
szerint
|
||||
szinte
|
||||
talán
|
||||
tehát
|
||||
teljes
|
||||
tovább
|
||||
továbbá
|
||||
több
|
||||
úgy
|
||||
ugyanis
|
||||
új
|
||||
újabb
|
||||
újra
|
||||
után
|
||||
utána
|
||||
utolsó
|
||||
vagy
|
||||
vagyis
|
||||
valaki
|
||||
valami
|
||||
valamint
|
||||
való
|
||||
vagyok
|
||||
van
|
||||
vannak
|
||||
volt
|
||||
voltam
|
||||
voltak
|
||||
voltunk
|
||||
vissza
|
||||
vele
|
||||
viszont
|
||||
volna
|
|
@ -1,46 +0,0 @@
|
|||
# example set of Armenian stopwords.
|
||||
այդ
|
||||
այլ
|
||||
այն
|
||||
այս
|
||||
դու
|
||||
դուք
|
||||
եմ
|
||||
են
|
||||
ենք
|
||||
ես
|
||||
եք
|
||||
է
|
||||
էի
|
||||
էին
|
||||
էինք
|
||||
էիր
|
||||
էիք
|
||||
էր
|
||||
ըստ
|
||||
թ
|
||||
ի
|
||||
ին
|
||||
իսկ
|
||||
իր
|
||||
կամ
|
||||
համար
|
||||
հետ
|
||||
հետո
|
||||
մենք
|
||||
մեջ
|
||||
մի
|
||||
ն
|
||||
նա
|
||||
նաև
|
||||
նրա
|
||||
նրանք
|
||||
որ
|
||||
որը
|
||||
որոնք
|
||||
որպես
|
||||
ու
|
||||
ում
|
||||
պիտի
|
||||
վրա
|
||||
և
|
|
@ -1,359 +0,0 @@
|
|||
# from appendix D of: A Study of Stemming Effects on Information
|
||||
# Retrieval in Bahasa Indonesia
|
||||
ada
|
||||
adanya
|
||||
adalah
|
||||
adapun
|
||||
agak
|
||||
agaknya
|
||||
agar
|
||||
akan
|
||||
akankah
|
||||
akhirnya
|
||||
aku
|
||||
akulah
|
||||
amat
|
||||
amatlah
|
||||
anda
|
||||
andalah
|
||||
antar
|
||||
diantaranya
|
||||
antara
|
||||
antaranya
|
||||
diantara
|
||||
apa
|
||||
apaan
|
||||
mengapa
|
||||
apabila
|
||||
apakah
|
||||
apalagi
|
||||
apatah
|
||||
atau
|
||||
ataukah
|
||||
ataupun
|
||||
bagai
|
||||
bagaikan
|
||||
sebagai
|
||||
sebagainya
|
||||
bagaimana
|
||||
bagaimanapun
|
||||
sebagaimana
|
||||
bagaimanakah
|
||||
bagi
|
||||
bahkan
|
||||
bahwa
|
||||
bahwasanya
|
||||
sebaliknya
|
||||
banyak
|
||||
sebanyak
|
||||
beberapa
|
||||
seberapa
|
||||
begini
|
||||
beginian
|
||||
beginikah
|
||||
beginilah
|
||||
sebegini
|
||||
begitu
|
||||
begitukah
|
||||
begitulah
|
||||
begitupun
|
||||
sebegitu
|
||||
belum
|
||||
belumlah
|
||||
sebelum
|
||||
sebelumnya
|
||||
sebenarnya
|
||||
berapa
|
||||
berapakah
|
||||
berapalah
|
||||
berapapun
|
||||
betulkah
|
||||
sebetulnya
|
||||
biasa
|
||||
biasanya
|
||||
bila
|
||||
bilakah
|
||||
bisa
|
||||
bisakah
|
||||
sebisanya
|
||||
boleh
|
||||
bolehkah
|
||||
bolehlah
|
||||
buat
|
||||
bukan
|
||||
bukankah
|
||||
bukanlah
|
||||
bukannya
|
||||
cuma
|
||||
percuma
|
||||
dahulu
|
||||
dalam
|
||||
dan
|
||||
dapat
|
||||
dari
|
||||
daripada
|
||||
dekat
|
||||
demi
|
||||
demikian
|
||||
demikianlah
|
||||
sedemikian
|
||||
dengan
|
||||
depan
|
||||
di
|
||||
dia
|
||||
dialah
|
||||
dini
|
||||
diri
|
||||
dirinya
|
||||
terdiri
|
||||
dong
|
||||
dulu
|
||||
enggak
|
||||
enggaknya
|
||||
entah
|
||||
entahlah
|
||||
terhadap
|
||||
terhadapnya
|
||||
hal
|
||||
hampir
|
||||
hanya
|
||||
hanyalah
|
||||
harus
|
||||
haruslah
|
||||
harusnya
|
||||
seharusnya
|
||||
hendak
|
||||
hendaklah
|
||||
hendaknya
|
||||
hingga
|
||||
sehingga
|
||||
ia
|
||||
ialah
|
||||
ibarat
|
||||
ingin
|
||||
inginkah
|
||||
inginkan
|
||||
ini
|
||||
inikah
|
||||
inilah
|
||||
itu
|
||||
itukah
|
||||
itulah
|
||||
jangan
|
||||
jangankan
|
||||
janganlah
|
||||
jika
|
||||
jikalau
|
||||
juga
|
||||
justru
|
||||
kala
|
||||
kalau
|
||||
kalaulah
|
||||
kalaupun
|
||||
kalian
|
||||
kami
|
||||
kamilah
|
||||
kamu
|
||||
kamulah
|
||||
kan
|
||||
kapan
|
||||
kapankah
|
||||
kapanpun
|
||||
dikarenakan
|
||||
karena
|
||||
karenanya
|
||||
ke
|
||||
kecil
|
||||
kemudian
|
||||
kenapa
|
||||
kepada
|
||||
kepadanya
|
||||
ketika
|
||||
seketika
|
||||
khususnya
|
||||
kini
|
||||
kinilah
|
||||
kiranya
|
||||
sekiranya
|
||||
kita
|
||||
kitalah
|
||||
kok
|
||||
lagi
|
||||
lagian
|
||||
selagi
|
||||
lah
|
||||
lain
|
||||
lainnya
|
||||
melainkan
|
||||
selaku
|
||||
lalu
|
||||
melalui
|
||||
terlalu
|
||||
lama
|
||||
lamanya
|
||||
selama
|
||||
selama
|
||||
selamanya
|
||||
lebih
|
||||
terlebih
|
||||
bermacam
|
||||
macam
|
||||
semacam
|
||||
maka
|
||||
makanya
|
||||
makin
|
||||
malah
|
||||
malahan
|
||||
mampu
|
||||
mampukah
|
||||
mana
|
||||
manakala
|
||||
manalagi
|
||||
masih
|
||||
masihkah
|
||||
semasih
|
||||
masing
|
||||
mau
|
||||
maupun
|
||||
semaunya
|
||||
memang
|
||||
mereka
|
||||
merekalah
|
||||
meski
|
||||
meskipun
|
||||
semula
|
||||
mungkin
|
||||
mungkinkah
|
||||
nah
|
||||
namun
|
||||
nanti
|
||||
nantinya
|
||||
nyaris
|
||||
oleh
|
||||
olehnya
|
||||
seorang
|
||||
seseorang
|
||||
pada
|
||||
padanya
|
||||
padahal
|
||||
paling
|
||||
sepanjang
|
||||
pantas
|
||||
sepantasnya
|
||||
sepantasnyalah
|
||||
para
|
||||
pasti
|
||||
pastilah
|
||||
per
|
||||
pernah
|
||||
pula
|
||||
pun
|
||||
merupakan
|
||||
rupanya
|
||||
serupa
|
||||
saat
|
||||
saatnya
|
||||
sesaat
|
||||
saja
|
||||
sajalah
|
||||
saling
|
||||
bersama
|
||||
sama
|
||||
sesama
|
||||
sambil
|
||||
sampai
|
||||
sana
|
||||
sangat
|
||||
sangatlah
|
||||
saya
|
||||
sayalah
|
||||
se
|
||||
sebab
|
||||
sebabnya
|
||||
sebuah
|
||||
tersebut
|
||||
tersebutlah
|
||||
sedang
|
||||
sedangkan
|
||||
sedikit
|
||||
sedikitnya
|
||||
segala
|
||||
segalanya
|
||||
segera
|
||||
sesegera
|
||||
sejak
|
||||
sejenak
|
||||
sekali
|
||||
sekalian
|
||||
sekalipun
|
||||
sesekali
|
||||
sekaligus
|
||||
sekarang
|
||||
sekarang
|
||||
sekitar
|
||||
sekitarnya
|
||||
sela
|
||||
selain
|
||||
selalu
|
||||
seluruh
|
||||
seluruhnya
|
||||
semakin
|
||||
sementara
|
||||
sempat
|
||||
semua
|
||||
semuanya
|
||||
sendiri
|
||||
sendirinya
|
||||
seolah
|
||||
seperti
|
||||
sepertinya
|
||||
sering
|
||||
seringnya
|
||||
serta
|
||||
siapa
|
||||
siapakah
|
||||
siapapun
|
||||
disini
|
||||
disinilah
|
||||
sini
|
||||
sinilah
|
||||
sesuatu
|
||||
sesuatunya
|
||||
suatu
|
||||
sesudah
|
||||
sesudahnya
|
||||
sudah
|
||||
sudahkah
|
||||
sudahlah
|
||||
supaya
|
||||
tadi
|
||||
tadinya
|
||||
tak
|
||||
tanpa
|
||||
setelah
|
||||
telah
|
||||
tentang
|
||||
tentu
|
||||
tentulah
|
||||
tentunya
|
||||
tertentu
|
||||
seterusnya
|
||||
tapi
|
||||
tetapi
|
||||
setiap
|
||||
tiap
|
||||
setidaknya
|
||||
tidak
|
||||
tidakkah
|
||||
tidaklah
|
||||
toh
|
||||
waduh
|
||||
wah
|
||||
wahai
|
||||
sewaktu
|
||||
walau
|
||||
walaupun
|
||||
wong
|
||||
yaitu
|
||||
yakni
|
||||
yang
|
|
@ -1,303 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| An Italian stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
ad | a (to) before vowel
|
||||
al | a + il
|
||||
allo | a + lo
|
||||
ai | a + i
|
||||
agli | a + gli
|
||||
all | a + l'
|
||||
agl | a + gl'
|
||||
alla | a + la
|
||||
alle | a + le
|
||||
con | with
|
||||
col | con + il
|
||||
coi | con + i (forms collo, cogli etc are now very rare)
|
||||
da | from
|
||||
dal | da + il
|
||||
dallo | da + lo
|
||||
dai | da + i
|
||||
dagli | da + gli
|
||||
dall | da + l'
|
||||
dagl | da + gl'
|
||||
dalla | da + la
|
||||
dalle | da + le
|
||||
di | of
|
||||
del | di + il
|
||||
dello | di + lo
|
||||
dei | di + i
|
||||
degli | di + gli
|
||||
dell | di + l'
|
||||
degl | di + gl'
|
||||
della | di + la
|
||||
delle | di + le
|
||||
in | in
|
||||
nel | in + il
|
||||
nello | in + lo
|
||||
nei | in + i
|
||||
negli | in + gli
|
||||
nell | in + l'
|
||||
negl | in + gl'
|
||||
nella | in + la
|
||||
nelle | in + le
|
||||
su | on
|
||||
sul | su + il
|
||||
sullo | su + lo
|
||||
sui | su + i
|
||||
sugli | su + gli
|
||||
sull | su + l'
|
||||
sugl | su + gl'
|
||||
sulla | su + la
|
||||
sulle | su + le
|
||||
per | through, by
|
||||
tra | among
|
||||
contro | against
|
||||
io | I
|
||||
tu | thou
|
||||
lui | he
|
||||
lei | she
|
||||
noi | we
|
||||
voi | you
|
||||
loro | they
|
||||
mio | my
|
||||
mia |
|
||||
miei |
|
||||
mie |
|
||||
tuo |
|
||||
tua |
|
||||
tuoi | thy
|
||||
tue |
|
||||
suo |
|
||||
sua |
|
||||
suoi | his, her
|
||||
sue |
|
||||
nostro | our
|
||||
nostra |
|
||||
nostri |
|
||||
nostre |
|
||||
vostro | your
|
||||
vostra |
|
||||
vostri |
|
||||
vostre |
|
||||
mi | me
|
||||
ti | thee
|
||||
ci | us, there
|
||||
vi | you, there
|
||||
lo | him, the
|
||||
la | her, the
|
||||
li | them
|
||||
le | them, the
|
||||
gli | to him, the
|
||||
ne | from there etc
|
||||
il | the
|
||||
un | a
|
||||
uno | a
|
||||
una | a
|
||||
ma | but
|
||||
ed | and
|
||||
se | if
|
||||
perché | why, because
|
||||
anche | also
|
||||
come | how
|
||||
dov | where (as dov')
|
||||
dove | where
|
||||
che | who, that
|
||||
chi | who
|
||||
cui | whom
|
||||
non | not
|
||||
più | more
|
||||
quale | who, that
|
||||
quanto | how much
|
||||
quanti |
|
||||
quanta |
|
||||
quante |
|
||||
quello | that
|
||||
quelli |
|
||||
quella |
|
||||
quelle |
|
||||
questo | this
|
||||
questi |
|
||||
questa |
|
||||
queste |
|
||||
si | yes
|
||||
tutto | all
|
||||
tutti | all
|
||||
|
||||
| single letter forms:
|
||||
|
||||
a | at
|
||||
c | as c' for ce or ci
|
||||
e | and
|
||||
i | the
|
||||
l | as l'
|
||||
o | or
|
||||
|
||||
| forms of avere, to have (not including the infinitive):
|
||||
|
||||
ho
|
||||
hai
|
||||
ha
|
||||
abbiamo
|
||||
avete
|
||||
hanno
|
||||
abbia
|
||||
abbiate
|
||||
abbiano
|
||||
avrò
|
||||
avrai
|
||||
avrà
|
||||
avremo
|
||||
avrete
|
||||
avranno
|
||||
avrei
|
||||
avresti
|
||||
avrebbe
|
||||
avremmo
|
||||
avreste
|
||||
avrebbero
|
||||
avevo
|
||||
avevi
|
||||
aveva
|
||||
avevamo
|
||||
avevate
|
||||
avevano
|
||||
ebbi
|
||||
avesti
|
||||
ebbe
|
||||
avemmo
|
||||
aveste
|
||||
ebbero
|
||||
avessi
|
||||
avesse
|
||||
avessimo
|
||||
avessero
|
||||
avendo
|
||||
avuto
|
||||
avuta
|
||||
avuti
|
||||
avute
|
||||
|
||||
| forms of essere, to be (not including the infinitive):
|
||||
sono
|
||||
sei
|
||||
è
|
||||
siamo
|
||||
siete
|
||||
sia
|
||||
siate
|
||||
siano
|
||||
sarò
|
||||
sarai
|
||||
sarà
|
||||
saremo
|
||||
sarete
|
||||
saranno
|
||||
sarei
|
||||
saresti
|
||||
sarebbe
|
||||
saremmo
|
||||
sareste
|
||||
sarebbero
|
||||
ero
|
||||
eri
|
||||
era
|
||||
eravamo
|
||||
eravate
|
||||
erano
|
||||
fui
|
||||
fosti
|
||||
fu
|
||||
fummo
|
||||
foste
|
||||
furono
|
||||
fossi
|
||||
fosse
|
||||
fossimo
|
||||
fossero
|
||||
essendo
|
||||
|
||||
| forms of fare, to do (not including the infinitive, fa, fat-):
|
||||
faccio
|
||||
fai
|
||||
facciamo
|
||||
fanno
|
||||
faccia
|
||||
facciate
|
||||
facciano
|
||||
farò
|
||||
farai
|
||||
farà
|
||||
faremo
|
||||
farete
|
||||
faranno
|
||||
farei
|
||||
faresti
|
||||
farebbe
|
||||
faremmo
|
||||
fareste
|
||||
farebbero
|
||||
facevo
|
||||
facevi
|
||||
faceva
|
||||
facevamo
|
||||
facevate
|
||||
facevano
|
||||
feci
|
||||
facesti
|
||||
fece
|
||||
facemmo
|
||||
faceste
|
||||
fecero
|
||||
facessi
|
||||
facesse
|
||||
facessimo
|
||||
facessero
|
||||
facendo
|
||||
|
||||
| forms of stare, to be (not including the infinitive):
|
||||
sto
|
||||
stai
|
||||
sta
|
||||
stiamo
|
||||
stanno
|
||||
stia
|
||||
stiate
|
||||
stiano
|
||||
starò
|
||||
starai
|
||||
starà
|
||||
staremo
|
||||
starete
|
||||
staranno
|
||||
starei
|
||||
staresti
|
||||
starebbe
|
||||
staremmo
|
||||
stareste
|
||||
starebbero
|
||||
stavo
|
||||
stavi
|
||||
stava
|
||||
stavamo
|
||||
stavate
|
||||
stavano
|
||||
stetti
|
||||
stesti
|
||||
stette
|
||||
stemmo
|
||||
steste
|
||||
stettero
|
||||
stessi
|
||||
stesse
|
||||
stessimo
|
||||
stessero
|
||||
stando
|
|
@ -1,127 +0,0 @@
|
|||
#
|
||||
# This file defines a stopword set for Japanese.
|
||||
#
|
||||
# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
|
||||
# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
|
||||
# for frequency lists, etc. that can be useful for making your own set (if desired)
|
||||
#
|
||||
# Note that there is an overlap between these stopwords and the terms stopped when used
|
||||
# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
|
||||
# that comments are not allowed on the same line as stopwords.
|
||||
#
|
||||
# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
|
||||
# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
|
||||
# using the same character width as the entries in this file. Since this StopFilter is
|
||||
# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
|
||||
# entries to be in half-width and your kana entries to be in full-width.
|
||||
#
|
||||
の
|
||||
に
|
||||
は
|
||||
を
|
||||
た
|
||||
が
|
||||
で
|
||||
て
|
||||
と
|
||||
し
|
||||
れ
|
||||
さ
|
||||
ある
|
||||
いる
|
||||
も
|
||||
する
|
||||
から
|
||||
な
|
||||
こと
|
||||
として
|
||||
い
|
||||
や
|
||||
れる
|
||||
など
|
||||
なっ
|
||||
ない
|
||||
この
|
||||
ため
|
||||
その
|
||||
あっ
|
||||
よう
|
||||
また
|
||||
もの
|
||||
という
|
||||
あり
|
||||
まで
|
||||
られ
|
||||
なる
|
||||
へ
|
||||
か
|
||||
だ
|
||||
これ
|
||||
によって
|
||||
により
|
||||
おり
|
||||
より
|
||||
による
|
||||
ず
|
||||
なり
|
||||
られる
|
||||
において
|
||||
ば
|
||||
なかっ
|
||||
なく
|
||||
しかし
|
||||
について
|
||||
せ
|
||||
だっ
|
||||
その後
|
||||
できる
|
||||
それ
|
||||
う
|
||||
ので
|
||||
なお
|
||||
のみ
|
||||
でき
|
||||
き
|
||||
つ
|
||||
における
|
||||
および
|
||||
いう
|
||||
さらに
|
||||
でも
|
||||
ら
|
||||
たり
|
||||
その他
|
||||
に関する
|
||||
たち
|
||||
ます
|
||||
ん
|
||||
なら
|
||||
に対して
|
||||
特に
|
||||
せる
|
||||
及び
|
||||
これら
|
||||
とき
|
||||
では
|
||||
にて
|
||||
ほか
|
||||
ながら
|
||||
うち
|
||||
そして
|
||||
とともに
|
||||
ただし
|
||||
かつて
|
||||
それぞれ
|
||||
または
|
||||
お
|
||||
ほど
|
||||
ものの
|
||||
に対する
|
||||
ほとんど
|
||||
と共に
|
||||
といった
|
||||
です
|
||||
とも
|
||||
ところ
|
||||
ここ
|
||||
##### End of file
|
|
@ -1,172 +0,0 @@
|
|||
# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
|
||||
# the original list of over 800 forms was refined:
|
||||
# pronouns, adverbs, interjections were removed
|
||||
#
|
||||
# prepositions
|
||||
aiz
|
||||
ap
|
||||
ar
|
||||
apakš
|
||||
ārpus
|
||||
augšpus
|
||||
bez
|
||||
caur
|
||||
dēļ
|
||||
gar
|
||||
iekš
|
||||
iz
|
||||
kopš
|
||||
labad
|
||||
lejpus
|
||||
līdz
|
||||
no
|
||||
otrpus
|
||||
pa
|
||||
par
|
||||
pār
|
||||
pēc
|
||||
pie
|
||||
pirms
|
||||
pret
|
||||
priekš
|
||||
starp
|
||||
šaipus
|
||||
uz
|
||||
viņpus
|
||||
virs
|
||||
virspus
|
||||
zem
|
||||
apakšpus
|
||||
# Conjunctions
|
||||
un
|
||||
bet
|
||||
jo
|
||||
ja
|
||||
ka
|
||||
lai
|
||||
tomēr
|
||||
tikko
|
||||
turpretī
|
||||
arī
|
||||
kaut
|
||||
gan
|
||||
tādēļ
|
||||
tā
|
||||
ne
|
||||
tikvien
|
||||
vien
|
||||
kā
|
||||
ir
|
||||
te
|
||||
vai
|
||||
kamēr
|
||||
# Particles
|
||||
ar
|
||||
diezin
|
||||
droši
|
||||
diemžēl
|
||||
nebūt
|
||||
ik
|
||||
it
|
||||
taču
|
||||
nu
|
||||
pat
|
||||
tiklab
|
||||
iekšpus
|
||||
nedz
|
||||
tik
|
||||
nevis
|
||||
turpretim
|
||||
jeb
|
||||
iekam
|
||||
iekām
|
||||
iekāms
|
||||
kolīdz
|
||||
līdzko
|
||||
tiklīdz
|
||||
jebšu
|
||||
tālab
|
||||
tāpēc
|
||||
nekā
|
||||
itin
|
||||
jā
|
||||
jau
|
||||
jel
|
||||
nē
|
||||
nezin
|
||||
tad
|
||||
tikai
|
||||
vis
|
||||
tak
|
||||
iekams
|
||||
vien
|
||||
# modal verbs
|
||||
būt
|
||||
biju
|
||||
biji
|
||||
bija
|
||||
bijām
|
||||
bijāt
|
||||
esmu
|
||||
esi
|
||||
esam
|
||||
esat
|
||||
būšu
|
||||
būsi
|
||||
būs
|
||||
būsim
|
||||
būsiet
|
||||
tikt
|
||||
tiku
|
||||
tiki
|
||||
tika
|
||||
tikām
|
||||
tikāt
|
||||
tieku
|
||||
tiec
|
||||
tiek
|
||||
tiekam
|
||||
tiekat
|
||||
tikšu
|
||||
tiks
|
||||
tiksim
|
||||
tiksiet
|
||||
tapt
|
||||
tapi
|
||||
tapāt
|
||||
topat
|
||||
tapšu
|
||||
tapsi
|
||||
taps
|
||||
tapsim
|
||||
tapsiet
|
||||
kļūt
|
||||
kļuvu
|
||||
kļuvi
|
||||
kļuva
|
||||
kļuvām
|
||||
kļuvāt
|
||||
kļūstu
|
||||
kļūsti
|
||||
kļūst
|
||||
kļūstam
|
||||
kļūstat
|
||||
kļūšu
|
||||
kļūsi
|
||||
kļūs
|
||||
kļūsim
|
||||
kļūsiet
|
||||
# verbs
|
||||
varēt
|
||||
varēju
|
||||
varējām
|
||||
varēšu
|
||||
varēsim
|
||||
var
|
||||
varēji
|
||||
varējāt
|
||||
varēsi
|
||||
varēsiet
|
||||
varat
|
||||
varēja
|
||||
varēs
|
|
@ -1,119 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| A Dutch stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
| This is a ranked list (commonest to rarest) of stopwords derived from
|
||||
| a large sample of Dutch text.
|
||||
|
||||
| Dutch stop words frequently exhibit homonym clashes. These are indicated
|
||||
| clearly below.
|
||||
|
||||
de | the
|
||||
en | and
|
||||
van | of, from
|
||||
ik | I, the ego
|
||||
te | (1) chez, at etc, (2) to, (3) too
|
||||
dat | that, which
|
||||
die | that, those, who, which
|
||||
in | in, inside
|
||||
een | a, an, one
|
||||
hij | he
|
||||
het | the, it
|
||||
niet | not, nothing, naught
|
||||
zijn | (1) to be, being, (2) his, one's, its
|
||||
is | is
|
||||
was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
|
||||
op | on, upon, at, in, up, used up
|
||||
aan | on, upon, to (as dative)
|
||||
met | with, by
|
||||
als | like, such as, when
|
||||
voor | (1) before, in front of, (2) furrow
|
||||
had | had, past tense all persons sing. of 'hebben' (have)
|
||||
er | there
|
||||
maar | but, only
|
||||
om | round, about, for etc
|
||||
hem | him
|
||||
dan | then
|
||||
zou | should/would, past tense all persons sing. of 'zullen'
|
||||
of | or, whether, if
|
||||
wat | what, something, anything
|
||||
mijn | possessive and noun 'mine'
|
||||
men | people, 'one'
|
||||
dit | this
|
||||
zo | so, thus, in this way
|
||||
door | through, by
|
||||
over | over, across
|
||||
ze | she, her, they, them
|
||||
zich | oneself
|
||||
bij | (1) a bee, (2) by, near, at
|
||||
ook | also, too
|
||||
tot | till, until
|
||||
je | you
|
||||
mij | me
|
||||
uit | out of, from
|
||||
der | Old Dutch form of 'van der' still found in surnames
|
||||
daar | (1) there, (2) because
|
||||
haar | (1) her, their, them, (2) hair
|
||||
naar | (1) unpleasant, unwell etc, (2) towards, (3) as
|
||||
heb | present first person sing. of 'to have'
|
||||
hoe | how, why
|
||||
heeft | present third person sing. of 'to have'
|
||||
hebben | 'to have' and various parts thereof
|
||||
deze | this
|
||||
u | you
|
||||
want | (1) for, (2) mitten, (3) rigging
|
||||
nog | yet, still
|
||||
zal | 'shall', first and third person sing. of verb 'zullen' (will)
|
||||
me | me
|
||||
zij | she, they
|
||||
nu | now
|
||||
ge | 'thou', still used in Belgium and south Netherlands
|
||||
geen | none
|
||||
omdat | because
|
||||
iets | something, somewhat
|
||||
worden | to become, grow, get
|
||||
toch | yet, still
|
||||
al | all, every, each
|
||||
waren | (1) 'were', (2) to wander, (3) wares
|
||||
veel | much, many
|
||||
meer | (1) more, (2) lake
|
||||
doen | to do, to make
|
||||
toen | then, when
|
||||
moet | noun 'spot/mote' and present form of 'to must'
|
||||
ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
|
||||
zonder | without
|
||||
kan | noun 'can' and present form of 'to be able'
|
||||
hun | their, them
|
||||
dus | so, consequently
|
||||
alles | all, everything, anything
|
||||
onder | under, beneath
|
||||
ja | yes, of course
|
||||
eens | once, one day
|
||||
hier | here
|
||||
wie | who
|
||||
werd | imperfect third person sing. of 'become'
|
||||
altijd | always
|
||||
doch | yet, but etc
|
||||
wordt | present third person sing. of 'become'
|
||||
wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
|
||||
kunnen | to be able
|
||||
ons | us/our
|
||||
zelf | self
|
||||
tegen | against, towards, at
|
||||
na | after, near
|
||||
reeds | already
|
||||
wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
|
||||
kon | could; past tense of 'to be able'
|
||||
niets | nothing
|
||||
uw | your
|
||||
iemand | somebody
|
||||
geweest | been; past participle of 'be'
|
||||
andere | other
|
|
@ -1,194 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
| This stop word list is for the dominant bokmål dialect. Words unique
|
||||
| to nynorsk are marked *.
|
||||
|
||||
| Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
|
||||
|
||||
og | and
|
||||
i | in
|
||||
jeg | I
|
||||
det | it/this/that
|
||||
at | to (w. inf.)
|
||||
en | a/an
|
||||
et | a/an
|
||||
den | it/this/that
|
||||
til | to
|
||||
er | is/am/are
|
||||
som | who/that
|
||||
på | on
|
||||
de | they / you(formal)
|
||||
med | with
|
||||
han | he
|
||||
av | of
|
||||
ikke | not
|
||||
ikkje | not *
|
||||
der | there
|
||||
så | so
|
||||
var | was/were
|
||||
meg | me
|
||||
seg | oneself (reflexive)
|
||||
men | but
|
||||
ett | one
|
||||
har | have
|
||||
om | about
|
||||
vi | we
|
||||
min | my
|
||||
mitt | my
|
||||
ha | have
|
||||
hadde | had
|
||||
hun | she
|
||||
nå | now
|
||||
over | over
|
||||
da | when/as
|
||||
ved | by/know
|
||||
fra | from
|
||||
du | you
|
||||
ut | out
|
||||
sin | hers/his
|
||||
dem | them
|
||||
oss | us
|
||||
opp | up
|
||||
man | you/one
|
||||
kan | can
|
||||
hans | his
|
||||
hvor | where
|
||||
eller | or
|
||||
hva | what
|
||||
skal | shall/must
|
||||
selv | self (reflexive)
|
||||
sjøl | self (reflexive)
|
||||
her | here
|
||||
alle | all
|
||||
vil | will
|
||||
bli | become
|
||||
ble | became
|
||||
blei | became *
|
||||
blitt | have become
|
||||
kunne | could
|
||||
inn | in
|
||||
når | when
|
||||
være | be
|
||||
kom | come
|
||||
noen | some
|
||||
noe | some
|
||||
ville | would
|
||||
dere | you
|
||||
som | who/which/that
|
||||
deres | their/theirs
|
||||
kun | only/just
|
||||
ja | yes
|
||||
etter | after
|
||||
ned | down
|
||||
skulle | should
|
||||
denne | this
|
||||
for | for/because
|
||||
deg | you
|
||||
si | hers/his
|
||||
sine | hers/his
|
||||
sitt | hers/his
|
||||
mot | against
|
||||
å | to
|
||||
meget | much
|
||||
hvorfor | why
|
||||
dette | this
|
||||
disse | these/those
|
||||
uten | without
|
||||
hvordan | how
|
||||
ingen | none
|
||||
din | your
|
||||
ditt | your
|
||||
blir | become
|
||||
samme | same
|
||||
hvilken | which
|
||||
hvilke | which (plural)
|
||||
sånn | such a
|
||||
inni | inside/within
|
||||
mellom | between
|
||||
vår | our
|
||||
hver | each
|
||||
hvem | who
|
||||
vors | us/ours
|
||||
hvis | whose
|
||||
både | both
|
||||
bare | only/just
|
||||
enn | than
|
||||
fordi | as/because
|
||||
før | before
|
||||
mange | many
|
||||
også | also
|
||||
slik | just
|
||||
vært | been
|
||||
være | to be
|
||||
båe | both *
|
||||
begge | both
|
||||
siden | since
|
||||
dykk | your *
|
||||
dykkar | yours *
|
||||
dei | they *
|
||||
deira | them *
|
||||
deires | theirs *
|
||||
deim | them *
|
||||
di | your (fem.) *
|
||||
då | as/when *
|
||||
eg | I *
|
||||
ein | a/an *
|
||||
eit | a/an *
|
||||
eitt | a/an *
|
||||
elles | or *
|
||||
honom | he *
|
||||
hjå | at *
|
||||
ho | she *
|
||||
hoe | she *
|
||||
henne | her
|
||||
hennar | her/hers
|
||||
hennes | hers
|
||||
hoss | how *
|
||||
hossen | how *
|
||||
ikkje | not *
|
||||
ingi | noone *
|
||||
inkje | noone *
|
||||
korleis | how *
|
||||
korso | how *
|
||||
kva | what/which *
|
||||
kvar | where *
|
||||
kvarhelst | where *
|
||||
kven | who/whom *
|
||||
kvi | why *
|
||||
kvifor | why *
|
||||
me | we *
|
||||
medan | while *
|
||||
mi | my *
|
||||
mine | my *
|
||||
mykje | much *
|
||||
no | now *
|
||||
nokon | some (masc./neut.) *
|
||||
noka | some (fem.) *
|
||||
nokor | some *
|
||||
noko | some *
|
||||
nokre | some *
|
||||
si | his/hers *
|
||||
sia | since *
|
||||
sidan | since *
|
||||
so | so *
|
||||
somt | some *
|
||||
somme | some *
|
||||
um | about *
|
||||
upp | up *
|
||||
vere | be *
|
||||
vore | was *
|
||||
verte | become *
|
||||
vort | become *
|
||||
varte | became *
|
||||
vart | became *
|
||||
|
|
@ -1,253 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
|
||||
| The following is a ranked list (commonest to rarest) of stopwords
|
||||
| deriving from a large sample of text.
|
||||
|
||||
| Extra words have been added at the end.
|
||||
|
||||
de | of, from
|
||||
a | the; to, at; her
|
||||
o | the; him
|
||||
que | who, that
|
||||
e | and
|
||||
do | de + o
|
||||
da | de + a
|
||||
em | in
|
||||
um | a
|
||||
para | for
|
||||
| é from SER
|
||||
com | with
|
||||
não | not, no
|
||||
uma | a
|
||||
os | the; them
|
||||
no | em + o
|
||||
se | himself etc
|
||||
na | em + a
|
||||
por | for
|
||||
mais | more
|
||||
as | the; them
|
||||
dos | de + os
|
||||
como | as, like
|
||||
mas | but
|
||||
| foi from SER
|
||||
ao | a + o
|
||||
ele | he
|
||||
das | de + as
|
||||
| tem from TER
|
||||
à | a + a
|
||||
seu | his
|
||||
sua | her
|
||||
ou | or
|
||||
| ser from SER
|
||||
quando | when
|
||||
muito | much
|
||||
| há from HAV
|
||||
nos | em + os; us
|
||||
já | already, now
|
||||
| está from EST
|
||||
eu | I
|
||||
também | also
|
||||
só | only, just
|
||||
pelo | per + o
|
||||
pela | per + a
|
||||
até | up to
|
||||
isso | that
|
||||
ela | she
|
||||
entre | between
|
||||
| era from SER
|
||||
depois | after
|
||||
sem | without
|
||||
mesmo | same
|
||||
aos | a + os
|
||||
| ter from TER
|
||||
seus | his
|
||||
quem | whom
|
||||
nas | em + as
|
||||
me | me
|
||||
esse | that
|
||||
eles | they
|
||||
| estão from EST
|
||||
você | you
|
||||
| tinha from TER
|
||||
| foram from SER
|
||||
essa | that
|
||||
num | em + um
|
||||
nem | nor
|
||||
suas | her
|
||||
meu | my
|
||||
às | a + as
|
||||
minha | my
|
||||
| têm from TER
|
||||
numa | em + uma
|
||||
pelos | per + os
|
||||
elas | they
|
||||
| havia from HAV
|
||||
| seja from SER
|
||||
qual | which
|
||||
| será from SER
|
||||
nós | we
|
||||
| tenho from TER
|
||||
lhe | to him, her
|
||||
deles | of them
|
||||
essas | those
|
||||
esses | those
|
||||
pelas | per + as
|
||||
este | this
|
||||
| fosse from SER
|
||||
dele | of him
|
||||
|
||||
| other words. There are many contractions such as naquele = em+aquele,
|
||||
| mo = me+o, but they are rare.
|
||||
| Indefinite article plural forms are also rare.
|
||||
|
||||
tu | thou
|
||||
te | thee
|
||||
vocês | you (plural)
|
||||
vos | you
|
||||
lhes | to them
|
||||
meus | my
|
||||
minhas
|
||||
teu | thy
|
||||
tua
|
||||
teus
|
||||
tuas
|
||||
nosso | our
|
||||
nossa
|
||||
nossos
|
||||
nossas
|
||||
|
||||
dela | of her
|
||||
delas | of them
|
||||
|
||||
esta | this
|
||||
estes | these
|
||||
estas | these
|
||||
aquele | that
|
||||
aquela | that
|
||||
aqueles | those
|
||||
aquelas | those
|
||||
isto | this
|
||||
aquilo | that
|
||||
|
||||
| forms of estar, to be (not including the infinitive):
|
||||
estou
|
||||
está
|
||||
estamos
|
||||
estão
|
||||
estive
|
||||
esteve
|
||||
estivemos
|
||||
estiveram
|
||||
estava
|
||||
estávamos
|
||||
estavam
|
||||
estivera
|
||||
estivéramos
|
||||
esteja
|
||||
estejamos
|
||||
estejam
|
||||
estivesse
|
||||
estivéssemos
|
||||
estivessem
|
||||
estiver
|
||||
estivermos
|
||||
estiverem
|
||||
|
||||
| forms of haver, to have (not including the infinitive):
|
||||
hei
|
||||
há
|
||||
havemos
|
||||
hão
|
||||
houve
|
||||
houvemos
|
||||
houveram
|
||||
houvera
|
||||
houvéramos
|
||||
haja
|
||||
hajamos
|
||||
hajam
|
||||
houvesse
|
||||
houvéssemos
|
||||
houvessem
|
||||
houver
|
||||
houvermos
|
||||
houverem
|
||||
houverei
|
||||
houverá
|
||||
houveremos
|
||||
houverão
|
||||
houveria
|
||||
houveríamos
|
||||
houveriam
|
||||
|
||||
| forms of ser, to be (not including the infinitive):
|
||||
sou
|
||||
somos
|
||||
são
|
||||
era
|
||||
éramos
|
||||
eram
|
||||
fui
|
||||
foi
|
||||
fomos
|
||||
foram
|
||||
fora
|
||||
fôramos
|
||||
seja
|
||||
sejamos
|
||||
sejam
|
||||
fosse
|
||||
fôssemos
|
||||
fossem
|
||||
for
|
||||
formos
|
||||
forem
|
||||
serei
|
||||
será
|
||||
seremos
|
||||
serão
|
||||
seria
|
||||
seríamos
|
||||
seriam
|
||||
|
||||
| forms of ter, to have (not including the infinitive):
|
||||
tenho
|
||||
tem
|
||||
temos
|
||||
tém
|
||||
tinha
|
||||
tínhamos
|
||||
tinham
|
||||
tive
|
||||
teve
|
||||
tivemos
|
||||
tiveram
|
||||
tivera
|
||||
tivéramos
|
||||
tenha
|
||||
tenhamos
|
||||
tenham
|
||||
tivesse
|
||||
tivéssemos
|
||||
tivessem
|
||||
tiver
|
||||
tivermos
|
||||
tiverem
|
||||
terei
|
||||
terá
|
||||
teremos
|
||||
terão
|
||||
teria
|
||||
teríamos
|
||||
teriam
|
|
@ -1,233 +0,0 @@
|
|||
# This file was created by Jacques Savoy and is distributed under the BSD license.
|
||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
|
||||
# Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
acea
|
||||
aceasta
|
||||
această
|
||||
aceea
|
||||
acei
|
||||
aceia
|
||||
acel
|
||||
acela
|
||||
acele
|
||||
acelea
|
||||
acest
|
||||
acesta
|
||||
aceste
|
||||
acestea
|
||||
aceşti
|
||||
aceştia
|
||||
acolo
|
||||
acum
|
||||
ai
|
||||
aia
|
||||
aibă
|
||||
aici
|
||||
al
|
||||
ăla
|
||||
ale
|
||||
alea
|
||||
ălea
|
||||
altceva
|
||||
altcineva
|
||||
am
|
||||
ar
|
||||
are
|
||||
aş
|
||||
aşadar
|
||||
asemenea
|
||||
asta
|
||||
ăsta
|
||||
astăzi
|
||||
astea
|
||||
ăstea
|
||||
ăştia
|
||||
asupra
|
||||
aţi
|
||||
au
|
||||
avea
|
||||
avem
|
||||
aveţi
|
||||
azi
|
||||
bine
|
||||
bucur
|
||||
bună
|
||||
ca
|
||||
că
|
||||
căci
|
||||
când
|
||||
care
|
||||
cărei
|
||||
căror
|
||||
cărui
|
||||
cât
|
||||
câte
|
||||
câţi
|
||||
către
|
||||
câtva
|
||||
ce
|
||||
cel
|
||||
ceva
|
||||
chiar
|
||||
cînd
|
||||
cine
|
||||
cineva
|
||||
cît
|
||||
cîte
|
||||
cîţi
|
||||
cîtva
|
||||
contra
|
||||
cu
|
||||
cum
|
||||
cumva
|
||||
curând
|
||||
curînd
|
||||
da
|
||||
dă
|
||||
dacă
|
||||
dar
|
||||
datorită
|
||||
de
|
||||
deci
|
||||
deja
|
||||
deoarece
|
||||
departe
|
||||
deşi
|
||||
din
|
||||
dinaintea
|
||||
dintr
|
||||
dintre
|
||||
drept
|
||||
după
|
||||
ea
|
||||
ei
|
||||
el
|
||||
ele
|
||||
eram
|
||||
este
|
||||
eşti
|
||||
eu
|
||||
face
|
||||
fără
|
||||
fi
|
||||
fie
|
||||
fiecare
|
||||
fii
|
||||
fim
|
||||
fiţi
|
||||
iar
|
||||
ieri
|
||||
îi
|
||||
îl
|
||||
îmi
|
||||
împotriva
|
||||
în
|
||||
înainte
|
||||
înaintea
|
||||
încât
|
||||
încît
|
||||
încotro
|
||||
între
|
||||
întrucât
|
||||
întrucît
|
||||
îţi
|
||||
la
|
||||
lângă
|
||||
le
|
||||
li
|
||||
lîngă
|
||||
lor
|
||||
lui
|
||||
mă
|
||||
mâine
|
||||
mea
|
||||
mei
|
||||
mele
|
||||
mereu
|
||||
meu
|
||||
mi
|
||||
mine
|
||||
mult
|
||||
multă
|
||||
mulţi
|
||||
ne
|
||||
nicăieri
|
||||
nici
|
||||
nimeni
|
||||
nişte
|
||||
noastră
|
||||
noastre
|
||||
noi
|
||||
noştri
|
||||
nostru
|
||||
nu
|
||||
ori
|
||||
oricând
|
||||
oricare
|
||||
oricât
|
||||
orice
|
||||
oricînd
|
||||
oricine
|
||||
oricît
|
||||
oricum
|
||||
oriunde
|
||||
până
|
||||
pe
|
||||
pentru
|
||||
peste
|
||||
pînă
|
||||
poate
|
||||
pot
|
||||
prea
|
||||
prima
|
||||
primul
|
||||
prin
|
||||
printr
|
||||
sa
|
||||
să
|
||||
săi
|
||||
sale
|
||||
sau
|
||||
său
|
||||
se
|
||||
şi
|
||||
sînt
|
||||
sîntem
|
||||
sînteţi
|
||||
spre
|
||||
sub
|
||||
sunt
|
||||
suntem
|
||||
sunteţi
|
||||
ta
|
||||
tăi
|
||||
tale
|
||||
tău
|
||||
te
|
||||
ţi
|
||||
ţie
|
||||
tine
|
||||
toată
|
||||
toate
|
||||
tot
|
||||
toţi
|
||||
totuşi
|
||||
tu
|
||||
un
|
||||
una
|
||||
unde
|
||||
undeva
|
||||
unei
|
||||
unele
|
||||
uneori
|
||||
unor
|
||||
vă
|
||||
vi
|
||||
voastră
|
||||
voastre
|
||||
voi
|
||||
voştri
|
||||
vostru
|
||||
vouă
|
||||
vreo
|
||||
vreun
|
|
@ -1,243 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| A Russian stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
| This is a ranked list (commonest to rarest) of stopwords derived from
|
||||
| a large text sample.
|
||||
|
||||
| letter `ё' is translated to `е'.
|
||||
|
||||
и | and
|
||||
в | in/into
|
||||
во | alternative form
|
||||
не | not
|
||||
что | what/that
|
||||
он | he
|
||||
на | on/onto
|
||||
я | i
|
||||
с | from
|
||||
со | alternative form
|
||||
как | how
|
||||
а | milder form of `no' (but)
|
||||
то | conjunction and form of `that'
|
||||
все | all
|
||||
она | she
|
||||
так | so, thus
|
||||
его | him
|
||||
но | but
|
||||
да | yes/and
|
||||
ты | thou
|
||||
к | towards, by
|
||||
у | around, chez
|
||||
же | intensifier particle
|
||||
вы | you
|
||||
за | beyond, behind
|
||||
бы | conditional/subj. particle
|
||||
по | up to, along
|
||||
только | only
|
||||
ее | her
|
||||
мне | to me
|
||||
было | it was
|
||||
вот | here is/are, particle
|
||||
от | away from
|
||||
меня | me
|
||||
еще | still, yet, more
|
||||
нет | no, there isn't/aren't
|
||||
о | about
|
||||
из | out of
|
||||
ему | to him
|
||||
теперь | now
|
||||
когда | when
|
||||
даже | even
|
||||
ну | so, well
|
||||
вдруг | suddenly
|
||||
ли | interrogative particle
|
||||
если | if
|
||||
уже | already, but homonym of `narrower'
|
||||
или | or
|
||||
ни | neither
|
||||
быть | to be
|
||||
был | he was
|
||||
него | prepositional form of его
|
||||
до | up to
|
||||
вас | you accusative
|
||||
нибудь | indef. suffix preceded by hyphen
|
||||
опять | again
|
||||
уж | already, but homonym of `adder'
|
||||
вам | to you
|
||||
сказал | he said
|
||||
ведь | particle `after all'
|
||||
там | there
|
||||
потом | then
|
||||
себя | oneself
|
||||
ничего | nothing
|
||||
ей | to her
|
||||
может | usually with `быть' as `maybe'
|
||||
они | they
|
||||
тут | here
|
||||
где | where
|
||||
есть | there is/are
|
||||
надо | got to, must
|
||||
ней | prepositional form of ей
|
||||
для | for
|
||||
мы | we
|
||||
тебя | thee
|
||||
их | them, their
|
||||
чем | than
|
||||
была | she was
|
||||
сам | self
|
||||
чтоб | in order to
|
||||
без | without
|
||||
будто | as if
|
||||
человек | man, person, one
|
||||
чего | genitive form of `what'
|
||||
раз | once
|
||||
тоже | also
|
||||
себе | to oneself
|
||||
под | beneath
|
||||
жизнь | life
|
||||
будет | will be
|
||||
ж | short form of intensifier particle `же'
|
||||
тогда | then
|
||||
кто | who
|
||||
этот | this
|
||||
говорил | was saying
|
||||
того | genitive form of `that'
|
||||
потому | for that reason
|
||||
этого | genitive form of `this'
|
||||
какой | which
|
||||
совсем | altogether
|
||||
ним | prepositional form of `его', `они'
|
||||
здесь | here
|
||||
этом | prepositional form of `этот'
|
||||
один | one
|
||||
почти | almost
|
||||
мой | my
|
||||
тем | instrumental/dative plural of `тот', `то'
|
||||
чтобы | full form of `in order that'
|
||||
нее | her (acc.)
|
||||
кажется | it seems
|
||||
сейчас | now
|
||||
были | they were
|
||||
куда | where to
|
||||
зачем | why
|
||||
сказать | to say
|
||||
всех | all (acc., gen. preposn. plural)
|
||||
никогда | never
|
||||
сегодня | today
|
||||
можно | possible, one can
|
||||
при | by
|
||||
наконец | finally
|
||||
два | two
|
||||
об | alternative form of `о', about
|
||||
другой | another
|
||||
хоть | even
|
||||
после | after
|
||||
над | above
|
||||
больше | more
|
||||
тот | that one (masc.)
|
||||
через | across, in
|
||||
эти | these
|
||||
нас | us
|
||||
про | about
|
||||
всего | in all, only, of all
|
||||
них | prepositional form of `они' (they)
|
||||
какая | which, feminine
|
||||
много | lots
|
||||
разве | interrogative particle
|
||||
сказала | she said
|
||||
три | three
|
||||
эту | this, acc. fem. sing.
|
||||
моя | my, feminine
|
||||
впрочем | moreover, besides
|
||||
хорошо | good
|
||||
свою | one's own, acc. fem. sing.
|
||||
этой | oblique form of `эта', fem. `this'
|
||||
перед | in front of
|
||||
иногда | sometimes
|
||||
лучше | better
|
||||
чуть | a little
|
||||
том | preposn. form of `that one'
|
||||
нельзя | one must not
|
||||
такой | such a one
|
||||
им | to them
|
||||
более | more
|
||||
всегда | always
|
||||
конечно | of course
|
||||
всю | acc. fem. sing of `all'
|
||||
между | between
|
||||
|
||||
|
||||
| b: some paradigms
|
||||
|
|
||||
| personal pronouns
|
||||
|
|
||||
| я меня мне мной [мною]
|
||||
| ты тебя тебе тобой [тобою]
|
||||
| он его ему им [него, нему, ним]
|
||||
| она ее ей ею [нее, ней, нею]
|
||||
| оно его ему им [него, нему, ним]
|
||||
|
|
||||
| мы нас нам нами
|
||||
| вы вас вам вами
|
||||
| они их им ими [них, ним, ними]
|
||||
|
|
||||
| себя себе собой [собою]
|
||||
|
|
||||
| demonstrative pronouns: этот (this), тот (that)
|
||||
|
|
||||
| этот эта это эти
|
||||
| этого эту это эти
|
||||
| этого этой этого этих
|
||||
| этому этой этому этим
|
||||
| этим этой этим [этою] этими
|
||||
| этом этой этом этих
|
||||
|
|
||||
| тот та то те
|
||||
| того ту то те
|
||||
| того той того тех
|
||||
| тому той тому тем
|
||||
| тем той тем [тою] теми
|
||||
| том той том тех
|
||||
|
|
||||
| determinative pronouns
|
||||
|
|
||||
| (a) весь (all)
|
||||
|
|
||||
| весь вся все все
|
||||
| всего всю все все
|
||||
| всего всей всего всех
|
||||
| всему всей всему всем
|
||||
| всем всей всем [всею] всеми
|
||||
| всем всей всем всех
|
||||
|
|
||||
| (b) сам (himself etc)
|
||||
|
|
||||
| сам сама само сами
|
||||
| самого саму само самих
|
||||
| самого самой самого самих
|
||||
| самому самой самому самим
|
||||
| самим самой самим [самою] самими
|
||||
| самом самой самом самих
|
||||
|
|
||||
| stems of verbs `to be', `to have', `to do' and modal
|
||||
|
|
||||
| быть бы буд быв есть суть
|
||||
| име
|
||||
| дел
|
||||
| мог мож мочь
|
||||
| уме
|
||||
| хоч хот
|
||||
| долж
|
||||
| можн
|
||||
| нужн
|
||||
| нельзя
|
||||
|
|
@ -1,133 +0,0 @@
|
|||
| From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
|
||||
| This file is distributed under the BSD License.
|
||||
| See http://snowball.tartarus.org/license.php
|
||||
| Also see http://www.opensource.org/licenses/bsd-license.html
|
||||
| - Encoding was converted to UTF-8.
|
||||
| - This notice was added.
|
||||
|
|
||||
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
|
||||
|
||||
| A Swedish stop word list. Comments begin with vertical bar. Each stop
|
||||
| word is at the start of a line.
|
||||
|
||||
| This is a ranked list (commonest to rarest) of stopwords derived from
|
||||
| a large text sample.
|
||||
|
||||
| Swedish stop words occasionally exhibit homonym clashes. For example
|
||||
| så = so, but also seed. These are indicated clearly below.
|
||||
|
||||
och | and
|
||||
det | it, this/that
|
||||
att | to (with infinitive)
|
||||
i | in, at
|
||||
en | a
|
||||
jag | I
|
||||
hon | she
|
||||
som | who, that
|
||||
han | he
|
||||
på | on
|
||||
den | it, this/that
|
||||
med | with
|
||||
var | where, each
|
||||
sig | him(self) etc
|
||||
för | for
|
||||
så | so (also: seed)
|
||||
till | to
|
||||
är | is
|
||||
men | but
|
||||
ett | a
|
||||
om | if; around, about
|
||||
hade | had
|
||||
de | they, these/those
|
||||
av | of
|
||||
icke | not, no
|
||||
mig | me
|
||||
du | you
|
||||
henne | her
|
||||
då | then, when
|
||||
sin | his
|
||||
nu | now
|
||||
har | have
|
||||
inte | inte någon = no one
|
||||
hans | his
|
||||
honom | him
|
||||
skulle | 'sake'
|
||||
hennes | her
|
||||
där | there
|
||||
min | my
|
||||
man | one (pronoun)
|
||||
ej | nor
|
||||
vid | at, by, on (also: vast)
|
||||
kunde | could
|
||||
något | some etc
|
||||
från | from, off
|
||||
ut | out
|
||||
när | when
|
||||
efter | after, behind
|
||||
upp | up
|
||||
vi | we
|
||||
dem | them
|
||||
vara | be
|
||||
vad | what
|
||||
över | over
|
||||
än | than
|
||||
dig | you
|
||||
kan | can
|
||||
sina | his
|
||||
här | here
|
||||
ha | have
|
||||
mot | towards
|
||||
alla | all
|
||||
under | under (also: wonder)
|
||||
någon | some etc
|
||||
eller | or (else)
|
||||
allt | all
|
||||
mycket | much
|
||||
sedan | since
|
||||
ju | why
|
||||
denna | this/that
|
||||
själv | myself, yourself etc
|
||||
detta | this/that
|
||||
åt | to
|
||||
utan | without
|
||||
varit | was
|
||||
hur | how
|
||||
ingen | no
|
||||
mitt | my
|
||||
ni | you
|
||||
bli | to be, become
|
||||
blev | from bli
|
||||
oss | us
|
||||
din | thy
|
||||
dessa | these/those
|
||||
några | some etc
|
||||
deras | their
|
||||
blir | from bli
|
||||
mina | my
|
||||
samma | (the) same
|
||||
vilken | who, that
|
||||
er | you, your
|
||||
sådan | such a
|
||||
vår | our
|
||||
blivit | from bli
|
||||
dess | its
|
||||
inom | within
|
||||
mellan | between
|
||||
sådant | such a
|
||||
varför | why
|
||||
varje | each
|
||||
vilka | who, that
|
||||
ditt | thy
|
||||
vem | who
|
||||
vilket | who, that
|
||||
sitta | his
|
||||
sådana | such a
|
||||
vart | each
|
||||
dina | thy
|
||||
vars | whose
|
||||
vårt | our
|
||||
våra | our
|
||||
ert | your
|
||||
era | your
|
||||
vilkas | whose
|
||||
|
|
@ -1,119 +0,0 @@
|
|||
# Thai stopwords from:
|
||||
# "Opinion Detection in Thai Political News Columns
|
||||
# Based on Subjectivity Analysis"
|
||||
# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
|
||||
ไว้
|
||||
ไม่
|
||||
ไป
|
||||
ได้
|
||||
ให้
|
||||
ใน
|
||||
โดย
|
||||
แห่ง
|
||||
แล้ว
|
||||
และ
|
||||
แรก
|
||||
แบบ
|
||||
แต่
|
||||
เอง
|
||||
เห็น
|
||||
เลย
|
||||
เริ่ม
|
||||
เรา
|
||||
เมื่อ
|
||||
เพื่อ
|
||||
เพราะ
|
||||
เป็นการ
|
||||
เป็น
|
||||
เปิดเผย
|
||||
เปิด
|
||||
เนื่องจาก
|
||||
เดียวกัน
|
||||
เดียว
|
||||
เช่น
|
||||
เฉพาะ
|
||||
เคย
|
||||
เข้า
|
||||
เขา
|
||||
อีก
|
||||
อาจ
|
||||
อะไร
|
||||
ออก
|
||||
อย่าง
|
||||
อยู่
|
||||
อยาก
|
||||
หาก
|
||||
หลาย
|
||||
หลังจาก
|
||||
หลัง
|
||||
หรือ
|
||||
หนึ่ง
|
||||
ส่วน
|
||||
ส่ง
|
||||
สุด
|
||||
สําหรับ
|
||||
ว่า
|
||||
วัน
|
||||
ลง
|
||||
ร่วม
|
||||
ราย
|
||||
รับ
|
||||
ระหว่าง
|
||||
รวม
|
||||
ยัง
|
||||
มี
|
||||
มาก
|
||||
มา
|
||||
พร้อม
|
||||
พบ
|
||||
ผ่าน
|
||||
ผล
|
||||
บาง
|
||||
น่า
|
||||
นี้
|
||||
นํา
|
||||
นั้น
|
||||
นัก
|
||||
นอกจาก
|
||||
ทุก
|
||||
ที่สุด
|
||||
ที่
|
||||
ทําให้
|
||||
ทํา
|
||||
ทาง
|
||||
ทั้งนี้
|
||||
ทั้ง
|
||||
ถ้า
|
||||
ถูก
|
||||
ถึง
|
||||
ต้อง
|
||||
ต่างๆ
|
||||
ต่าง
|
||||
ต่อ
|
||||
ตาม
|
||||
ตั้งแต่
|
||||
ตั้ง
|
||||
ด้าน
|
||||
ด้วย
|
||||
ดัง
|
||||
ซึ่ง
|
||||
ช่วง
|
||||
จึง
|
||||
จาก
|
||||
จัด
|
||||
จะ
|
||||
คือ
|
||||
ความ
|
||||
ครั้ง
|
||||
คง
|
||||
ขึ้น
|
||||
ของ
|
||||
ขอ
|
||||
ขณะ
|
||||
ก่อน
|
||||
ก็
|
||||
การ
|
||||
กับ
|
||||
กัน
|
||||
กว่า
|
||||
กล่าว
|
|
@ -1,212 +0,0 @@
|
|||
# Turkish stopwords from LUCENE-559
|
||||
# merged with the list from "Information Retrieval on Turkish Texts"
|
||||
# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
|
||||
acaba
|
||||
altmış
|
||||
altı
|
||||
ama
|
||||
ancak
|
||||
arada
|
||||
aslında
|
||||
ayrıca
|
||||
bana
|
||||
bazı
|
||||
belki
|
||||
ben
|
||||
benden
|
||||
beni
|
||||
benim
|
||||
beri
|
||||
beş
|
||||
bile
|
||||
bin
|
||||
bir
|
||||
birçok
|
||||
biri
|
||||
birkaç
|
||||
birkez
|
||||
birşey
|
||||
birşeyi
|
||||
biz
|
||||
bize
|
||||
bizden
|
||||
bizi
|
||||
bizim
|
||||
böyle
|
||||
böylece
|
||||
bu
|
||||
buna
|
||||
bunda
|
||||
bundan
|
||||
bunlar
|
||||
bunları
|
||||
bunların
|
||||
bunu
|
||||
bunun
|
||||
burada
|
||||
çok
|
||||
çünkü
|
||||
da
|
||||
daha
|
||||
dahi
|
||||
de
|
||||
defa
|
||||
değil
|
||||
diğer
|
||||
diye
|
||||
doksan
|
||||
dokuz
|
||||
dolayı
|
||||
dolayısıyla
|
||||
dört
|
||||
edecek
|
||||
eden
|
||||
ederek
|
||||
edilecek
|
||||
ediliyor
|
||||
edilmesi
|
||||
ediyor
|
||||
eğer
|
||||
elli
|
||||
en
|
||||
etmesi
|
||||
etti
|
||||
ettiği
|
||||
ettiğini
|
||||
gibi
|
||||
göre
|
||||
halen
|
||||
hangi
|
||||
hatta
|
||||
hem
|
||||
henüz
|
||||
hep
|
||||
hepsi
|
||||
her
|
||||
herhangi
|
||||
herkesin
|
||||
hiç
|
||||
hiçbir
|
||||
için
|
||||
iki
|
||||
ile
|
||||
ilgili
|
||||
ise
|
||||
işte
|
||||
itibaren
|
||||
itibariyle
|
||||
kadar
|
||||
karşın
|
||||
katrilyon
|
||||
kendi
|
||||
kendilerine
|
||||
kendini
|
||||
kendisi
|
||||
kendisine
|
||||
kendisini
|
||||
kez
|
||||
ki
|
||||
kim
|
||||
kimden
|
||||
kime
|
||||
kimi
|
||||
kimse
|
||||
kırk
|
||||
milyar
|
||||
milyon
|
||||
mu
|
||||
mü
|
||||
mı
|
||||
nasıl
|
||||
ne
|
||||
neden
|
||||
nedenle
|
||||
nerde
|
||||
nerede
|
||||
nereye
|
||||
niye
|
||||
niçin
|
||||
o
|
||||
olan
|
||||
olarak
|
||||
oldu
|
||||
olduğu
|
||||
olduğunu
|
||||
olduklarını
|
||||
olmadı
|
||||
olmadığı
|
||||
olmak
|
||||
olması
|
||||
olmayan
|
||||
olmaz
|
||||
olsa
|
||||
olsun
|
||||
olup
|
||||
olur
|
||||
olursa
|
||||
oluyor
|
||||
on
|
||||
ona
|
||||
ondan
|
||||
onlar
|
||||
onlardan
|
||||
onları
|
||||
onların
|
||||
onu
|
||||
onun
|
||||
otuz
|
||||
oysa
|
||||
öyle
|
||||
pek
|
||||
rağmen
|
||||
sadece
|
||||
sanki
|
||||
sekiz
|
||||
seksen
|
||||
sen
|
||||
senden
|
||||
seni
|
||||
senin
|
||||
siz
|
||||
sizden
|
||||
sizi
|
||||
sizin
|
||||
şey
|
||||
şeyden
|
||||
şeyi
|
||||
şeyler
|
||||
şöyle
|
||||
şu
|
||||
şuna
|
||||
şunda
|
||||
şundan
|
||||
şunları
|
||||
şunu
|
||||
tarafından
|
||||
trilyon
|
||||
tüm
|
||||
üç
|
||||
üzere
|
||||
var
|
||||
vardı
|
||||
ve
|
||||
veya
|
||||
ya
|
||||
yani
|
||||
yapacak
|
||||
yapılan
|
||||
yapılması
|
||||
yapıyor
|
||||
yapmak
|
||||
yaptı
|
||||
yaptığı
|
||||
yaptığını
|
||||
yaptıkları
|
||||
yedi
|
||||
yerine
|
||||
yetmiş
|
||||
yine
|
||||
yirmi
|
||||
yoksa
|
||||
yüz
|
||||
zaten
|
|
@ -1,29 +0,0 @@
|
|||
#
|
||||
# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
|
||||
#
|
||||
# Add entries to this file in order to override the statistical model in terms
|
||||
# of segmentation, readings and part-of-speech tags. Notice that entries do
|
||||
# not have weights since they are always used when found. This is by-design
|
||||
# in order to maximize ease-of-use.
|
||||
#
|
||||
# Entries are defined using the following CSV format:
|
||||
# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
|
||||
#
|
||||
# Notice that a single half-width space separates tokens and readings, and
|
||||
# that the number of tokens and readings must match exactly.
|
||||
#
|
||||
# Also notice that the behavior of multiple entries with the same <text> is undefined.
|
||||
#
|
||||
# Whitespace only lines are ignored. Comments are not allowed on entry lines.
|
||||
#
|
||||
|
||||
# Custom segmentation for kanji compounds
|
||||
日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
|
||||
関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
|
||||
|
||||
# Custom segmentation for compound katakana
|
||||
トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
|
||||
ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
|
||||
|
||||
# Custom reading for former sumo wrestler
|
||||
朝青龍,朝青龍,アサショウリュウ,カスタム人名
|
|
@ -1,161 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<schema name="example" version="1.6">
|
||||
|
||||
<!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
|
||||
<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />
|
||||
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />
|
||||
|
||||
<!-- boolean type: "true" or "false" -->
|
||||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
||||
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
|
||||
|
||||
<!--
|
||||
Numeric field types that index values using KD-trees.
|
||||
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
|
||||
-->
|
||||
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
|
||||
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
|
||||
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
|
||||
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
|
||||
|
||||
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
|
||||
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
|
||||
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
|
||||
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
|
||||
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>
|
||||
|
||||
<!-- since fields of this type are by default not stored or indexed,
|
||||
any data added to them will be ignored outright. -->
|
||||
<fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
||||
|
||||
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
||||
is a more restricted form of the canonical representation of dateTime
|
||||
http://www.w3.org/TR/xmlschema-2/#dateTime
|
||||
The trailing "Z" designates UTC time and is mandatory.
|
||||
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
||||
All other components are mandatory.
|
||||
|
||||
Expressions can also be used to denote calculations that should be
|
||||
performed relative to "NOW" to determine the value, ie...
|
||||
|
||||
NOW/HOUR
|
||||
... Round to the start of the current hour
|
||||
NOW-1DAY
|
||||
... Exactly 1 day prior to now
|
||||
NOW/DAY+6MONTHS+3DAYS
|
||||
... 6 months and 3 days in the future from the start of
|
||||
the current day
|
||||
|
||||
-->
|
||||
<!-- KD-tree versions of date fields -->
|
||||
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
|
||||
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
|
||||
|
||||
<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
|
||||
<fieldType name="binary" class="solr.BinaryField"/>
|
||||
|
||||
<!--
|
||||
RankFields can be used to store scoring factors to improve document ranking. They should be used
|
||||
in combination with RankQParserPlugin.
|
||||
(experimental)
|
||||
-->
|
||||
<fieldType name="rank" class="solr.RankField"/>
|
||||
|
||||
<!-- solr.TextField allows the specification of custom text analyzers
|
||||
specified as a tokenizer and a list of token filters. Different
|
||||
analyzers may be specified for indexing and querying.
|
||||
|
||||
The optional positionIncrementGap puts space between multiple fields of
|
||||
this type on the same document, with the purpose of preventing false phrase
|
||||
matching across fields.
|
||||
|
||||
For more info on customizing your analyzer chain, please see
|
||||
https://solr.apache.org/guide/solr/latest/indexing-guide/document-analysis.html#using-analyzers-tokenizers-and-filters
|
||||
-->
|
||||
|
||||
<!-- One can also specify an existing Analyzer class that has a
|
||||
default constructor via the class attribute on the analyzer element.
|
||||
Example:
|
||||
<fieldType name="text_greek" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
||||
</fieldType>
|
||||
-->
|
||||
|
||||
<fieldType name="text_prefix" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer name="standard"/>
|
||||
<filter name="lowercase"/>
|
||||
<filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="20" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer name="standard"/>
|
||||
<filter name="lowercase"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer name="standard"/>
|
||||
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt" />
|
||||
<filter name="lowercase"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer name="standard"/>
|
||||
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt" />
|
||||
<filter name="lowercase"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_en_number_splitting" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer name="whitespace"/>
|
||||
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter name="lowercase"/>
|
||||
<filter name="porterStem"/>
|
||||
<filter name="flattenGraph" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer name="whitespace"/>
|
||||
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter name="lowercase"/>
|
||||
<filter name="porterStem"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer name="whitespace"/>
|
||||
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter name="lowercase"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory" pattern="(\d{2,})" replacement="" replace="all" />
|
||||
<filter name="porterStem"/>
|
||||
<filter name="flattenGraph" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer name="whitespace"/>
|
||||
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
|
||||
<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter name="lowercase"/>
|
||||
<filter name="porterStem"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Fields -->
|
||||
<field name="id" type="string" indexed="true" required="true" />
|
||||
<field name="title" type="text_en_number_splitting" indexed="true" stored="true" required="true" />
|
||||
<field name="body" type="text_en_splitting" indexed="true" stored="true" required="true" />
|
||||
<field name="table" type="text_en_splitting" indexed="true" stored="false" required="true" />
|
||||
<field name="len" type="pint" indexed="false" stored="true" required="true" />
|
||||
|
||||
<!-- Copy title to title_prefix for matching -->
|
||||
<field name="title_prefix" type="text_prefix" indexed="true" stored="false" />
|
||||
<copyField source="title" dest="title_prefix" />
|
||||
|
||||
<field name="_version_" type="plong" indexed="true" stored="true" multiValued="false"/>
|
||||
|
||||
<!-- Unique Key -->
|
||||
<uniqueKey>id</uniqueKey>
|
||||
</schema>
|
File diff suppressed because it is too large
Load diff
|
@ -12,17 +12,13 @@
|
|||
(require-reloadable "src/page-category.rkt" page-category)
|
||||
(require-reloadable "src/page-global-search.rkt" page-global-search)
|
||||
(require-reloadable "src/page-home.rkt" page-home)
|
||||
(require-reloadable "src/page-it-works.rkt" page-it-works)
|
||||
(require-reloadable "src/page-not-found.rkt" page-not-found)
|
||||
(require-reloadable "src/page-proxy.rkt" page-proxy)
|
||||
(require-reloadable "src/page-redirect-wiki-home.rkt" redirect-wiki-home)
|
||||
(require-reloadable "src/page-search.rkt" page-search)
|
||||
(require-reloadable "src/page-set-user-settings.rkt" page-set-user-settings)
|
||||
(require-reloadable "src/page-static.rkt" static-dispatcher)
|
||||
(require-reloadable "src/page-static-archive.rkt" page-static-archive)
|
||||
(require-reloadable "src/page-subdomain.rkt" subdomain-dispatcher)
|
||||
(require-reloadable "src/page-wiki.rkt" page-wiki)
|
||||
(require-reloadable "src/page-wiki-offline.rkt" page-wiki-offline)
|
||||
(require-reloadable "src/page-file.rkt" page-file)
|
||||
|
||||
(reload!)
|
||||
|
@ -30,9 +26,7 @@
|
|||
(define ch (make-channel))
|
||||
(define (start)
|
||||
(serve/launch/wait
|
||||
#:listen-ip (if (equal? (config-get 'bind_host) "auto")
|
||||
(if (config-true? 'debug) "127.0.0.1" #f)
|
||||
(config-get 'bind_host))
|
||||
#:listen-ip (if (config-true? 'debug) "127.0.0.1" #f)
|
||||
#:port (string->number (config-get 'port))
|
||||
(λ (quit)
|
||||
(channel-put ch (lambda () (semaphore-post quit)))
|
||||
|
@ -41,14 +35,10 @@
|
|||
page-category
|
||||
page-global-search
|
||||
page-home
|
||||
page-it-works
|
||||
page-not-found
|
||||
page-proxy
|
||||
page-search
|
||||
page-set-user-settings
|
||||
page-static-archive
|
||||
page-wiki
|
||||
page-wiki-offline
|
||||
page-file
|
||||
redirect-wiki-home
|
||||
static-dispatcher
|
||||
|
|
12
dist.rkt
12
dist.rkt
|
@ -6,23 +6,17 @@
|
|||
(require (only-in "src/page-category.rkt" page-category))
|
||||
(require (only-in "src/page-global-search.rkt" page-global-search))
|
||||
(require (only-in "src/page-home.rkt" page-home))
|
||||
(require (only-in "src/page-it-works.rkt" page-it-works))
|
||||
(require (only-in "src/page-not-found.rkt" page-not-found))
|
||||
(require (only-in "src/page-proxy.rkt" page-proxy))
|
||||
(require (only-in "src/page-redirect-wiki-home.rkt" redirect-wiki-home))
|
||||
(require (only-in "src/page-search.rkt" page-search))
|
||||
(require (only-in "src/page-set-user-settings.rkt" page-set-user-settings))
|
||||
(require (only-in "src/page-static.rkt" static-dispatcher))
|
||||
(require (only-in "src/page-static-archive.rkt" page-static-archive))
|
||||
(require (only-in "src/page-subdomain.rkt" subdomain-dispatcher))
|
||||
(require (only-in "src/page-wiki.rkt" page-wiki))
|
||||
(require (only-in "src/page-wiki-offline.rkt" page-wiki-offline))
|
||||
(require (only-in "src/page-file.rkt" page-file))
|
||||
|
||||
(serve/launch/wait
|
||||
#:listen-ip (if (equal? (config-get 'bind_host) "auto")
|
||||
(if (config-true? 'debug) "127.0.0.1" #f)
|
||||
(config-get 'bind_host))
|
||||
#:listen-ip (if (config-true? 'debug) "127.0.0.1" #f)
|
||||
#:port (string->number (config-get 'port))
|
||||
(λ (quit)
|
||||
(dispatcher-tree
|
||||
|
@ -30,14 +24,10 @@
|
|||
page-category
|
||||
page-global-search
|
||||
page-home
|
||||
page-it-works
|
||||
page-not-found
|
||||
page-proxy
|
||||
page-search
|
||||
page-set-user-settings
|
||||
page-static-archive
|
||||
page-wiki
|
||||
page-wiki-offline
|
||||
page-file
|
||||
redirect-wiki-home
|
||||
static-dispatcher
|
||||
|
|
2
info.rkt
2
info.rkt
|
@ -1,3 +1,3 @@
|
|||
#lang info
|
||||
|
||||
(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "typed-ini-lib" "memo" "net-cookies-lib" "db"))
|
||||
(define build-deps '("rackunit-lib" "web-server-lib" "http-easy-lib" "html-parsing" "html-writing" "json-pointer" "ini-lib" "memo"))
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/string
|
||||
net/url
|
||||
(only-in net/uri-codec uri-decode)
|
||||
"url-utils.rkt")
|
||||
(provide
|
||||
local-encoded-url->segments
|
||||
url-segments->basename
|
||||
local-encoded-url->basename
|
||||
title->basename
|
||||
basename->name-for-query
|
||||
url-segments->guess-title)
|
||||
|
||||
;; Turn a local URL string into the list of its path segments,
;; e.g. '("wiki" "Page_title").
;; NOTE(review): fix-semicolons-url-path comes from url-utils.rkt; presumably it
;; re-joins segments that the URL parser split on ";" -- confirm there.
(define (local-encoded-url->segments str)
  (define parsed-path (url-path (string->url str)))
  (for/list ([segment (in-list (fix-semicolons-url-path parsed-path))])
    (path/param-path segment)))
|
||||
|
||||
;; '("wiki" "Page_title") -> "Page_title": filename encoded, no extension or dir prefix.
;; The first segment is dropped; every remaining segment is percent-encoded
;; with filename-set and the results are joined with "#".
(define (url-segments->basename segments)
  (string-join
   (for/list ([segment (in-list (cdr segments))])
     (bytes->string/latin-1 (percent-encode segment filename-set #f)))
   "#"))
|
||||
|
||||
;; Local URL string -> basename (no extension or dir prefix).
;; Simply chains local-encoded-url->segments and url-segments->basename.
(define (local-encoded-url->basename str)
  (define segments (local-encoded-url->segments str))
  (url-segments->basename segments))
|
||||
|
||||
;; "Page title/Strategies" -> "Page_title#Strategies": filename encoded, no extension or dir prefix.
;; Spaces become underscores, the title is split on "/", each part is
;; percent-encoded with filename-set, and the parts are joined with "#".
(define (title->basename title)
  (define underscored (string-replace title " " "_"))
  (define encoded-parts
    (for/list ([part (in-list (string-split underscored "/"))])
      (bytes->string/latin-1 (percent-encode part filename-set #f))))
  (string-join encoded-parts "#"))
|
||||
|
||||
;; Inverse direction of the basename encoding: every "#" becomes "/" and the
;; result is then URI-decoded.
(define (basename->name-for-query str)
  (define with-slashes (string-replace str "#" "/"))
  (uri-decode with-slashes))
|
||||
|
||||
;; Guess a human-readable title from URL segments: take the second segment
;; and turn its underscores into spaces. Later segments are not consulted.
(define (url-segments->guess-title segments)
  (define page-segment (cadr segments))
  (string-replace page-segment "_" " "))
|
File diff suppressed because it is too large
Load diff
|
@ -1,34 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/contract
|
||||
racket/match
|
||||
racket/path
|
||||
racket/runtime-path
|
||||
racket/string)
|
||||
|
||||
(provide
|
||||
(contract-out
|
||||
[ext->mime-type (-> bytes? bytes?)]
|
||||
[mime-type->ext (-> bytes? bytes?)]))
|
||||
|
||||
(define-runtime-path mime.types-path "mime.types")
|
||||
|
||||
;; Association list of (extension . mime-type) byte-string pairs read from mime.types.
;; Every non-blank, non-comment line has the form "<mime-type> <ext> [<ext> ...]".
;; A line that lists several extensions (e.g. "audio/midi mid midi kar") yields one
;; pair per extension, so each extension can be looked up on its own.
;; Lines that cannot be parsed are logged and skipped, so only pairs ever reach the
;; hash constructors below.
(define ls
  (call-with-input-file mime.types-path
    (λ (in)
      (for*/list ([line (in-lines in)]
                  #:unless (regexp-match? #rx"^ *($|#)" line)
                  [entry (in-list
                          (match (string-split line)
                            [(list mime exts ..1)
                             (for/list ([ext (in-list exts)])
                               (cons (string->bytes/utf-8 ext) (string->bytes/utf-8 mime)))]
                            [_
                             (log-warning "mime-types: failed to parse line ~s" line)
                             null]))])
        entry))))

;; extension -> mime-type. When an extension appears on several lines, the last line wins.
(define forward-hash (make-immutable-hash ls))

;; mime-type -> extension. When a type lists several extensions, the first one listed wins.
(define reverse-hash
  (for/fold ([acc (hash)])
            ([entry (in-list ls)])
    (define mime (cdr entry))
    (if (hash-has-key? acc mime)
        acc
        (hash-set acc mime (car entry)))))
|
||||
|
||||
;; Look up the MIME type (bytes) for a file extension (bytes).
;; One leading "." on the extension is ignored.
;; hash-ref is called without a fallback, so an unknown extension raises.
(define (ext->mime-type ext-in)
  (hash-ref forward-hash
            (regexp-replace #rx"^\\." ext-in #"")))
|
||||
|
||||
;; Look up the file extension (bytes) for a MIME type (bytes).
;; Anything from the first ";" onwards (e.g. a charset parameter) is stripped first.
;; hash-ref is called without a fallback, so an unknown type raises.
(define (mime-type->ext m-in)
  (hash-ref reverse-hash
            (regexp-replace #rx";.*" m-in #"")))
|
|
@ -1,90 +0,0 @@
|
|||
text/html html
|
||||
text/css css
|
||||
application/xml xml
|
||||
text/xml xml
|
||||
image/gif gif
|
||||
image/jpeg jpeg
|
||||
application/javascript js
|
||||
text/javascript js
|
||||
application/atom+xml atom
|
||||
application/rss+xml rss
|
||||
|
||||
text/mathml mml
|
||||
text/plain txt
|
||||
text/x-component htc
|
||||
|
||||
image/png png
|
||||
image/tiff tiff
|
||||
image/vnd.wap.wbmp wbmp
|
||||
image/x-icon ico
|
||||
image/vnd.microsoft.icon ico
|
||||
image/x-jng jng
|
||||
image/x-ms-bmp bmp
|
||||
image/svg+xml svg
|
||||
image/webp webp
|
||||
image/avif avif
|
||||
|
||||
application/font-woff2 woff2
|
||||
application/acad woff2
|
||||
font/woff2 woff2
|
||||
application/font-woff woff
|
||||
font/woff woff
|
||||
application/x-font-ttf ttf
|
||||
application/x-font-truetype ttf
|
||||
application/x-truetype-font ttf
|
||||
font/ttf ttf
|
||||
application/font-sfnt ttf
|
||||
font/sfnt ttf
|
||||
application/vnd.oasis.opendocument.formula-template otf
|
||||
application/x-font-opentype otf
|
||||
application/vnd.ms-opentype otf
|
||||
font/otf otf
|
||||
application/java-archive jar
|
||||
application/json json
|
||||
application/mac-binhex40 hqx
|
||||
application/msword doc
|
||||
application/pdf pdf
|
||||
application/postscript ps
|
||||
application/rtf rtf
|
||||
application/vnd.apple.mpegurl m3u8
|
||||
application/vnd.ms-excel xls
|
||||
application/vnd.ms-fontobject eot
|
||||
application/vnd.ms-powerpoint ppt
|
||||
application/vnd.wap.wmlc wmlc
|
||||
application/vnd.google-earth.kml+xml kml
|
||||
application/vnd.google-earth.kmz kmz
|
||||
application/x-7z-compressed 7z
|
||||
application/x-cocoa cco
|
||||
application/x-java-archive-diff jardiff
|
||||
application/x-java-jnlp-file jnlp
|
||||
application/x-makeself run
|
||||
application/x-perl pl
|
||||
application/x-rar-compressed rar
|
||||
application/x-redhat-package-manager rpm
|
||||
application/x-sea sea
|
||||
application/x-shockwave-flash swf
|
||||
application/x-stuffit sit
|
||||
application/x-tcl tcl
|
||||
application/x-x509-ca-cert pem
|
||||
application/x-xpinstall xpi
|
||||
application/xhtml+xml xhtml
|
||||
application/xspf+xml xspf
|
||||
application/zip zip
|
||||
application/gzip gz
|
||||
|
||||
audio/midi mid midi kar
|
||||
audio/mpeg mp3
|
||||
audio/ogg ogg
|
||||
audio/x-m4a m4a
|
||||
audio/x-realaudio ra
|
||||
|
||||
video/mp2t ts
|
||||
video/mp4 mp4
|
||||
video/mpeg mpeg
|
||||
video/quicktime mov
|
||||
video/webm webm
|
||||
video/x-flv flv
|
||||
video/x-m4v m4v
|
||||
video/x-mng mng
|
||||
video/x-ms-wmv wmv
|
||||
video/x-msvideo avi
|
148
lib/syntax.rkt
148
lib/syntax.rkt
|
@ -1,148 +0,0 @@
|
|||
#lang racket/base
|
||||
(require (for-syntax racket/base syntax/location))
|
||||
|
||||
(provide
|
||||
; help make a nested if. if/in will gain the same false form of its containing if/out.
|
||||
if/out
|
||||
; cond, but values can be defined between conditions
|
||||
cond/var
|
||||
; wrap sql statements into lambdas so they can be executed during migration
|
||||
wrap-sql
|
||||
; get the name of the file that contains the currently evaluating form
|
||||
this-directory
|
||||
this-file
|
||||
; replacement for define-runtime-path
|
||||
anytime-path)
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(define (check-syntax-equal? s1 s2)
|
||||
(check-equal? (syntax->datum s1)
|
||||
(syntax->datum s2))))
|
||||
|
||||
;; actual transforming goes on in here.
|
||||
;; it's in a submodule so that it can be required in both levels, for testing
|
||||
|
||||
(module transform racket/base
|
||||
(require racket/list)
|
||||
|
||||
(provide
|
||||
transform-if/out
|
||||
transform/out-cond/var)
|
||||
|
||||
(define (transform-if/out stx)
|
||||
(define tree (cdr (syntax->datum stx))) ; condition true false
|
||||
(define else (cddr tree)) ; the else branch cons cell
|
||||
(define result
|
||||
(let walk ([node tree])
|
||||
(cond
|
||||
; normally, node should be a full cons cell (a pair) but it might be something else.
|
||||
; situation: reached the end of a list, empty cons cell
|
||||
[(null? node) node]
|
||||
; situation: reached the end of a list, cons cdr was non-list
|
||||
[(symbol? node) node]
|
||||
; normal situation, full cons cell
|
||||
; -- don't go replacing through nested if/out
|
||||
[(and (pair? node) (eq? 'if/out (car node))) node]
|
||||
; -- replace if/in
|
||||
[(and (pair? node) (eq? 'if/in (car node)))
|
||||
(append '(if) (walk (cdr node)) else)]
|
||||
; recurse down pair head and tail
|
||||
[(pair? node) (cons (walk (car node)) (walk (cdr node)))]
|
||||
; something else that can't be recursed into, so pass it through
|
||||
[#t node])))
|
||||
(datum->syntax stx (cons 'if result)))
|
||||
|
||||
(define (transform/out-cond/var stx)
|
||||
(define tree (transform-cond/var (cdr (syntax->datum stx))))
|
||||
(datum->syntax
|
||||
stx
|
||||
tree))
|
||||
|
||||
(define (transform-cond/var tree)
|
||||
(define-values (els temp) (splitf-at tree (λ (el) (and (pair? el) (not (eq? (car el) 'var))))))
|
||||
(define-values (vars rest) (splitf-at temp (λ (el) (and (pair? el) (eq? (car el) 'var)))))
|
||||
(if (null? rest)
|
||||
`(cond ,@els)
|
||||
`(cond
|
||||
,@els
|
||||
[#t
|
||||
(let* ,(for/list ([var vars])
|
||||
(cdr var))
|
||||
,(transform-cond/var rest))]))))
|
||||
|
||||
;; the syntax definitions and their tests go below here
|
||||
|
||||
(require 'transform (for-syntax 'transform))
|
||||
|
||||
(define-syntax (wrap-sql stx)
|
||||
; the arguments
|
||||
(define xs (cdr (syntax->list stx)))
|
||||
; wrap each argument
|
||||
(define wrapped (map (λ (xe) ; xe is the syntax of an argument
|
||||
(if (list? (car (syntax->datum xe)))
|
||||
; it's a list of lists (a list of sql migration steps)
|
||||
; return instead syntax of a lambda that will call everything in xe
|
||||
(datum->syntax stx `(λ () ,@xe))
|
||||
; it's just a single sql migration step
|
||||
; return instead syntax of a lambda that will call xe
|
||||
(datum->syntax stx `(λ () ,xe))))
|
||||
xs))
|
||||
; since I'm returning *code*, I need to return the form (list ...) so that runtime makes a list
|
||||
(datum->syntax stx `(list ,@wrapped)))
|
||||
|
||||
(define-syntax (if/out stx)
|
||||
(transform-if/out stx))
|
||||
(module+ test
|
||||
(check-syntax-equal? (transform-if/out #'(if/out (condition 1) (if/in (condition 2) (do-yes)) (do-no)))
|
||||
#'(if (condition 1) (if (condition 2) (do-yes) (do-no)) (do-no)))
|
||||
(check-equal? (if/out #t (if/in #t 'yes) 'no) 'yes)
|
||||
(check-equal? (if/out #f (if/in #t 'yes) 'no) 'no)
|
||||
(check-equal? (if/out #t (if/in #f 'yes) 'no) 'no)
|
||||
(check-equal? (if/out #f (if/in #f 'yes) 'no) 'no))
|
||||
|
||||
(define-syntax (this-directory stx)
|
||||
(datum->syntax stx (syntax-source-directory stx)))
|
||||
|
||||
(define-syntax (this-file stx)
|
||||
(datum->syntax stx (build-path (or (syntax-source-directory stx) 'same) (syntax-source-file-name stx))))
|
||||
|
||||
(module+ test
|
||||
(require racket/path)
|
||||
(check-equal? (file-name-from-path (this-file)) (build-path "syntax.rkt")))
|
||||
|
||||
(define-syntax (cond/var stx)
|
||||
(transform/out-cond/var stx))
|
||||
(module+ test
|
||||
(check-syntax-equal? (transform/out-cond/var #'(cond/def [#f 0] (var d (* a 2)) [(eq? d 8) d] [#t "not 4"]))
|
||||
#'(cond
|
||||
[#f 0]
|
||||
[#t
|
||||
(let* ([d (* a 2)])
|
||||
(cond
|
||||
[(eq? d 8) d]
|
||||
[#t "not 4"]))])))
|
||||
|
||||
;;; Replacement for define-runtime-path that usually works well and doesn't include the files/folder contents into the distribution.
|
||||
;;; When running from source, should always work appropriately.
|
||||
;;; When running from a distribution, (current-directory) is treated as the root.
|
||||
;;; Usage:
|
||||
;;; * to-root : Path-String * relative path from the source file to the project root
|
||||
;;; * to-dest : Path-String * relative path from the root to the desired file/folder
|
||||
(define-syntax (anytime-path stx)
|
||||
(define-values (_ to-root to-dest) (apply values (syntax->list stx)))
|
||||
(define source (syntax-source stx))
|
||||
(unless (complete-path? source)
|
||||
(error 'anytime-path "syntax source has no directory: ~v" stx))
|
||||
(datum->syntax
|
||||
stx
|
||||
`(let* ([source ,source]
|
||||
[dir-of-source (path-only source)]
|
||||
[_ (unless (path? dir-of-source) (error 'anytime-path "syntax source has no directory: ~v" ,source))]
|
||||
[syntax-to-root (build-path dir-of-source ,to-root)]
|
||||
[root (if (directory-exists? syntax-to-root)
|
||||
;; running on the same filesystem it was compiled on, i.e. it's running the source code out of a directory, and the complication is the intermediate compilation
|
||||
syntax-to-root
|
||||
;; not running on the same filesystem, i.e. it's a distribution. we assume that the current working directory is where the executable is, and treat this as the root.
|
||||
(current-directory))])
|
||||
(simple-form-path (build-path root ,to-dest)))))
|
|
@ -1,71 +0,0 @@
|
|||
#lang racket/base
|
||||
(require "../src/data.rkt"
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
thread-values)
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
|
||||
(define (thread-values . thunks)
|
||||
(parameterize-break #t
|
||||
(define the-exn (box #f))
|
||||
(define original-thread (current-thread))
|
||||
(define (break e)
|
||||
(when (box-cas! the-exn #f e)
|
||||
(break-thread original-thread))
|
||||
(sleep 0))
|
||||
(define-values (threads channels)
|
||||
(for/fold ([ts null]
|
||||
[chs null]
|
||||
#:result (values (reverse ts) (reverse chs)))
|
||||
([th thunks])
|
||||
(define ch (make-channel))
|
||||
(define t
|
||||
(thread (λ ()
|
||||
(with-handlers ([exn? break])
|
||||
(channel-put ch (th))))))
|
||||
(values (cons t ts) (cons ch chs))))
|
||||
(apply
|
||||
values
|
||||
(with-handlers ([exn:break? (λ (_)
|
||||
(for ([t threads]) (kill-thread t))
|
||||
(if (unbox the-exn)
|
||||
(raise (unbox the-exn))
|
||||
(error 'thread-values "a thread broke, but without reporting its exception")))])
|
||||
(for/list ([ch channels])
|
||||
(channel-get ch))))))
|
||||
|
||||
(module+ test
|
||||
; check that they actually execute concurrently
|
||||
(define ch (make-channel))
|
||||
(check-equal? (let-values ([(a b)
|
||||
(thread-values
|
||||
(λ ()
|
||||
(begin
|
||||
(channel-put ch 'a)
|
||||
(channel-get ch)))
|
||||
(λ ()
|
||||
(begin0
|
||||
(channel-get ch)
|
||||
(channel-put ch 'b))))])
|
||||
(list a b))
|
||||
'(b a))
|
||||
; check that it assigns the correct value to the correct variable
|
||||
(check-equal? (let-values ([(a b)
|
||||
(thread-values
|
||||
(λ () (sleep 0) 'a)
|
||||
(λ () 'b))])
|
||||
(list a b))
|
||||
'(a b))
|
||||
; check that exceptions are passed to the original thread, and other threads are killed
|
||||
;; TODO: if the other thread was making an HTTP request, could it be left stuck open by the kill?
|
||||
(check-equal? (let* ([x "!"]
|
||||
[res
|
||||
(with-handlers ([exn:fail:user? (λ (e) (exn-message e))])
|
||||
(thread-values
|
||||
(λ () (sleep 0) (set! x "?") (println "this side effect should not happen"))
|
||||
(λ () (raise-user-error "catch me"))))])
|
||||
(string-append res x))
|
||||
"catch me!"))
|
|
@ -1,330 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/dict
|
||||
racket/function
|
||||
racket/match
|
||||
racket/string
|
||||
"pure-utils.rkt"
|
||||
"url-utils.rkt"
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
preprocess-html-wiki
|
||||
update-tree-wiki)
|
||||
|
||||
;; Rewrite two markup patterns in a raw HTML string and return the new string.
;;
;; The regex looks at what directly follows an opening <td> or <figcaption> tag:
;;  - a bare <li>                  -> "<ul><li>" is emitted, giving the item a list parent
;;  - <p class="caption">TEXT</p>  -> replaced with <span class="caption">TEXT</span>
;;
;; html : string
;; returns : string
;;
;; The branch is selected by which alternative actually matched (`contents` is #f
;; unless the caption alternative matched), not by the name of the opening tag.
;; Selecting by tag name made `<figcaption><li>` raise (string-append received #f)
;; and made `<td><p class="caption">…</p>` lose its caption text.
(define (preprocess-html-wiki html)
  (regexp-replace* #rx"(<(?:td|figcaption)[^>]*?>\n?)(?:<li>|[ \t]*?<p class=\"caption\">(.*?)</p>)"
                   html
                   (λ (whole first-tag [contents #f])
                     (if contents
                         ;; caption paragraph: keep the text, swap the wrapper element
                         (string-append first-tag "<span class=\"caption\">" contents "</span>")
                         ;; orphan list item: open a list around it
                         (string-append first-tag "<ul><li>")))))
|
||||
|
||||
(module+ test
  ;; Each entry pairs raw input HTML with the text preprocess-html-wiki must produce.
  (for ([sample (list
                 ;; a list item directly inside a table cell gains a <ul> parent
                 (cons "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>"
                       "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>")
                 ;; a caption paragraph inside a figcaption becomes a span
                 (cons "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"> <p class=\"caption\">Caption text.</p></figcaption></figure>"
                       "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"><span class=\"caption\">Caption text.</span></figcaption></figure>"))])
    (check-equal? (preprocess-html-wiki (car sample))
                  (cdr sample))))
|
||||
|
||||
(module+ test
  (require rackunit
           "html-parsing/main.rkt")
  ;; Fixture tree shared by the update-tree-wiki tests. Each top-level child of the
  ;; mw-parser-output div is a target for one or more checks; the data-test-*
  ;; attributes exist only so the tests can locate a specific element.
  (define wiki-document
    (let ([full-image "https://static.wikia.nocookie.net/nice-image.png"]
          [thumbnail "https://static.wikia.nocookie.net/nice-image-thumbnail.png"])
      `(*TOP*
        (div (@ (class "mw-parser-output"))
             ;; portable infobox with a linked thumbnail
             (aside (@ (role "region") (class "portable-infobox pi-theme-wikia pi-layout-default"))
                    (h2 (@ (class "pi-item pi-title") (data-source "title"))
                        "Infobox Title")
                    (figure (@ (class "pi-item pi-image") (data-source "image"))
                            (a (@ (href ,full-image) (class "image image-thumbnail") (title ""))
                               (img (@ (src ,thumbnail) (class "pi-image-thumbnail")))))
                    (div (@ (class "pi-item pi-data") (data-source "description"))
                         (h3 (@ (class "pi-data-label"))
                             "Description")
                         (div (@ (class "pi-data-value"))
                              "Mystery infobox!")))
             ;; collapsed article section containing a link to another page of the same wiki
             (div (@ (data-test-collapsesection) (class "collapsible collapsetoggle-inline collapsed"))
                  (i (b "This section is hidden for dramatic effect."))
                  (div (@ (class "collapsible-content"))
                       (p "Another page link: "
                          (a (@ (data-test-wikilink) (href "https://test.fandom.com/wiki/Another_Page") (title "Another Page"))
                             "Another Page"))))
             ;; thumbnail figure: lazy-loaded image plus its noscript duplicate
             (figure (@ (class "thumb tnone"))
                     (a (@ (href ,full-image) (class "image") (data-test-figure-a))
                        (img (@ (src "data:image/gif;base64,R0lGODlhAQABAIABAAAAAP///yH5BAEAAAEALAAAAAABAAEAQAICTAEAOw%3D%3D")
                                (data-src ,thumbnail)
                                (class "thumbimage lazyload"))))
                     (noscript
                      (a (@ (href ,full-image) (class "image"))
                         (img (@ (src ,thumbnail)
                                 (data-src ,thumbnail)
                                 (class "thumbimage")))))
                     (figcaption "Test figure!"))
             ;; embedded frame
             (iframe (@ (src "https://example.com/iframe-src")))
             ;; review/advert block
             (div (@ (class "reviews"))
                  (header "GameSpot Expert Reviews"))
             ;; entity reference inside collapsible content
             (div (@ (data-test-ampersand) (class "mw-collapsible-content"))
                  (& ndash)))))))
|
||||
|
||||
;; Build the element-rewriting procedure for one wiki.
;;
;; wikiname       : string, used as the path prefix when wiki links are made local
;; #:strict-proxy? : when true, image sources, image links and url(...) references in
;;                   inline styles are passed through u-proxy-url
;;
;; Returns a procedure (element element-type attributes children) whose result is
;; either `return-no-element` (drop the element) or a three-item list
;; (element-type attributes children) describing the replacement.
;; NOTE(review): `u` is used throughout as (u predicate transform value), apparently
;; applying `transform` only when `predicate` accepts the value - confirm against
;; pure-utils.rkt.
(define (updater wikiname #:strict-proxy? [strict-proxy? #f])
  ;; precompute wikiurl regex for efficiency
  (define wikiurl-regex (pregexp (format "^https://(~a)\\.fandom\\.com(/wiki/.*)$" px-wikiname)))
  ;; precompute link replacement string for efficiency
  (define wiki-substitution (format "/~a\\1" wikiname))

  ;; List-based class rewriting. Only referenced from a `#;`-disabled form in
  ;; attributes-updater below; the string-based class-updater is what actually runs.
  (define classlist-updater
    (compose1
     ; uncollapse all navbox items (bottom of page mass navigation)
     (curry u
            (λ (classlist) (and ; removed due to scoping, would improve performance (eq? element-type 'table)
                            (member "navbox" classlist)
                            (member "collapsed" classlist)))
            (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist)))
     ; uncollapse portable-infobox sections
     (curry u
            (λ (classlist) (and ; removed due to scoping, would improve performance (eq? element-type 'section)
                            (member "pi-collapse" classlist)))
            (λ (classlist) (filter (λ (v)
                                     (and (not (equal? v "pi-collapse-closed"))
                                          (not (equal? v "pi-collapse"))))
                                   classlist)))
     ; generic: includes article sections and tables, probably more
     (curry u
            (λ (classlist) (and (member "collapsible" classlist)
                                (member "collapsed" classlist)))
            (λ (classlist) (filter (curry (negate equal?) "collapsed") classlist)))))

  ;; string-replace with the subject string supplied last, so it can be composed
  (define ((string-replace-curried from to) str)
    (string-replace str from to))

  ;; Strip the collapse-related class names from a class attribute string.
  ;; compose1 applies right to left: "pi-collapse" is removed first, then
  ;; "pi-collapse-closed", then " collapsed".
  (define class-updater
    (compose1
     (string-replace-curried " collapsed" "")
     (string-replace-curried "pi-collapse-closed" "")
     (string-replace-curried "pi-collapse" "")))

  ;; marks a cardtable as already processed so the cardtable rule below is not applied twice
  (define (cardimage-class-updater c)
    (string-append c " bw-updated-cardtable-cardimage"))

  ;; Attribute-list rewriting applied to every element that is kept.
  ;; compose1 applies right to left, so the srcset removal at the bottom runs first.
  (define attributes-updater
    (compose1
     ; uncollapsing
     #;(curry attribute-maybe-update 'class
              (λ (class) (string-join (classlist-updater (string-split class " ")) " ")))
     (curry attribute-maybe-update 'class class-updater)
     ; audio buttons - sample: hearthstone/wiki/Diablo_(Duels_hero)#Sounds
     (curry u
            (λ (v) (has-class? "ext-audiobutton" v))
            (λ (v) (dict-set (dict-remove v 'hidden) 'controls '(""))))
     ; yet more uncollapsing - sample: warframe/wiki/Amp_(Ability)
     (curry u
            (λ (v) (and (dict-has-key? v 'id)
                        (string-prefix? (car (dict-ref v 'id)) "mw-customcollapsible")))
            (λ (v) (dict-set v 'style "display:block")))
     ; change links to stay on the same wiki
     (curry attribute-maybe-update 'href
            (λ (href)
              ((compose1
                (λ (href) (regexp-replace #rx"^(/wiki/.*)$" href wiki-substitution))
                (λ (href) (regexp-replace wikiurl-regex href "/\\1\\2")))
               href)))
     ; add noreferrer to a.image
     (curry u
            (λ (v) (and #;(eq? element-type 'a)
                        (has-class? "image" v)))
            (λ (v) (dict-update v 'rel (λ (s)
                                         (list (string-append (car s) " noreferrer")))
                                '(""))))
     ; proxy images from inline styles, if strict_proxy is set
     (curry u
            (λ (v) strict-proxy?)
            (λ (v) (attribute-maybe-update
                    'style
                    (λ (style)
                      (regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style
                                      (λ (whole url)
                                        (string-append
                                         "url("
                                         (u-proxy-url url)
                                         ")")))) v)))
     ; and also their links, if strict_proxy is set
     (curry u
            (λ (v)
              (and strict-proxy?
                   #;(eq? element-type 'a)
                   (or (has-class? "image-thumbnail" v)
                       (has-class? "image" v))))
            (λ (v) (attribute-maybe-update 'href u-proxy-url v)))
     ; proxy images from src attributes, if strict_proxy is set
     (curry u
            (λ (v) strict-proxy?)
            (λ (v) (attribute-maybe-update 'src u-proxy-url v)))
     ; don't lazyload images
     (curry u
            (λ (v) (dict-has-key? v 'data-src))
            (λ (v) (attribute-maybe-update 'src (λ (_) (car (dict-ref v 'data-src))) v)))
     ; don't use srcset - TODO: use srcset?
     (λ (v) (dict-remove v 'srcset))))

  ;; Child-list rewriting applied to every element that is kept.
  ;; The inner application (animated slot wrapping) runs first; its result is fed to
  ;; the outer one (mw-collapsible-content uncollapsing).
  (define (children-updater attributes children)
    ; more uncollapsing - sample: bandori/wiki/BanG_Dream!_Wikia
    ((λ (children)
       (u
        (λ (v) (has-class? "mw-collapsible-content" attributes))
        (λ (v) (for/list ([element v])
                 (u (λ (element) (element-is-element? element))
                    (λ (element)
                      `(,(car element)
                        (@ ,@(attribute-maybe-update 'style (λ (a) (regexp-replace #rx"display: *none" a "display:inline")) (bits->attributes element)))
                        ,@(filter element-is-content? (cdr element))))
                    element)))
        children))
     ; wrap blinking animated images in a slot so they can be animated with CSS
     ((λ (children)
        (u
         (λ (v) (and (has-class? "animated" attributes)
                     ((length v) . > . 1)))
         (λ (v)
           `((span (@ (class "animated-slot__outer") (style ,(format "--steps: ~a" (length v))))
                   (span (@ (class "animated-slot__inner"))
                         ,@v))))
         children))
      children)))

  ;; The procedure handed back to the caller. The first matching clause wins; only
  ;; the final clause runs attributes-updater / children-updater.
  (define (updater element element-type attributes children)
    ;; replace whole element?
    (cond
      ; wrap tables in a div.table-scroller
      ; (data-scrolling marks the inner table so this clause does not match it again)
      [(and (eq? element-type 'table)
            (has-class? "wikitable" attributes)
            (not (dict-has-key? attributes 'data-scrolling)))
       `(div
         ((class "table-scroller"))
         ((,element-type (@ (data-scrolling) ,@attributes)
                         ,@children)))]
      ; HACK for /yugioh/wiki/Pot_of_Greed: move card images above tables
      [(and (eq? element-type 'table)
            (has-class? "cardtable" attributes)
            (not (has-class? "bw-updated-cardtable-cardimage" attributes)))
       (define (is-cardimage? t a c) (and (eq? t 'td)
                                          (has-class? "cardtable-cardimage" a)))
       (define cardimage ((query-selector is-cardimage? element)))
       (if (not cardimage)
           (list element-type attributes children)
           (let ([new-cardtable (update-tree
                                 (λ (e t a c)
                                   (if (is-cardimage? t a c)
                                       return-no-element
                                       (list t a c)))
                                 `(,element-type
                                   ;; The updated attribute list has to be spliced into the
                                   ;; (@ ...) form, like the other quasiquotes in this
                                   ;; function do; inserting it as a single item nests the
                                   ;; attributes one level too deep and hides the marker class.
                                   (@ ,@(attribute-maybe-update 'class cardimage-class-updater attributes))
                                   ,@children))])
             (list 'div null (list cardimage new-cardtable))))]
      ; exclude empty figcaptions
      [(and (eq? element-type 'figcaption)
            (or (null? (filter element-is-element? children))
                ((query-selector (λ (element-type attributes children)
                                   (eq? element-type 'use))
                                 element))))
       return-no-element]
      ; exclude infobox items that are videos, and gallery items that are videos
      [(and (or (has-class? "pi-item" attributes)
                (has-class? "wikia-gallery-item" attributes))
            ((query-selector (λ (element-type attributes children)
                               (has-class? "video-thumbnail" attributes))
                             element)))
       return-no-element]
      ; exclude the invisible brackets after headings
      [(and (eq? element-type 'span)
            (has-class? "mw-editsection" attributes))
       return-no-element]
      ; display a link instead of an iframe
      [(eq? element-type 'iframe)
       ;; An iframe without a src attribute falls back to an empty string; the
       ;; fallback must be a non-empty list because `car` is applied to it.
       (define src (car (dict-ref attributes 'src '(""))))
       `(a
         ((class "iframe-alternative") (href ,src))
         (,(format "Embedded media: ~a" src)))]
      ; remove noscript versions of images because they are likely lower quality than the script versions
      [(and (eq? element-type 'noscript)
            (match children
              ; either the noscript has a.image as a first child...
              [(list (list 'a (list '@ a-att ...) _)) (has-class? "image" a-att)]
              ; or the noscript has img as a first child
              [(list (list 'img _)) #t]
              [_ #f]))
       return-no-element]
      ; remove gamespot reviews/ads
      [(has-class? "reviews" attributes)
       return-no-element]
      ; remove customcollapsible customtoggle buttons - sample: warframe/wiki/Amp_(Ability)
      [(and (dict-has-key? attributes 'class) (regexp-match? #rx"^mw-customtoggle-[^ ]* button-c$" (car (dict-ref attributes 'class))))
       return-no-element]
      [#t
       (list element-type
             ;; attributes
             (attributes-updater #; element-type attributes)
             ;; children
             (children-updater attributes children))]))

  updater)
|
||||
|
||||
;; Run the wiki element updater over a whole document tree.
;; tree : document tree accepted by update-tree
;; wikiname, #:strict-proxy? : forwarded unchanged to `updater`
(define (update-tree-wiki tree wikiname #:strict-proxy? [strict-proxy? #f])
  (define wiki-updater
    (updater wikiname #:strict-proxy? strict-proxy?))
  (update-tree wiki-updater tree))
|
||||
|
||||
(module+ test
  (define transformed
    (update-tree-wiki wiki-document "test" #:strict-proxy? #t))
  ;; first element of `transformed` accepted by `pred` (the checks below expect #f when nothing matches)
  (define (transformed-ref pred)
    ((query-selector pred transformed)))
  ;; value of attribute `name` on the first element of `transformed` accepted by `pred`
  (define (transformed-attribute name pred)
    (get-attribute name (bits->attributes (transformed-ref pred))))
  ; check that wikilinks are changed to be local
  (check-equal? (transformed-attribute 'href (λ (t a c) (dict-has-key? a 'data-test-wikilink)))
                "/test/wiki/Another_Page")
  ; check that a.image has noreferrer
  (check-equal? (transformed-attribute 'rel (λ (t a c) (and (eq? t 'a)
                                                            (has-class? "image" a))))
                " noreferrer")
  ; check that article collapse sections become uncollapsed
  (check-equal? (transformed-attribute 'class (λ (t a c) (dict-has-key? a 'data-test-collapsesection)))
                "collapsible collapsetoggle-inline")
  ; check that iframes are gone
  (check-false (transformed-ref (λ (t a c) (eq? t 'iframe))))
  ; ...and that a link to the frame's source took their place
  (check-equal? (let* ([alternative (transformed-ref (λ (t a c) (has-class? "iframe-alternative" a)))]
                       [link ((query-selector (λ (t a c) (eq? t 'a)) alternative))])
                  (get-attribute 'href (bits->attributes link)))
                "https://example.com/iframe-src")
  ; check that images are proxied
  (check-equal? (transformed-attribute 'src (λ (t a c) (eq? t 'img)))
                "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image-thumbnail.png")
  ; check that links to images are proxied
  (check-equal? (transformed-attribute 'href (λ (t a c) (and (eq? t 'a) (has-class? "image-thumbnail" a))))
                "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png")
  (check-equal? (transformed-attribute 'href (λ (t a c) (member '(data-test-figure-a) a)))
                "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png")
  ; check that noscript images are removed
  (check-equal? (transformed-ref (λ (t a c) (eq? t 'noscript))) #f)
  ; check that gamespot reviews/ads are removed
  (check-equal? (transformed-ref (λ (t a c) (has-class? "reviews" a))) #f)
  ; check that (& x) sequences are not broken
  (check-equal? (transformed-ref (λ (t a c) (dict-has-key? a 'data-test-ampersand)))
                '(div (@ (data-test-ampersand) (class "mw-collapsible-content"))
                      (& ndash)))
  ; benchmark - only runs when the sample page exists on disk
  (let ([sample-path "../storage/Frog.html"])
    (when (file-exists? sample-path)
      (with-input-from-file sample-path
        (λ ()
          (define tree (html->xexp (current-input-port)))
          (time (length (update-tree-wiki tree "minecraft"))))))))
|
|
@ -1,45 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/generator
|
||||
racket/list
|
||||
racket/string
|
||||
json
|
||||
net/http-easy
|
||||
"../lib/html-parsing/main.rkt"
|
||||
"../src/xexpr-utils.rkt"
|
||||
"../src/url-utils.rkt")
|
||||
|
||||
; File the collected wiki list is written to as JSON by the call-with-output-file form at the end of this script.
(define output-file "wiki-names.json")
|
||||
; Value sent as the "limit" query parameter by get-page; a string because it is handed to params->query as-is.
(define limit "5000")
|
||||
|
||||
;; Fetch one page of Special:NewWikis from community.fandom.com and parse it.
;; offset : value for the "offset" query parameter ("" requests the first page)
;; Returns the parsed document produced by html->xexp.
(define (get-page offset)
  (define query
    (params->query `(("offset" . ,offset)
                     ("limit" . ,limit))))
  (define url
    (format "https://community.fandom.com/wiki/Special:NewWikis?~a" query))
  (define body (response-body (get url)))
  (html->xexp (bytes->string/utf-8 body)))
|
||||
|
||||
;; Drain `gen` (a procedure that yields one <li> element per call and #f when done)
;; and turn every element into a hash with the keys 'title, 'link and 'lang.
;; The positions used below rely on each item having this exact shape:
;; '(li "\n" "\t" (a (@ (href "http://terra-hexalis.fandom.com/")) "Terra Hexalis Wiki") "\n" "\t\t\ten\t")
(define (convert-list-items gen)
  (for/list ([item (in-producer gen #f)])
    (define anchor (fourth item))          ; the (a (@ (href ...)) title) element
    (define href-attribute (second (second anchor))) ; the (href "...") pair
    (define wiki-title (third anchor))
    (define wiki-link (second href-attribute))
    (define wiki-lang (string-trim (sixth item))) ; trailing text node holding the language code
    (hasheq 'title wiki-title
            'link wiki-link
            'lang wiki-lang)))
|
||||
|
||||
;; Collect wiki entries from Special:NewWikis, following the "next" link until a
;; page has none.
;; offset : "offset" query value of the page to fetch (default "" = first page)
;; items  : entries gathered from earlier pages (default empty)
;; Returns `items` followed by the entries of this page and of every later page.
;; Prints one progress line per fetched page.
(define (get-items-recursive [offset ""] [items null])
  (define document (get-page offset))
  (define content ((query-selector (attribute-selector 'class "mw-spcontent") document)))
  (define next ((query-selector (attribute-selector 'class "mw-nextlink") content)))
  ;; offset of the following page, taken from the next link's href; #f on the last page
  (define next-offset
    (and next
         (second (regexp-match #rx"offset=([0-9]*)" (get-attribute 'href (bits->attributes next))))))
  (define these-items
    (convert-list-items (query-selector (λ (e a c) (eq? e 'li)) content)))
  (define all-items (append items these-items))
  (printf "page offset \"~a\" has ~a items (~a so far)~n" offset (length these-items) (length all-items))
  (cond
    [next (get-items-recursive next-offset all-items)]
    [else all-items]))
|
||||
|
||||
;; Script body: open (or truncate) the output file, download every page of the
;; list, then write the collected entries to the file as JSON.
(call-with-output-file output-file
  #:exists 'truncate/replace
  (λ (out)
    (define all-wikis (get-items-recursive))
    (write-json all-wikis out)))
|
1
req.rktd
1
req.rktd
|
@ -1 +0,0 @@
|
|||
((local (".")))
|
|
@ -1,27 +1,18 @@
|
|||
#lang racket/base
|
||||
(require racket/file
|
||||
racket/list
|
||||
racket/runtime-path
|
||||
racket/string
|
||||
json
|
||||
(require racket/string
|
||||
(prefix-in easy: net/http-easy)
|
||||
html-parsing
|
||||
html-writing
|
||||
web-server/http
|
||||
web-server/http/bindings
|
||||
"config.rkt"
|
||||
"data.rkt"
|
||||
"extwiki-data.rkt"
|
||||
"extwiki-generic.rkt"
|
||||
"static-data.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/pure-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt"
|
||||
"../lib/url-utils.rkt")
|
||||
"xexpr-utils.rkt"
|
||||
"url-utils.rkt")
|
||||
|
||||
(provide
|
||||
; headers to always send on all http responses
|
||||
always-headers
|
||||
; timeout durations for http-easy requests
|
||||
timeouts
|
||||
; generates a consistent footer
|
||||
application-footer
|
||||
; generates a consistent template for wiki page content to sit in
|
||||
|
@ -31,25 +22,18 @@
|
|||
|
||||
(module+ test
|
||||
(require rackunit
|
||||
html-writing
|
||||
"test-utils.rkt"))
|
||||
html-writing))
|
||||
|
||||
(define always-headers
|
||||
(list (header #"Referrer-Policy" #"same-origin") ; header to not send referers to fandom
|
||||
(header #"Link" (string->bytes/latin-1 link-header))))
|
||||
|
||||
(define-runtime-path path-static "../static")
|
||||
(define theme-icons
|
||||
(for/hasheq ([theme '(default light dark)])
|
||||
(values theme
|
||||
(html->xexp (file->string (build-path path-static (format "icon-theme-~a.svg" theme)) #:mode 'binary)))))
|
||||
(list (header #"Referrer-Policy" #"same-origin"))) ; header to not send referers to fandom
|
||||
(define timeouts (easy:make-timeout-config #:lease 5 #:connect 5))
|
||||
|
||||
(define (application-footer source-url #:license [license-in #f])
|
||||
(define license (or license-in license-default))
|
||||
`(footer (@ (class "custom-footer"))
|
||||
(div (@ (class ,(if source-url "custom-footer__cols" "internal-footer")))
|
||||
(div (p
|
||||
(img (@ (class "my-logo") (src ,(get-static-url "breezewiki.svg")))))
|
||||
(img (@ (class "my-logo") (src "/static/breezewiki.svg"))))
|
||||
(p
|
||||
(a (@ (href "https://gitdab.com/cadence/breezewiki"))
|
||||
,(format "~a source code" (config-get 'application_name))))
|
||||
|
@ -58,212 +42,79 @@
|
|||
"Documentation and more information"))
|
||||
(p
|
||||
(a (@ (href "https://lists.sr.ht/~cadence/breezewiki-discuss"))
|
||||
"Chat / Bug reports / Feature requests"))
|
||||
,(if (config-member? 'promotions::indie_wiki_buddy "footer")
|
||||
`(p
|
||||
(a (@ (href "https://getindie.wiki/"))
|
||||
"Get Indie Wiki Buddy browser extension - be redirected to BreezeWiki every time!"))
|
||||
"")
|
||||
"Discussions / Bug reports / Feature requests"))
|
||||
,(if (config-true? 'instance_is_official)
|
||||
`(p ,(format "This instance is run by the ~a developer, " (config-get 'application_name))
|
||||
(a (@ (href "https://cadence.moe/contact"))
|
||||
"Cadence")
|
||||
".")
|
||||
"Cadence."))
|
||||
`(p
|
||||
,(format "This unofficial instance is based off the ~a source code, but is not controlled by the code developer." (config-get 'application_name)))))
|
||||
,(if source-url
|
||||
`(div (p "This page displays proxied content from "
|
||||
(a (@ (href ,source-url) (rel "nofollow noreferrer")) ,source-url)
|
||||
(a (@ (href ,source-url) (rel "noreferrer")) ,source-url)
|
||||
,(format ". Text content is available under the ~a license, " (license^-text license))
|
||||
(a (@ (href ,(license^-url license)) (rel "nofollow")) "see license info.")
|
||||
(a (@ (href ,(license^-url license))) "see license info.")
|
||||
" Media files may have different copying restrictions.")
|
||||
(p ,(format "Fandom is a trademark of Fandom, Inc. ~a is not affiliated with Fandom." (config-get 'application_name))))
|
||||
`(div (p "Text content on wikis run by Fandom is available under the Creative Commons Attribution-Share Alike License 3.0 (Unported), "
|
||||
(a (@ (href "https://www.fandom.com/licensing") (rel "nofollow")) "see license info.")
|
||||
(a (@ (href "https://www.fandom.com/licensing")) "see license info.")
|
||||
" Media files and official Fandom documents have different copying restrictions.")
|
||||
(p ,(format "Fandom is a trademark of Fandom, Inc. ~a is not affiliated with Fandom." (config-get 'application_name))))))))
|
||||
|
||||
;; Produce the notice for a Fandom wiki that has a replacement elsewhere (NIWA or
;; similar). Three cases, tried in order:
;;  1. `wikiname` is listed in `extwikis`: build an <aside> whose layout depends on
;;     the entry's banner kind ('default, 'parallel or 'empty)
;;  2. get-redirect-content returns a callback for the wiki: call it with `title`
;;  3. otherwise: the empty string, i.e. nothing is displayed
(define (extwiki-notice wikiname title req user-cookies)
  (define xt (findf (λ (item) (member wikiname (extwiki^-wikinames item))) extwikis))
  ;; `links` followed by the fixed attribution text and any extra items, with " / " between them
  (define (feedback-line links . extra)
    (add-between (append links (cons "This notice is from BreezeWiki" extra))
                 " / "))
  (cond/var
   [xt
    (let* ([seen? (member wikiname (user-cookies^-notices user-cookies))]
           [aside-class (if seen? "niwa__notice niwa--seen" "niwa__notice")]
           [group (hash-ref extwiki-groups (extwiki^-group xt))]
           [search-page (format "/Special:Search?~a"
                                (params->query `(("search" . ,title)
                                                 ("go" . "Go"))))]
           [home (extwiki^-home xt)]
           ;; destination on the other wiki: its search page, asked to go to `title`
           [go (cond
                 [(string-suffix? home "/")
                  (regexp-replace #rx"/$" home (λ (_) search-page))]
                 [else
                  ;; keep the first path segment starting with "w" and replace everything after it
                  (define joiner (second (regexp-match #rx"/(w[^./]*)/" home)))
                  (regexp-replace #rx"/w[^./]*/.*$" home (λ (_) (format "/~a~a" joiner search-page)))])]
           [props (extwiki-props^ go)]
           [name (extwiki^-name xt)]
           ;; one <a> element per link configured for the wiki's group
           [group-links (λ ()
                          (for/list ([link (extwiki-group^-links group)])
                            `(a (@ (href ,(cdr link))) ,(car link))))])
      (case (extwiki^-banner xt)
        [(default)
         `(aside (@ (class ,aside-class))
                 (h1 (@ (class "niwa__header")) ,name " has its own website separate from Fandom.")
                 (a (@ (class "niwa__go") (href ,go)) "Read " ,title " on " ,name " →")
                 (div (@ (class "niwa__cols"))
                      (div (@ (class "niwa__left"))
                           (p ,((extwiki^-description xt) props))
                           (p ,((extwiki-group^-description group) props))
                           (p "This wiki's core community has largely migrated away from Fandom. You should "
                              (a (@ (href ,go)) "go to " ,name " now!"))
                           (p (@ (class "niwa__feedback"))
                              ,@(feedback-line
                                 (group-links)
                                 `(a (@ (rel "nofollow")
                                        (class "niwa__got-it")
                                        (href ,(user-cookies-setter-url/add-notice req user-cookies wikiname)))
                                     "OK, got it"))))
                      (div (@ (class "niwa__right"))
                           (img (@ (class "niwa__logo") (src ,(extwiki^-logo xt)))))))]
        [(parallel)
         `(aside (@ (class "niwa__parallel"))
                 (h1 (@ (class "niwa__header-mini"))
                     "See also "
                     (a (@ (href ,go)) ,name))
                 (p "This topic has multiple communities of editors, some active on the Fandom wiki, others active on " ,name ".")
                 (p "For thorough research, be sure to check both communities since they may have different information!")
                 (p (@ (class "niwa__feedback"))
                    ,@(feedback-line
                       (group-links)
                       '(a (@ (href "https://docs.breezewiki.com/Reporting_Bugs.html")) "Feedback?"))))]
        [(empty)
         `(aside (@ (class "niwa__notice niwa__notice--alt"))
                 (h1 (@ (class "niwa__header")) "You will be redirected to " ,name ".")
                 (p (@ (style "position: relative; top: -12px;")) "This independent wiki community has its own site separate from Fandom.")
                 (a (@ (class "niwa__go") (href ,go)) "Take me there! →")
                 (p (@ (class "niwa__feedback") (style "text-align: left"))
                    ,@(feedback-line (group-links))))]))]
   (var fetched-callback (get-redirect-content wikiname))
   [fetched-callback
    (fetched-callback title)]
   [#t ""]))
|
||||
|
||||
(define (generate-wiki-page
|
||||
content
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:head-data [head-data-in #f]
|
||||
#:siteinfo [siteinfo-in #f]
|
||||
#:user-cookies [user-cookies-in #f]
|
||||
#:online-styles [online-styles #t])
|
||||
#:body-class [body-class-in #f]
|
||||
#:siteinfo [siteinfo-in #f])
|
||||
(define siteinfo (or siteinfo-in siteinfo-default))
|
||||
(define head-data (or head-data-in ((head-data-getter wikiname))))
|
||||
(define user-cookies (or user-cookies-in (user-cookies-getter req)))
|
||||
(define origin (format "https://~a.fandom.com" wikiname))
|
||||
(define required-styles
|
||||
(cond
|
||||
[online-styles
|
||||
(define styles
|
||||
(list
|
||||
(format "~a/wikia.php?controller=ThemeApi&method=themeVariables&variant=~a" origin (user-cookies^-theme user-cookies))
|
||||
(format "~a/load.php?lang=en&modules=site.styles%7Cskin.fandomdesktop.styles%7Cext.fandom.PortableInfoboxFandomDesktop.css%7Cext.fandom.GlobalComponents.CommunityHeaderBackground.css%7Cext.fandom.photoGallery.gallery.css%7Cext.gadget.site-styles%2Csound-styles&only=styles&skin=fandomdesktop" origin)))
|
||||
(if (config-true? 'strict_proxy)
|
||||
(map u-proxy-url styles)
|
||||
styles)]
|
||||
[#t
|
||||
(list
|
||||
(format "/archive/~a/styles/themeVariables-~a.css" wikiname (user-cookies^-theme user-cookies))
|
||||
(format "/archive/~a/styles/site.css" wikiname))]))
|
||||
`(*TOP*
|
||||
(*DECL* DOCTYPE html)
|
||||
(html
|
||||
(head
|
||||
(meta (@ (name "viewport") (content "width=device-width, initial-scale=1")))
|
||||
(title ,(format "~a | ~a+~a"
|
||||
title
|
||||
(regexp-replace #rx" ?Wiki$" (siteinfo^-sitename siteinfo) "")
|
||||
(config-get 'application_name)))
|
||||
,@(map (λ (url)
|
||||
`(link (@ (rel "stylesheet") (type "text/css") (href ,url))))
|
||||
required-styles)
|
||||
(link (@ (rel "stylesheet") (type "text/css") (href ,(get-static-url "main.css"))))
|
||||
(script "const BWData = "
|
||||
,(jsexpr->string (hasheq 'wikiname wikiname
|
||||
'strict_proxy (config-true? 'strict_proxy))))
|
||||
,(if (config-true? 'feature_search_suggestions)
|
||||
`(script (@ (type "module") (src ,(get-static-url "search-suggestions.js"))))
|
||||
"")
|
||||
(script (@ (type "module") (src ,(get-static-url "countdown.js"))))
|
||||
(script (@ (defer) (src ,(get-static-url "tabs.js"))))
|
||||
(link (@ (rel "icon") (href ,(u (λ (v) (config-true? 'strict_proxy))
|
||||
(λ (v) (u-proxy-url v))
|
||||
(head-data^-icon-url head-data))))))
|
||||
(body (@ (class ,(head-data^-body-class head-data) " bw-tabs-nojs"))
|
||||
,(let ([extension-eligible?
|
||||
(cond/var
|
||||
[(not req) #f]
|
||||
[(not (config-member? 'promotions::indie_wiki_buddy "banner")) #f]
|
||||
(var ua-pair (assq 'user-agent (request-headers req)))
|
||||
[(not ua-pair) #f]
|
||||
(var ua (string-downcase (cdr ua-pair)))
|
||||
;; everyone pretends to be chrome, so we do it in reverse
|
||||
;; this excludes common browsers that don't support the extension
|
||||
[#t (and (not (string-contains? ua "edge/"))
|
||||
(not (string-contains? ua "mobile")))])])
|
||||
(if extension-eligible?
|
||||
`(div (@ (class "bw-top-banner"))
|
||||
(div (@ (class "bw-top-banner-rainbow"))
|
||||
"Try " (a (@ (href "https://getindie.wiki/") (target "_blank")) "our affiliated browser extension") " - redirect to BreezeWiki automatically!\n"))
|
||||
""))
|
||||
(div (@ (class "main-container"))
|
||||
(div (@ (class "fandom-community-header__background tileHorizontally header")))
|
||||
(div (@ (class "page"))
|
||||
(main (@ (class "page__main"))
|
||||
,(extwiki-notice wikiname title req user-cookies)
|
||||
(div (@ (class "custom-top"))
|
||||
(h1 (@ (class "page-title")) ,title)
|
||||
(nav (@ (class "sitesearch"))
|
||||
(form (@ (action ,(format "/~a/search" wikiname))
|
||||
(class "bw-search-form")
|
||||
(id "bw-pr-search-form"))
|
||||
(label (@ (for "bw-search-input")) "Search ")
|
||||
(div (@ (id "bw-pr-search-input"))
|
||||
(input (@ (type "text") (name "q") (id "bw-search-input") (autocomplete "off"))))
|
||||
(div (@ (class "bw-ss__container") (id "bw-pr-search-suggestions"))))
|
||||
(div (@ (class "bw-theme__select"))
|
||||
(span (@ (class "bw-theme__main-label")) "Page theme")
|
||||
(span (@ (class "bw-theme__items"))
|
||||
,@(for/list ([theme '(default light dark)])
|
||||
(define class
|
||||
(if (equal? theme (user-cookies^-theme user-cookies))
|
||||
"bw-theme__item bw-theme__item--selected"
|
||||
"bw-theme__item"))
|
||||
`(a (@ (rel "nofollow")
|
||||
(href ,(user-cookies-setter-url
|
||||
req
|
||||
(struct-copy user-cookies^ user-cookies
|
||||
[theme theme]))) (class ,class))
|
||||
(span (@ (class "bw-theme__icon-container"))
|
||||
,(hash-ref theme-icons theme))
|
||||
,(format "~a" theme)))))))
|
||||
(div (@ (id "content") #;(class "page-content"))
|
||||
(div (@ (id "mw-content-text"))
|
||||
,content))
|
||||
,(application-footer source-url #:license (siteinfo^-license siteinfo)))))))))
|
||||
(define body-class (if (not body-class-in)
|
||||
"skin-fandomdesktop"
|
||||
body-class-in))
|
||||
(define (required-styles origin)
|
||||
(map (λ (dest-path)
|
||||
(define url (format dest-path origin))
|
||||
(if (config-true? 'strict_proxy)
|
||||
(u-proxy-url url)
|
||||
url))
|
||||
'(#;"~a/load.php?lang=en&modules=skin.fandomdesktop.styles&only=styles&skin=fandomdesktop"
|
||||
#;"~a/load.php?lang=en&modules=ext.gadget.dungeonsWiki%2CearthWiki%2Csite-styles%2Csound-styles&only=styles&skin=fandomdesktop"
|
||||
#;"~a/load.php?lang=en&modules=site.styles&only=styles&skin=fandomdesktop"
|
||||
; combine the above entries into a single request for potentially extra speed - fandom.com doesn't even do this!
|
||||
"~a/wikia.php?controller=ThemeApi&method=themeVariables"
|
||||
"~a/load.php?lang=en&modules=skin.fandomdesktop.styles%7Cext.fandom.PortableInfoboxFandomDesktop.css%7Cext.fandom.GlobalComponents.CommunityHeaderBackground.css%7Cext.gadget.site-styles%2Csound-styles%7Csite.styles&only=styles&skin=fandomdesktop")))
|
||||
`(html
|
||||
(head
|
||||
(meta (@ (name "viewport") (content "width=device-width, initial-scale=1")))
|
||||
(title ,(format "~a | ~a+~a"
|
||||
title
|
||||
(regexp-replace #rx" ?Wiki$" (siteinfo^-sitename siteinfo) "")
|
||||
(config-get 'application_name)))
|
||||
,@(map (λ (url)
|
||||
`(link (@ (rel "stylesheet") (type "text/css") (href ,url))))
|
||||
(required-styles (format "https://~a.fandom.com" wikiname)))
|
||||
(link (@ (rel "stylesheet") (type "text/css") (href "/static/main.css"))))
|
||||
(body (@ (class ,body-class))
|
||||
(div (@ (class "main-container"))
|
||||
(div (@ (class "fandom-community-header__background tileHorizontally header")))
|
||||
(div (@ (class "page"))
|
||||
(main (@ (class "page__main"))
|
||||
(div (@ (class "custom-top"))
|
||||
(h1 (@ (class "page-title")) ,title)
|
||||
(nav (@ (class "sitesearch"))
|
||||
(form (@ (action ,(format "/~a/search" wikiname)))
|
||||
(label "Search "
|
||||
(input (@ (type "text") (name "q")))))))
|
||||
(div (@ (id "content") #;(class "page-content"))
|
||||
(div (@ (id "mw-content-text"))
|
||||
,content))
|
||||
,(application-footer source-url #:license (siteinfo^-license siteinfo))))))))
|
||||
(module+ test
|
||||
(define page
|
||||
(parameterize ([(config-parameter 'strict_proxy) "true"])
|
||||
(generate-wiki-page
|
||||
'(template)
|
||||
#:req test-req
|
||||
#:source-url ""
|
||||
#:title "test"
|
||||
#:wikiname "test")))
|
||||
|
@ -278,11 +129,11 @@
|
|||
page))))
|
||||
"/proxy?dest=https%3A%2F%2Ftest.fandom.com")))
|
||||
|
||||
(define (generate-redirect dest #:headers [headers-in '()])
|
||||
(define (generate-redirect dest)
|
||||
(define dest-bytes (string->bytes/utf-8 dest))
|
||||
(response/output
|
||||
#:code 302
|
||||
#:headers (append (list (header #"Location" dest-bytes)) headers-in)
|
||||
#:headers (list (header #"Location" dest-bytes))
|
||||
(λ (out)
|
||||
(write-html
|
||||
`(html
|
||||
|
|
102
src/config.rkt
102
src/config.rkt
|
@ -2,17 +2,19 @@
|
|||
(require racket/function
|
||||
racket/pretty
|
||||
racket/runtime-path
|
||||
racket/string
|
||||
typed/ini)
|
||||
racket/string)
|
||||
(require/typed ini
|
||||
[#:opaque Ini ini?]
|
||||
[read-ini (Input-Port -> Ini)]
|
||||
[ini->hash (Ini -> (Immutable-HashTable Symbol (Immutable-HashTable Symbol String)))])
|
||||
|
||||
(provide
|
||||
config-parameter
|
||||
config-true?
|
||||
config-member?
|
||||
config-get)
|
||||
|
||||
(module+ test
|
||||
(require "../lib/typed-rackunit.rkt"))
|
||||
(require "typed-rackunit.rkt"))
|
||||
|
||||
(define-runtime-path path-config "../config.ini")
|
||||
|
||||
|
@ -24,87 +26,71 @@
|
|||
(define (config-true? key)
|
||||
(not (member ((config-parameter key)) '("" "false"))))
|
||||
|
||||
(: config-member? (Symbol String [#:sep String] -> Boolean))
|
||||
(define (config-member? key item #:sep [sep #px"\\s+"])
|
||||
(and (config-true? key)
|
||||
(not (not (member item (string-split (config-get key) sep))))))
|
||||
|
||||
(: config-get (Symbol -> String))
|
||||
(define (config-get key)
|
||||
((config-parameter key)))
|
||||
|
||||
(define default-config
|
||||
'((application_name . "BreezeWiki")
|
||||
(bind_host . "auto")
|
||||
(port . "10416")
|
||||
(canonical_origin . "")
|
||||
(debug . "false")
|
||||
(feature_search_suggestions . "true")
|
||||
(instance_is_official . "false") ; please don't turn this on, or you will make me very upset
|
||||
(log_outgoing . "true")
|
||||
(strict_proxy . "false")
|
||||
|
||||
(feature_offline::enabled . "false")
|
||||
(feature_offline::format . "json.gz")
|
||||
(feature_offline::only . "false")
|
||||
(feature_offline::search . "fandom")
|
||||
|
||||
(access_log::enabled . "false")
|
||||
|
||||
(promotions::indie_wiki_buddy . "banner home")))
|
||||
(port . "10416")
|
||||
(strict_proxy . "true")))
|
||||
|
||||
(define loaded-alist
|
||||
(with-handlers
|
||||
([exn:fail:filesystem:errno?
|
||||
(λ (exn)
|
||||
(displayln "note: config file not detected, using defaults")
|
||||
'())]
|
||||
(begin0
|
||||
'()
|
||||
(displayln "note: config file not detected, using defaults")))]
|
||||
[exn:fail:contract?
|
||||
(λ (exn)
|
||||
(displayln "note: config file empty or missing [] section, using defaults")
|
||||
'())])
|
||||
(define h (in-hash
|
||||
(ini->hash
|
||||
(call-with-input-file path-config
|
||||
(λ (in)
|
||||
(read-ini in))))))
|
||||
(begin0
|
||||
'()
|
||||
(displayln "note: config file empty or missing [] section, using defaults")))])
|
||||
(define l
|
||||
(for*/list : (Listof (Pairof Symbol String))
|
||||
([(section-key section) h]
|
||||
[(key value) (in-hash section)])
|
||||
(if (eq? section-key '||)
|
||||
(cons key value)
|
||||
(cons (string->symbol (string-append (symbol->string section-key)
|
||||
"::"
|
||||
(symbol->string key)))
|
||||
value))))
|
||||
(printf "note: ~a items loaded from config file~n" (length l))
|
||||
l))
|
||||
(hash->list
|
||||
(hash-ref
|
||||
(ini->hash
|
||||
(call-with-input-file path-config
|
||||
(λ (in)
|
||||
(read-ini in))))
|
||||
'||)))
|
||||
(begin0
|
||||
l
|
||||
(printf "note: ~a items loaded from config file~n" (length l)))))
|
||||
|
||||
(define env-alist
|
||||
(for/list : (Listof (Pairof Symbol String))
|
||||
([name (environment-variables-names (current-environment-variables))]
|
||||
#:when (string-prefix? (string-downcase (bytes->string/latin-1 name)) "bw_"))
|
||||
(cons
|
||||
;; key: convert to string, remove bw_ prefix, convert to symbol
|
||||
(string->symbol (string-downcase (substring (bytes->string/latin-1 name) 3)))
|
||||
;; value: convert to string
|
||||
(bytes->string/latin-1
|
||||
(cast (environment-variables-ref (current-environment-variables) name) Bytes)))))
|
||||
(let ([e-names (environment-variables-names (current-environment-variables))]
|
||||
[e-ref (λ ([name : Bytes])
|
||||
(bytes->string/latin-1
|
||||
(cast (environment-variables-ref (current-environment-variables) name)
|
||||
Bytes)))])
|
||||
(map (λ ([name : Bytes])
|
||||
(cons (string->symbol (string-downcase (substring (bytes->string/latin-1 name) 3)))
|
||||
(e-ref name)))
|
||||
(filter (λ ([name : Bytes]) (string-prefix? (string-downcase (bytes->string/latin-1 name))
|
||||
"bw_"))
|
||||
e-names))))
|
||||
(when (> (length env-alist) 0)
|
||||
(printf "note: ~a items loaded from environment variables~n" (length env-alist)))
|
||||
|
||||
(define combined-alist (append default-config loaded-alist env-alist))
|
||||
|
||||
(define config
|
||||
(for/hasheq ([pair combined-alist]) : (Immutable-HashTable Symbol (Parameter String))
|
||||
(values (car pair) (make-parameter (cdr pair)))))
|
||||
(make-immutable-hasheq
|
||||
(map (λ ([pair : (Pairof Symbol String)])
|
||||
(cons (car pair) (make-parameter (cdr pair))))
|
||||
combined-alist)))
|
||||
|
||||
(when (config-true? 'debug)
|
||||
; all values here are optimised for maximum prettiness
|
||||
(parameterize ([pretty-print-columns 80])
|
||||
(display "config: ")
|
||||
(pretty-write ((inst sort (Pairof Symbol String) Symbol)
|
||||
(pretty-write ((inst sort (Pairof Symbol String))
|
||||
(hash->list (make-immutable-hasheq combined-alist))
|
||||
symbol<?
|
||||
#:key car))))
|
||||
|
@ -118,10 +104,6 @@
|
|||
(module+ test
|
||||
; this is just a sanity check
|
||||
(parameterize ([(config-parameter 'application_name) "JeffWiki"]
|
||||
[(config-parameter 'strict_proxy) ""]
|
||||
[(config-parameter 'promotions::indie_wiki_buddy) "a b c"])
|
||||
[(config-parameter 'strict_proxy) ""])
|
||||
(check-equal? (config-get 'application_name) "JeffWiki")
|
||||
(check-false (config-true? 'strict_proxy))
|
||||
(check-equal? (string? (config-get 'feature_offline::format)) #t)
|
||||
(check-true (config-member? 'promotions::indie_wiki_buddy "b"))))
|
||||
|
||||
(check-false (config-true? 'strict_proxy))))
|
||||
|
|
132
src/data.rkt
132
src/data.rkt
|
@ -1,130 +1,36 @@
|
|||
#lang racket/base
|
||||
(require racket/list
|
||||
racket/match
|
||||
racket/string
|
||||
web-server/http/request-structs
|
||||
net/url-string
|
||||
(only-in net/cookies/server cookie-header->alist cookie->set-cookie-header make-cookie)
|
||||
(prefix-in easy: net/http-easy)
|
||||
db
|
||||
memo
|
||||
"fandom-request.rkt"
|
||||
"static-data.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt"
|
||||
"../archiver/archiver-database.rkt"
|
||||
"config.rkt")
|
||||
"url-utils.rkt"
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
(struct-out siteinfo^)
|
||||
(struct-out license^)
|
||||
(struct-out head-data^)
|
||||
(struct-out user-cookies^)
|
||||
siteinfo-fetch
|
||||
siteinfo-default
|
||||
license-default
|
||||
head-data-getter
|
||||
head-data-default
|
||||
user-cookies-getter
|
||||
user-cookies-default
|
||||
user-cookies-setter
|
||||
user-cookies-setter-url
|
||||
user-cookies-setter-url/add-notice)
|
||||
license-default)
|
||||
|
||||
(struct siteinfo^ (sitename basepage license) #:transparent)
|
||||
(struct license^ (text url) #:transparent)
|
||||
(struct head-data^ (body-class icon-url) #:transparent)
|
||||
|
||||
(define license-default (license^ "CC-BY-SA" "https://www.fandom.com/licensing"))
|
||||
(define siteinfo-default (siteinfo^ "Unknown Wiki" "Main_Page" license-default))
|
||||
(define head-data-default (head-data^ "skin-fandomdesktop" (get-static-url "breezewiki-favicon.svg")))
|
||||
|
||||
(when (config-true? 'feature_offline::only)
|
||||
(void (get-slc)))
|
||||
(define siteinfo-default (siteinfo^ "Test Wiki" "Main_Page" license-default))
|
||||
|
||||
(define/memoize (siteinfo-fetch wikiname) #:hash hash
|
||||
(cond
|
||||
[(config-true? 'feature_offline::only)
|
||||
(when (config-true? 'debug)
|
||||
(printf "using offline mode for siteinfo ~a~n" wikiname))
|
||||
(define row (query-maybe-row* "select sitename, basepage, license_text, license_url from wiki where wikiname = ?"
|
||||
wikiname))
|
||||
(if row
|
||||
(siteinfo^ (vector-ref row 0)
|
||||
(vector-ref row 1)
|
||||
(license^ (vector-ref row 2)
|
||||
(vector-ref row 3)))
|
||||
siteinfo-default)]
|
||||
[else
|
||||
(define res
|
||||
(fandom-get-api
|
||||
wikiname
|
||||
'(("action" . "query")
|
||||
("meta" . "siteinfo")
|
||||
("siprop" . "general|rightsinfo")
|
||||
("format" . "json")
|
||||
("formatversion" . "2"))))
|
||||
(cond [(= (easy:response-status-code res) 200)
|
||||
(define data (easy:response-json res))
|
||||
(siteinfo^ (jp "/query/general/sitename" data)
|
||||
(second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data)))
|
||||
(license^ (jp "/query/rightsinfo/text" data)
|
||||
(jp "/query/rightsinfo/url" data)))]
|
||||
[else siteinfo-default])]))
|
||||
|
||||
(define/memoize (head-data-getter wikiname) #:hash hash
|
||||
;; data will be stored here, can be referenced by the memoized closure
|
||||
(define this-data head-data-default)
|
||||
;; returns the getter
|
||||
(λ ([res-in #f])
|
||||
(when res-in
|
||||
;; when actual information is provided, parse it into the struct and store it for the future
|
||||
(define head-html (jp "/parse/headhtml" res-in ""))
|
||||
(define data
|
||||
(head-data^
|
||||
(match (regexp-match #rx"<body [^>]*class=\"([^\"]*)" head-html)
|
||||
[(list _ classes) classes]
|
||||
[_ (head-data^-body-class head-data-default)])
|
||||
(match (regexp-match #rx"<link rel=\"(?:shortcut )?icon\" href=\"([^\"]*)" head-html)
|
||||
[(list _ icon-url) icon-url]
|
||||
[_ (head-data^-icon-url head-data-default)])))
|
||||
(set! this-data data))
|
||||
;; then no matter what, return the best information we have so far
|
||||
this-data))
|
||||
|
||||
(struct user-cookies^ (theme notices) #:prefab)
|
||||
(define user-cookies-default (user-cookies^ 'default '()))
|
||||
(define (user-cookies-getter req)
|
||||
(define cookie-header (headers-assq* #"cookie" (request-headers/raw req)))
|
||||
(define cookies-alist (if cookie-header (cookie-header->alist (header-value cookie-header) bytes->string/utf-8) null))
|
||||
(define cookies-hash
|
||||
(for/hasheq ([pair cookies-alist])
|
||||
(match pair
|
||||
[(cons "theme" (and theme (or "light" "dark" "default")))
|
||||
(values 'theme (string->symbol theme))]
|
||||
[(cons "notices" notices)
|
||||
(values 'notices (string-split notices "|"))]
|
||||
[_ (values #f #f)])))
|
||||
(user-cookies^
|
||||
(hash-ref cookies-hash 'theme (user-cookies^-theme user-cookies-default))
|
||||
(hash-ref cookies-hash 'notices (user-cookies^-notices user-cookies-default))))
|
||||
|
||||
(define (user-cookies-setter user-cookies)
|
||||
(map (λ (c) (header #"Set-Cookie" (cookie->set-cookie-header c)))
|
||||
(list (make-cookie "theme" (symbol->string (user-cookies^-theme user-cookies))
|
||||
#:path "/"
|
||||
#:max-age (* 60 60 24 365 10))
|
||||
(make-cookie "notices" (string-join (user-cookies^-notices user-cookies) "|")
|
||||
|
||||
#:path "/"
|
||||
#:max-age (* 60 60 24 365 10)))))
|
||||
|
||||
(define (user-cookies-setter-url req new-settings)
|
||||
(format "/set-user-settings?~a" (params->query `(("next_location" . ,(url->string (request-uri req)))
|
||||
("new_settings" . ,(format "~s" new-settings))))))
|
||||
|
||||
(define (user-cookies-setter-url/add-notice req user-cookies notice-name)
|
||||
(user-cookies-setter-url
|
||||
req
|
||||
(struct-copy user-cookies^ user-cookies
|
||||
[notices (cons notice-name (user-cookies^-notices user-cookies))])))
|
||||
(define dest-url
|
||||
(format "https://~a.fandom.com/api.php?~a"
|
||||
wikiname
|
||||
(params->query '(("action" . "query")
|
||||
("meta" . "siteinfo")
|
||||
("siprop" . "general|rightsinfo")
|
||||
("format" . "json")
|
||||
("formatversion" . "2")))))
|
||||
(log-outgoing dest-url)
|
||||
(define res (easy:get dest-url))
|
||||
(define data (easy:response-json res))
|
||||
(siteinfo^ (jp "/query/general/sitename" data)
|
||||
(second (regexp-match #rx"/wiki/(.*)" (jp "/query/general/base" data)))
|
||||
(license^ (jp "/query/rightsinfo/text" data)
|
||||
(jp "/query/rightsinfo/url" data))))
|
||||
|
|
|
@ -1,17 +1,15 @@
|
|||
#lang racket/base
|
||||
(require "../lib/syntax.rkt"
|
||||
(require "syntax.rkt"
|
||||
(for-syntax racket/base)
|
||||
racket/string
|
||||
net/url
|
||||
web-server/http
|
||||
web-server/dispatchers/dispatch
|
||||
(prefix-in host: web-server/dispatchers/dispatch-host)
|
||||
(prefix-in pathprocedure: web-server/dispatchers/dispatch-pathprocedure)
|
||||
(prefix-in sequencer: web-server/dispatchers/dispatch-sequencer)
|
||||
(prefix-in lift: web-server/dispatchers/dispatch-lift)
|
||||
(prefix-in filter: web-server/dispatchers/dispatch-filter)
|
||||
"config.rkt"
|
||||
"../lib/url-utils.rkt")
|
||||
"url-utils.rkt")
|
||||
|
||||
(provide
|
||||
; syntax to make the hashmap from names
|
||||
|
@ -33,48 +31,23 @@
|
|||
; don't forget that I'm returning *code* - return a call to the function
|
||||
(datum->syntax stx `(make-dispatcher-tree ,ds)))
|
||||
|
||||
; guard that the page returned a response, otherwise print more detailed debugging information
|
||||
(define-syntax-rule (page ds name)
|
||||
(λ (req)
|
||||
(define dispatcher (hash-ref ds (quote name)))
|
||||
(define page-response (dispatcher req))
|
||||
(if (response? page-response)
|
||||
page-response
|
||||
(response/output
|
||||
#:code 500
|
||||
#:mime-type #"text/plain"
|
||||
(λ (out)
|
||||
(for ([port (list (current-error-port) out)])
|
||||
(parameterize ([current-output-port port])
|
||||
(printf "error in ~a:~n expected page to return a response~n actually returned: ~v~n"
|
||||
(quote name)
|
||||
page-response))))))))
|
||||
|
||||
(define (make-dispatcher-tree ds)
|
||||
(define subdomain-dispatcher (hash-ref ds 'subdomain-dispatcher))
|
||||
(define tree
|
||||
(sequencer:make
|
||||
subdomain-dispatcher
|
||||
(pathprocedure:make "/" (page ds page-home))
|
||||
(pathprocedure:make "/proxy" (page ds page-proxy))
|
||||
(pathprocedure:make "/search" (page ds page-global-search))
|
||||
(pathprocedure:make "/set-user-settings" (page ds page-set-user-settings))
|
||||
(pathprocedure:make "/buddyfight/wiki/It_Doesn't_Work!!" (page ds page-it-works))
|
||||
(filter:make (pregexp (format "^/~a/wiki/Category:.+$" px-wikiname)) (lift:make (page ds page-category)))
|
||||
(filter:make (pregexp (format "^/~a/wiki/File:.+$" px-wikiname)) (lift:make (page ds page-file)))
|
||||
(if (config-true? 'feature_offline::enabled)
|
||||
(filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (page ds page-wiki-offline)))
|
||||
(λ (_conn _req) (next-dispatcher)))
|
||||
(filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (page ds page-wiki)))
|
||||
(filter:make (pregexp (format "^/~a/search$" px-wikiname)) (lift:make (page ds page-search)))
|
||||
(filter:make (pregexp (format "^/~a(/(wiki(/)?)?)?$" px-wikiname)) (lift:make (page ds redirect-wiki-home)))
|
||||
(if (config-true? 'feature_offline::enabled)
|
||||
(filter:make (pregexp (format "^/archive/~a/(styles|images)/.+$" px-wikiname)) (lift:make (page ds page-static-archive)))
|
||||
(λ (_conn _req) (next-dispatcher)))
|
||||
(hash-ref ds 'static-dispatcher)
|
||||
(lift:make (hash-ref ds 'page-not-found))))
|
||||
(make-semicolon-fixer-dispatcher tree))
|
||||
|
||||
(define ((make-semicolon-fixer-dispatcher orig-dispatcher) conn orig-req)
|
||||
(define new-req (struct-copy request orig-req [uri (fix-semicolons-url (request-uri orig-req))]))
|
||||
(orig-dispatcher conn new-req))
|
||||
(host:make
|
||||
(λ (host-sym)
|
||||
(if/out (config-true? 'canonical_origin)
|
||||
(let* ([host-header (symbol->string host-sym)]
|
||||
[splitter (string-append "." (url-host (string->url (config-get 'canonical_origin))))]
|
||||
[s (string-split host-header splitter #:trim? #f)])
|
||||
(if/in (and (eq? 2 (length s)) (equal? "" (cadr s)))
|
||||
((hash-ref ds 'subdomain-dispatcher) (car s))))
|
||||
(sequencer:make
|
||||
(pathprocedure:make "/" (hash-ref ds 'page-home))
|
||||
(pathprocedure:make "/proxy" (hash-ref ds 'page-proxy))
|
||||
(pathprocedure:make "/search" (hash-ref ds 'page-global-search))
|
||||
(filter:make (pregexp (format "^/~a/wiki/Category:.+$" px-wikiname)) (lift:make (hash-ref ds 'page-category)))
|
||||
(filter:make (pregexp (format "^/~a/wiki/File:.+$" px-wikiname)) (lift:make (hash-ref ds 'page-file)))
|
||||
(filter:make (pregexp (format "^/~a/wiki/.+$" px-wikiname)) (lift:make (hash-ref ds 'page-wiki)))
|
||||
(filter:make (pregexp (format "^/~a/search$" px-wikiname)) (lift:make (hash-ref ds 'page-search)))
|
||||
(filter:make (pregexp (format "^/~a(/(wiki(/)?)?)?$" px-wikiname)) (lift:make (hash-ref ds 'redirect-wiki-home)))
|
||||
(hash-ref ds 'static-dispatcher)
|
||||
(lift:make (hash-ref ds 'page-not-found)))))))
|
||||
|
|
|
@ -1,593 +0,0 @@
|
|||
#lang racket/base
|
||||
|
||||
(provide
|
||||
(struct-out extwiki-props^)
|
||||
(struct-out extwiki-group^)
|
||||
extwiki-groups
|
||||
(struct-out extwiki^)
|
||||
extwikis)
|
||||
|
||||
(struct extwiki-props^ (go) #:transparent)
|
||||
|
||||
;; A named group that independent wikis are filed under (see `extwiki-groups`).
;;   name        - display name of the group, a string such as "NIWA"
;;   links       - list of (label . url) string pairs with further reading about the group
;;   description - procedure of one argument (`props`) that returns an x-expression
;;                 describing the group, or '() when there is nothing to show
(struct extwiki-group^ (name links description) #:transparent)
|
||||
(define extwiki-groups
|
||||
(hasheq 'NIWA
|
||||
(extwiki-group^
|
||||
"NIWA"
|
||||
'(("Why did editors leave Fandom?" . "https://www.kotaku.com.au/2022/10/massive-zelda-wiki-reclaims-independence-six-months-before-tears-of-the-kingdom/"))
|
||||
(λ (props)
|
||||
`(p "Most major Nintendo wikis are part of the "
|
||||
(a (@ (href "https://www.niwanetwork.org/about/")) "Nintendo Independent Wiki Alliance")
|
||||
" and have their own wikis off Fandom.")))
|
||||
|
||||
'SEIWA
|
||||
(extwiki-group^
|
||||
"SEIWA"
|
||||
'(("SEIWA Website" . "https://seiwanetwork.org/"))
|
||||
(λ (props)
|
||||
`(p "The Square Enix Indpendent Wiki Alliance, or SEIWA, is a network of independent wikis established in 2011 and focused on providing high-quality coverage of Square Enix and its content. We work together, along with our affiliates and others, to co-operate and support one another while providing the best-quality content on the various Square Enix video games and media.")))
|
||||
|
||||
'GWN
|
||||
(extwiki-group^
|
||||
"GWN"
|
||||
'(("Gaming Wiki Network" . "https://gamingwikinetwork.org/"))
|
||||
(λ (props)
|
||||
`(p "This wiki is part of the Gaming Wiki Network, a network of independently-hosted wikis about video game franchises. The GWN was founded on October 21, 2022. It aims to support all gaming communities in building independently-hosted wikis.")))
|
||||
|
||||
'Terraria
|
||||
(extwiki-group^
|
||||
"Terraria"
|
||||
'(("Announcement: New Official Terraria Wiki!" . "https://forums.terraria.org/index.php?threads/new-official-terraria-wiki-launches-today.111239/") ("In the media" . "https://www.pcgamesn.com/terraria/wiki"))
|
||||
(λ (props) '()))
|
||||
|
||||
'Calamity_Mod
|
||||
(extwiki-group^
|
||||
"Calamity Mod"
|
||||
'(("Announcement: Moving to wiki.gg" . "https://www.reddit.com/r/CalamityMod/comments/ts0586/important_calamity_wiki_announcement/"))
|
||||
(λ (props) '()))
|
||||
|
||||
'ARK
|
||||
(extwiki-group^
|
||||
"ARK"
|
||||
'(("Announcement: Official Wiki Is Moving!" . "https://survivetheark.com/index.php?/forums/topic/657902-official-ark-wiki-feedback/")
|
||||
("Reasons" . "https://todo.sr.ht/~cadence/breezewiki-todo/4#event-216613")
|
||||
("Browser Extension" . "https://old.reddit.com/r/playark/comments/xe51sy/official_ark_wiki_launched_a_browser_extension_to/"))
|
||||
(λ (props) '()))
|
||||
|
||||
'Astroneer
|
||||
(extwiki-group^
|
||||
"Astroneer"
|
||||
'(("Migration discussion" . "https://old.reddit.com/r/Astroneer/comments/z905id/the_official_astroneer_wiki_has_moved_to_wikigg/") ("Migration info" . "https://astroneer.fandom.com/wiki/Talk:Astroneer_Wiki/Migration_to_Wiki.gg"))
|
||||
(λ (props) '()))
|
||||
|
||||
'RuneScape
|
||||
(extwiki-group^
|
||||
"RuneScape"
|
||||
'(("Leaving Wikia" . "https://runescape.wiki/w/Forum:Leaving_Wikia")
|
||||
("In the media" . "https://kotaku.com/video-game-wikis-abandon-their-platform-after-year-of-p-1829401866")
|
||||
("Browser Extension" . "https://runescape.wiki/w/RuneScape:Finding_the_wikis_with_ease#Extensions"))
|
||||
(λ (props) '()))
|
||||
|
||||
'Minecraft
|
||||
(extwiki-group^
|
||||
"Minecraft"
|
||||
'(("Post-Move FAQ" . "https://minecraft.wiki/w/Minecraft_Wiki:Moving_from_Fandom")
|
||||
("Pre-Move Discussion" . "https://minecraft.fandom.com/wiki/Minecraft_Wiki:Moving_from_Fandom")
|
||||
("In the media: PCGamer" . "https://www.pcgamer.com/official-minecraft-wiki-editors-so-furious-at-fandoms-degraded-functionality-and-popups-theyre-overwhelmingly-voting-to-leave-the-site/")
|
||||
("In the media: PCGamesN" . "https://www.pcgamesn.com/minecraft/wiki-fandom"))
|
||||
(λ (props)
|
||||
'(p "The wiki was founded by Citricsquid on July 16th, 2009 as a way to document information from Minecraft. Since November 15th, 2010, it has been hosted by Curse Media. On December 12th, 2018, it moved to Fandom as it purchased Curse Media. Since September 24, 2023, it forked from Fandom and has been hosted by Weird Gloop.")))
|
||||
|
||||
'Tardis
|
||||
(extwiki-group^
|
||||
"Tardis"
|
||||
'(("Forking announcement" . "https://tardis.wiki/wiki/Tardis:Forking_announcement")
|
||||
("Discussion on Reddit" . "https://old.reddit.com/r/doctorwho/comments/1azxmrl/tardis_wiki_has_regenerated/"))
|
||||
(λ (props) '()))
|
||||
|
||||
'Rainverse
|
||||
(extwiki-group^
|
||||
"Rainverse"
|
||||
'(("Forking announcement" . "https://transfem.social/notes/9qsqdkmqi78e01bh"))
|
||||
(λ (props)
|
||||
'()))
|
||||
|
||||
'empty
|
||||
(extwiki-group^
|
||||
"Misc"
|
||||
'(("This wiki doesn't have a description yet. Add one?" . "https://docs.breezewiki.com/Reporting_Bugs.html"))
|
||||
(λ (props) '()))))
|
||||
|
||||
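;; fields: wikinames (list of Fandom wikinames), banner, group (key into extwiki-groups), name, home (url), logo (url), description (procedure of props)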
|
||||
(struct extwiki^ (wikinames banner group name home logo description) #:transparent)
|
||||
(define extwikis
|
||||
(list
|
||||
(extwiki^
|
||||
'("arms" "armsgame") 'default
|
||||
'NIWA
|
||||
"ARMS Institute"
|
||||
"https://armswiki.org/wiki/Home"
|
||||
"https://niwanetwork.org/images/logos/armswiki.png"
|
||||
(λ (props)
|
||||
`((p "ARMS Institute is a comprehensive resource for information about the Nintendo Switch game, ARMS. Founded on May 1, 2017 and growing rapidly, the wiki strives to offer in-depth coverage of ARMS from both a competitive and casual perspective. Join us and ARM yourself with knowledge!"))))
|
||||
|
||||
(extwiki^
|
||||
'("pokemon" "monster") 'default
|
||||
'NIWA
|
||||
"Bulbapedia"
|
||||
"https://bulbapedia.bulbagarden.net/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/bulbapedia.png"
|
||||
(λ (props)
|
||||
`((p "A part of the Bulbagarden community, Bulbapedia was founded on December 21, 2004 by Liam Pomfret. Everything you need to know about Pokémon can be found at Bulbapedia, whether about the games, the anime, the manga, or something else entirely. With its Bulbanews section and the Bulbagarden forums, it's your one-stop online place for Pokémon."))))
|
||||
|
||||
(extwiki^
|
||||
'("dragalialost") 'default
|
||||
'NIWA
|
||||
"Dragalia Lost Wiki"
|
||||
"https://dragalialost.wiki/w/Dragalia_Lost_Wiki"
|
||||
"https://niwanetwork.org/images/logos/dragalialost.png"
|
||||
(λ (props)
|
||||
`((p "The Dragalia Lost Wiki was originally founded in September 2018 on the Gamepedia platform but went independent in January 2021. The Wiki aims to document anything and everything Dragalia Lost, from in-game data to mechanics, story, guides, and more!"))))
|
||||
|
||||
(extwiki^
|
||||
'("dragonquest") 'default
|
||||
'NIWA
|
||||
"Dragon Quest Wiki"
|
||||
"https://dragon-quest.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/dragonquestwiki.png"
|
||||
(λ (props)
|
||||
`((p "Originally founded on Wikia, the Dragon Quest Wiki was largely inactive until FlyingRagnar became an admin in late 2009. The wiki went independent about a year later when it merged with the Dragon Quest Dictionary/Encyclopedia which was run by Zenithian and supported by the Dragon's Den. The Dragon Quest Wiki aims to be the most complete resource for Dragon Quest information on the web. It continues to grow in the hope that one day the series will be as popular in the rest of the world as it is in Japan."))))
|
||||
|
||||
(extwiki^
|
||||
'("fireemblem") 'default
|
||||
'NIWA
|
||||
"Fire Emblem Wiki"
|
||||
"https://fireemblemwiki.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/fireemblemwiki.png"
|
||||
(λ (props)
|
||||
`((p "Growing since August 26, 2010, Fire Emblem Wiki is a project whose goal is to cover all information pertaining to the Fire Emblem series. It aspires to become the most complete and accurate independent source of information on this series."))))
|
||||
|
||||
(extwiki^
|
||||
'("fzero" "f-zero") 'default
|
||||
'NIWA
|
||||
"F-Zero Wiki"
|
||||
"https://mutecity.org/wiki/F-Zero_Wiki"
|
||||
"https://niwanetwork.org/images/logos/fzerowiki.png"
|
||||
(λ (props)
|
||||
`((p "Founded on Wikia in November 2007, F-Zero Wiki became independent with NIWA's help in 2011. F-Zero Wiki is quickly growing into the Internet's definitive source for the world of 2200 km/h+, from pilots to machines, and is the founding part of MuteCity.org, the web's first major F-Zero community."))))
|
||||
|
||||
(extwiki^
|
||||
'("goldensun") 'default
|
||||
'NIWA
|
||||
"Golden Sun Universe"
|
||||
"https://www.goldensunwiki.net/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/goldensununiverse.png"
|
||||
(λ (props)
|
||||
`((p "Originally founded on Wikia in late 2006, Golden Sun Universe has always worked hard to meet one particular goal: to be the single most comprehensive yet accessible resource on the Internet for Nintendo's RPG series Golden Sun. It became an independent wiki four years later. Covering characters and plot, documenting all aspects of the gameplay, featuring walkthroughs both thorough and bare-bones, and packed with all manner of odd and fascinating minutiae, Golden Sun Universe leaves no stone unturned!"))))
|
||||
|
||||
(extwiki^
|
||||
'("tetris") 'default
|
||||
'NIWA
|
||||
"Hard Drop - Tetris Wiki"
|
||||
"https://harddrop.com/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/harddrop.png"
|
||||
(λ (props)
|
||||
`((p "The Tetris Wiki was founded by Tetris fans for Tetris fans on tetrisconcept.com in March 2006. The Tetris Wiki torch was passed to harddrop.com in July 2009. Hard Drop is a Tetris community for all Tetris players, regardless of skill or what version of Tetris you play."))))
|
||||
|
||||
(extwiki^
|
||||
'("kidicarus") 'default
|
||||
'NIWA
|
||||
"Icaruspedia"
|
||||
"https://www.kidicaruswiki.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/icaruspedia.png"
|
||||
(λ (props)
|
||||
`((p "Icaruspedia is the Kid Icarus wiki that keeps flying to new heights. After going independent on January 8, 2012, Icaruspedia has worked to become the largest and most trusted independent source of Kid Icarus information. Just like Pit, they'll keep on fighting until the job is done."))))
|
||||
|
||||
(extwiki^
|
||||
'("splatoon" "uk-splatoon" "splatoon3" "splatoon2") 'default
|
||||
'NIWA
|
||||
"Inkipedia"
|
||||
"https://splatoonwiki.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/inkipedia.png"
|
||||
(λ (props)
|
||||
`((p "Inkipedia is your ever-growing go-to source for all things Splatoon related. Though founded on Wikia on June 10, 2014, Inkipedia went independent on May 18, 2015, just days before Splatoon's release. Our aim is to cover all aspects of the series, both high and low. Come splat with us now!"))))
|
||||
|
||||
(extwiki^
|
||||
'("starfox") 'default
|
||||
'NIWA
|
||||
"Lylat Wiki"
|
||||
"https://starfoxwiki.info/wiki/Lylat_Wiki"
|
||||
"https://niwanetwork.org/images/logos/lylatwiki.png"
|
||||
(λ (props)
|
||||
`((p "Out of seemingly nowhere, Lylat Wiki sprung up one day in early 2010. Led by creator, Justin Folvarcik, and project head, Tacopill, the wiki has reached stability since the move to its own domain. The staff of Lylat Wiki are glad to help out the NIWA wikis and are even prouder to join NIWA's ranks as the source for information on the Star Fox series."))))
|
||||
|
||||
(extwiki^
|
||||
'("metroid" "themetroid") 'default
|
||||
'NIWA
|
||||
"Metroid Wiki"
|
||||
"https://www.metroidwiki.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/metroidwiki.png"
|
||||
(λ (props)
|
||||
`((p "Metroid Wiki, founded on January 27, 2010 by Nathanial Rumphol-Janc and Zelda Informer, is a rapidly expanding wiki that covers everything Metroid, from the games, to every suit, vehicle and weapon."))))
|
||||
|
||||
(extwiki^
|
||||
'("nintendo" "nintendoseries" "nintendogames") 'default
|
||||
'NIWA
|
||||
"Nintendo Wiki"
|
||||
"http://niwanetwork.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/nintendowiki.png"
|
||||
(λ (props)
|
||||
`((p "Created on May 12, 2010, NintendoWiki (N-Wiki) is a collaborative project by the NIWA team to create an encyclopedia dedicated to Nintendo, being the company around which all other NIWA content is focused. It ranges from mainstream information such as the games and people who work for the company, to the most obscure info like patents and interesting trivia."))))
|
||||
|
||||
(extwiki^
|
||||
'("animalcrossing" "animalcrossingcf" "acnh") 'default
|
||||
'NIWA
|
||||
"Nookipedia"
|
||||
"https://nookipedia.com/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/nookipedia.png"
|
||||
(λ (props)
|
||||
`((p "Founded in August 2005 on Wikia, Nookipedia was originally known as Animal Crossing City. Shortly after its five-year anniversary, Animal Crossing City decided to merge with the independent Animal Crossing Wiki, which in January 2011 was renamed to Nookipedia. Covering everything from the series including characters, items, critters, and much more, Nookipedia is your number one resource for everything Animal Crossing!"))))
|
||||
|
||||
(extwiki^
|
||||
'("pikmin") 'default
|
||||
'NIWA
|
||||
"Pikipedia"
|
||||
"https://www.pikminwiki.com/"
|
||||
"https://niwanetwork.org/images/logos/pikipedia.png"
|
||||
(λ (props)
|
||||
`((p "Pikipedia, also known as Pikmin Wiki, was founded by Dark Lord Revan on Wikia in December 2005. In September 2010, with NIWA's help, Pikipedia moved away from Wikia to become independent. Pikipedia is working towards their goal of being the foremost source for everything Pikmin."))))
|
||||
|
||||
(extwiki^
|
||||
'("pikmin-fan" "pikpikpedia") 'default
|
||||
'NIWA
|
||||
"Pimkin Fanon"
|
||||
"https://www.pikminfanon.com/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/pikifanon.png"
|
||||
(λ (props)
|
||||
`((p "Pikmin Fanon is a Pikmin wiki for fan stories (fanon). Founded back on November 1, 2008 by Rocky0718 as a part of Wikia, Pikmin Fanon has been independent since September 14, 2010. Check them out for fan created stories based around the Pikmin series."))))
|
||||
|
||||
(extwiki^
|
||||
'("supersmashbros") 'default
|
||||
'NIWA
|
||||
"SmashWiki"
|
||||
"https://www.ssbwiki.com/"
|
||||
"https://niwanetwork.org/images/logos/smashwiki.png"
|
||||
(λ (props)
|
||||
`((p "Originally two separate wikis (one on SmashBoards, the other on Wikia), SmashWiki as we know it was formed out of a merge on February 29th, 2008, becoming independent on September 28th, 2010. SmashWiki is the premier source of Smash Bros. information, from simple tidbits to detailed mechanics, and also touches on the origins of its wealth of content from its sibling franchises."))))
|
||||
|
||||
(extwiki^
|
||||
'("starfy") 'default
|
||||
'NIWA
|
||||
"Starfy Wiki"
|
||||
"https://www.starfywiki.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/starfywiki.png"
|
||||
(λ (props)
|
||||
`((p "Founded on May 30, 2009, Starfy Wiki's one goal is to become the best source on Nintendo's elusive game series The Legendary Starfy. After gaining independence in 2011 with the help of Tappy and the wiki's original administrative team, the wiki still hopes to achieve its goal and be the best source of Starfy info for all present and future fans."))))
|
||||
|
||||
(extwiki^
|
||||
'() 'default
|
||||
'NIWA
|
||||
"StrategyWiki"
|
||||
"https://www.strategywiki.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/strategywiki.png"
|
||||
(λ (props)
|
||||
`((p "StrategyWiki was founded in December 2005 by former member Brandon Suit with the idea that the existing strategy guides on the Internet could be improved. Three years later, in December 2008, Scott Jacobi officially established Abxy LLC for the purpose of owning and operating StrategyWiki as a community. Their vision is to bring free, collaborative video game strategy guides to the masses, including Nintendo franchise strategy guides."))))
|
||||
|
||||
(extwiki^
|
||||
'("mario" "themario" "imario" "supermarionintendo" "mariokart" "luigi-kart" "mario3") 'default
|
||||
'NIWA
|
||||
"Super Mario Wiki"
|
||||
"https://www.mariowiki.com/"
|
||||
"https://niwanetwork.org/images/logos/mariowiki.png"
|
||||
(λ (props)
|
||||
`((p "Online since August 12, 2005, when it was founded by Steve Shinn, Super Mario Wiki has you covered for anything Mario, Donkey Kong, Wario, Luigi, Yoshi—the whole gang, in fact. With its own large community in its accompanying forum, Super Mario Wiki is not only a great encyclopedia, but a fansite for you to talk anything Mario."))))
|
||||
|
||||
(extwiki^
|
||||
'("mario64") 'default
|
||||
'NIWA
|
||||
"Ukikipedia"
|
||||
"https://ukikipedia.net/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/ukikipedia.png"
|
||||
(λ (props)
|
||||
`((p "Founded in 2018, Ukikipedia is a wiki focused on expert level knowledge of Super Mario 64, including detailed coverage of game mechanics, glitches, speedrunning, and challenges."))))
|
||||
|
||||
(extwiki^
|
||||
'("advancewars") 'default
|
||||
'NIWA
|
||||
"Wars Wiki"
|
||||
"https://www.warswiki.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/warswiki.png"
|
||||
(λ (props)
|
||||
`((p "Created in February 2009, Wars Wiki is a small wiki community with a large heart. Founded by JoJo and Wars Central, Wars Wiki is going strong on one of Nintendo's lesser known franchises. Wars Wiki is keen to contribute to NIWA, and we're proud to be able to support them. With the Wars Central community, including forums, it's definitely worth checking out."))))
|
||||
|
||||
(extwiki^
|
||||
'("earthbound") 'default
|
||||
'NIWA
|
||||
"WikiBound"
|
||||
"https://www.wikibound.info/wiki/WikiBound"
|
||||
"https://niwanetwork.org/images/logos/wikibound.png"
|
||||
(λ (props)
|
||||
`((p "Founded in early 2010 by Tacopill, WikiBound strives to create a detailed database on the Mother/EarthBound games, a quaint series only having two games officially released outside of Japan. Help spread the PK Love by editing WikiBound!"))))
|
||||
|
||||
(extwiki^
|
||||
'("kirby") 'default
|
||||
'NIWA
|
||||
"WiKirby"
|
||||
"https://wikirby.com/wiki/Kirby_Wiki"
|
||||
"https://niwanetwork.org/images/logos/wikirby.png"
|
||||
(λ (props)
|
||||
`((p "WiKirby. It's a wiki. About Kirby! Amidst the excitement of NIWA being founded, Josh LeJeune decided to create a Kirby Wiki, due to lack of a strong independent one online. Coming online on January 24, 2010, WiKirby continues its strong launch with a dedicated community and a daily growing source of Kirby based knowledge."))))
|
||||
|
||||
(extwiki^
|
||||
'("xenoblade" "xenoseries" "xenogears" "xenosaga") 'parallel
|
||||
'NIWA
|
||||
"Xeno Series Wiki"
|
||||
"https://www.xenoserieswiki.org/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/xenoserieswiki.png"
|
||||
(λ (props)
|
||||
`((p "Xeno Series Wiki was created February 4, 2020 by Sir Teatei Moonlight. While founded by the desire to have an independent wiki for Xenoblade, there was an interest in including the Xenogears and Xenosaga games within its focus as well. This wide range of coverage means it's always in need of new editors to help bolster its many subjects."))))
|
||||
|
||||
(extwiki^
|
||||
'("zelda" "zelda-archive") 'default
|
||||
'NIWA
|
||||
"Zelda Wiki"
|
||||
"https://zeldawiki.wiki/wiki/Main_Page"
|
||||
"https://niwanetwork.org/images/logos/zeldapedia.png"
|
||||
(λ (props)
|
||||
`((p "Founded on April 23, 2005, Zelda Wiki is your definitive source for encyclopedic information on The Legend of Zelda series, as well as all of the latest Zelda news. Zelda Wiki went independent from Fandom in October 2022, citing Fandom's recent buyouts and staffing decisions among their reasons."))))
|
||||
|
||||
(extwiki^
|
||||
'("chrono") 'default
|
||||
'SEIWA
|
||||
"Chrono Wiki"
|
||||
"https://www.chronowiki.org/wiki/Chrono_Wiki"
|
||||
"https://cdn.wikimg.net/en/chronowiki/images/5/59/Site-wiki.png"
|
||||
(λ (props) '((p "A free encyclopedia dedicated to Chrono Trigger, Chrono Cross, Radical Dreamers, and everything else related to the series. A long, rich history and a friendly, encouraging userbase makes this the best Chrono in the entire time/space continuum!"))))
|
||||
|
||||
(extwiki^
|
||||
'("finalfantasy" "finalfantasyxv" "ffxiclopedia") 'parallel
|
||||
'SEIWA
|
||||
"Final Fantasy Wiki"
|
||||
"https://finalfantasywiki.com/wiki/Main_Page"
|
||||
"https://cdn.finalfantasywiki.com/wiki.png"
|
||||
(λ (props) '((p "A new wiki focused on covering Square Enix's flagship franchise, the critically-acclaimed Final Fantasy series. The Final Fantasy Wiki was founded on January 12, 2020 as part of SEIWA and covers all things Final Fantasy and related franchises."))))
|
||||
|
||||
(extwiki^
|
||||
'("kingdomhearts") 'default
|
||||
'SEIWA
|
||||
"Kingdom Hearts Wiki"
|
||||
"https://www.khwiki.com/"
|
||||
"https://kh.wiki.gallery/images/b/bc/Wiki.png"
|
||||
(λ (props) '((p "The Kingdom Hearts Wiki attempts to document all things related to the Kingdom Hearts series, from elements of storyline to gameplay. The site was originally founded on April 1, 2006 on Wikia and became independent on February 9, 2011. Since this time, the community of the KHWiki strives to be the most professional and comprehensive Kingdom Hearts resource in the world."))))
|
||||
|
||||
(extwiki^
|
||||
'("squareenix") 'default
|
||||
'SEIWA
|
||||
"Square Enix Wiki"
|
||||
"https://wiki.seiwanetwork.org/wiki/Main_Page"
|
||||
"https://cdn.seiwanetwork.org/thumb/9/94/Square_Enix_Wiki_Logo.png/200px-Square_Enix_Wiki_Logo.png"
|
||||
(λ (props) '((p "The Square Enix Wiki was founded on February 8, 2012, and is an up-and-coming wiki project created by SEIWA. It focuses on covering all things Square Enix, from its video game series to its physical publications to its most notable employees and work as a company."))))
|
||||
|
||||
(extwiki^
|
||||
'("terraria") 'default
|
||||
'Terraria
|
||||
"Official Terraria Wiki"
|
||||
"https://terraria.wiki.gg/wiki/Terraria_Wiki"
|
||||
"https://terraria.wiki.gg/images/5/5a/App_icon_1.3_Update.png"
|
||||
(λ (props)
|
||||
`()))
|
||||
|
||||
(extwiki^
|
||||
'("calamitymod" "calamity-mod") 'empty
|
||||
'Calamity_Mod
|
||||
"Official Calamity Mod Wiki"
|
||||
"https://calamitymod.wiki.gg/wiki/Calamity_Mod_Wiki"
|
||||
#f
|
||||
#f)
|
||||
|
||||
(extwiki^
|
||||
'("ark" "ark-survival-evolved-archive") 'default
|
||||
'ARK
|
||||
"ARK Community Wiki"
|
||||
"https://ark.wiki.gg/wiki/ARK_Survival_Evolved_Wiki"
|
||||
"https://ark.wiki.gg/images/e/e6/Site-logo.png"
|
||||
(λ (props)
|
||||
`((p "The official ARK: Survival Evolved Wiki launched in 2016. In April 2022 it moved to wiki.gg's hosting to improve creative control and the overall browsing experience."))))
|
||||
|
||||
(extwiki^
|
||||
'("runescape") 'default
|
||||
'RuneScape
|
||||
"RuneScape Wiki"
|
||||
"https://runescape.wiki/w/Main_Page"
|
||||
"https://runescape.wiki/images/Wiki.png"
|
||||
(λ (props)
|
||||
`((p "The RuneScape Wiki was founded on April 8, 2005. In October 2018, the wiki left Fandom (then Wikia), citing their apathy towards the wiki and excessive advertisements."))))
|
||||
|
||||
(extwiki^
|
||||
'("oldschoolrunescape") 'default
|
||||
'RuneScape
|
||||
"Old School RuneScape Wiki"
|
||||
"https://oldschool.runescape.wiki/w/Main_Page"
|
||||
"https://oldschool.runescape.wiki/images/Wiki.png"
|
||||
(λ (props)
|
||||
`((p "The Old School RuneScape Wiki was founded on February 14, 2013. In October 2018, the RuneScape Wiki left Fandom (then Wikia), citing their apathy towards the wiki and excessive advertisements, with the Old School RuneScape Wiki following suit."))))
|
||||
|
||||
;; RuneScape Classic Wiki (RuneScape group), listed for the "runescapeclassic" Fandom wikiname.
;; Arguments: wikinames, banner style, group, display name, home page, logo, description.
(extwiki^
 '("runescapeclassic") 'default
 'RuneScape
 "RuneScape Classic Wiki"
 "https://classic.runescape.wiki/w/Main_Page"
 "https://classic.runescape.wiki/images/Wiki.png"
 (λ (props)
   ;; the description is about this wiki, so it names the RuneScape Classic Wiki as the subject
   `((p "The RuneScape Classic Wiki was founded on April 19, 2009. In October 2018, the RuneScape Wiki left Fandom (then Wikia), citing their apathy towards the wiki and excessive advertisements, with the RuneScape Classic Wiki following suit."))))
|
||||
|
||||
(extwiki^
|
||||
'("astroneer") 'default
|
||||
'Astroneer
|
||||
"Astroneer Wiki"
|
||||
"https://astroneer.wiki.gg/wiki/Astroneer_Wiki"
|
||||
"https://astroneer.wiki.gg/images/7/74/Icon_Astroneer.png"
|
||||
(λ (props)
|
||||
`((p "“Fandom bought Gamepedia and forced a migration, with their restricted, ad-heavy appearance, and other annoying features that we could not remove, the wiki grew slow and annoying to use, especially for logged out users.")
|
||||
(p "“We decided to move away from Fandom to Wiki.gg, which returns the wiki to how it used to be on gamepedia, without the ads spamming and forced videos.”"))))
|
||||
|
||||
(extwiki^
|
||||
'("minecraft") 'default
|
||||
'Minecraft
|
||||
"The Minecraft Wiki"
|
||||
"https://minecraft.wiki/w/Minecraft_Wiki"
|
||||
"https://minecraft.wiki/images/Wiki.png"
|
||||
(λ (props)
|
||||
`()))
|
||||
|
||||
(extwiki^
|
||||
'("tardis") 'default
|
||||
'Tardis
|
||||
"TARDIS Wiki"
|
||||
"https://tardis.wiki/wiki/Doctor_Who_Wiki"
|
||||
"https://tardis.wiki/w/images/Tardis_Images/e/e6/Site-logo.png"
|
||||
(λ (props)
|
||||
`()))
|
||||
|
||||
(extwiki^
|
||||
'("wizardry") 'default
|
||||
'GWN
|
||||
"Wizardry Wiki"
|
||||
"https://wizardry.wiki.gg/wiki/Wizardry_Wiki"
|
||||
"https://wizardry.wiki.gg/images/e/e6/Site-logo.png"
|
||||
(λ (props)
|
||||
`((p "On March 21, 2023, the wiki has decided to leave and abandoning from Fandom due to numerous of issues such as intrusive advertising, long-lasting bugs, restrictions on customization, etcetera. Wizardry Wiki was officially inducted into the wiki.gg wikifarm, with all contents forked over.")
|
||||
(p "The wiki has partnered with " (a (@ (href "https://fallout.wiki/")) "Independent Fallout Wiki") " as of June 14, 2024."))))
|
||||
|
||||
(extwiki^
|
||||
'("jackryan") 'default
|
||||
'GWN
|
||||
"Tom Clancy Wiki"
|
||||
"https://tomclancy.wiki.gg/wiki/Tom_Clancy_Wiki"
|
||||
"https://tomclancy.wiki.gg/images/thumb/c/c5/Jack_Ryan_Logo_Dark.png/600px-Jack_Ryan_Logo_Dark.png"
|
||||
(λ (props)
|
||||
`((p "The Tom Clancy Wiki is a collaborative encyclopedia dedicated to Tom Clancy’s franchises. The Tom Clancy franchise is a 40-year old expansive franchise founded by Tom Clancy, telling several unique sagas through books, video games, and films, as well as a TV show."))))
|
||||
|
||||
(extwiki^
|
||||
'("hollowknight") 'default
|
||||
'GWN
|
||||
"Hollow Knight Wiki"
|
||||
"https://hollowknight.wiki/wiki/Main_Page"
|
||||
"https://gamingwikinetwork.org/images/logos/hollowknight.png"
|
||||
(λ (props)
|
||||
`((p "We are an independently hosted wiki for the games Hollow Knight and Hollow Knight: Silksong, created by fans, for fans. The wiki is a fork of the FANDOM Hollow Knight Wiki and was officially unveiled on October 31, 2023."))))
|
||||
|
||||
(extwiki^
|
||||
'("hellokitty" "sanrio") 'default
|
||||
'GWN
|
||||
"Sanrio Wiki"
|
||||
"https://sanriowiki.com/wiki/Sanrio_Wiki"
|
||||
"https://cdn.sanriowiki.com/wiki.png"
|
||||
(λ (props)
|
||||
`((p "Sanrio Wiki is a project that was started on April 14, 2015 by EvieMelody. It was hosted on the wiki-farm ShoutWiki and has since become independent."))))
|
||||
|
||||
(extwiki^
|
||||
'("sto") 'default
|
||||
'GWN
|
||||
"Star Trek Online Wiki"
|
||||
"https://stowiki.net/wiki/Main_Page"
|
||||
"https://gamingwikinetwork.org/images/logos/stowiki.png"
|
||||
(λ (props)
|
||||
`()))
|
||||
|
||||
(extwiki^
|
||||
'("rayman-game" "ubisoftrayman") 'default
|
||||
'GWN
|
||||
"Rayman Wiki"
|
||||
"https://raymanpc.com/wiki/en/Main_Page"
|
||||
"https://raymanpc.com/wiki/script-en/resources/assets/logo-en.png?5c608"
|
||||
(λ (props)
|
||||
`()))
|
||||
|
||||
(extwiki^
|
||||
'("granblue") 'empty
|
||||
'empty
|
||||
"Granblue Fantasy Wiki"
|
||||
"https://gbf.wiki/"
|
||||
"https://gbf.wiki/images/1/18/Vyrnball.png?0704c"
|
||||
(λ (props)
|
||||
`()))
|
||||
|
||||
(extwiki^
|
||||
'("hellmet-roblox") 'empty
|
||||
'empty
|
||||
"HELLMET Wiki"
|
||||
"https://hellmet.miraheze.org/wiki/Main_Page"
|
||||
"https://static.miraheze.org/hellmetwiki/thumb/c/ce/Hellmet_Wiki_Logo.png/135px-Hellmet_Wiki_Logo.png"
|
||||
(λ (props)
|
||||
`()))
|
||||
|
||||
(extwiki^
|
||||
'("rain-web-comic") 'default
|
||||
'empty
|
||||
"Rainverse Wiki"
|
||||
"https://rainverse.wiki/wiki/Main_Page"
|
||||
"https://static.miraheze.org/rainversewiki/2/2c/Rain_comic_cover.png"
|
||||
(λ (props)
|
||||
`((p "We have a newly-migrated Rainverse Wiki which escaped from Fandom! Rain is the comic that helped me figure out my gender, so I am really glad to have a wiki on a non-evil host.")
|
||||
(p "Please stop using the abandoned copy of Rain Wiki on Fandom. Fandom is still \"training\" a generator which adds procedurally-generated bullshit to articles, with no way for users to remove or correct it, and they're demanding volunteer wiki admins waste time \"vetting\" the procedurally-generated BS for accuracy. As Jocelyn herself said, \"fuck Fandom forever.\"")
|
||||
(p "If you are interested, please add more articles related to other Rainverse stories."))))
|
||||
|
||||
;; fandom wikinames * empty * empty * Name * Home Page * Logo (or #f) * Description (#f)
|
||||
(extwiki^ '("aether") 'empty 'empty "Aether Wiki" "https://aether.wiki.gg/wiki/Aether_Wiki" #f #f)
|
||||
(extwiki^ '("before-darkness-falls") 'empty 'empty "Before Darkness Falls Wiki" "https://beforedarknessfalls.wiki.gg/wiki/Before_Darkness_Falls_Wiki" #f #f)
|
||||
(extwiki^ '("chivalry" "chivalry2") 'empty 'empty "Official Chivalry Wiki" "https://chivalry.wiki.gg/wiki/Chivalry_Wiki" #f #f)
|
||||
(extwiki^ '("clockup") 'empty 'empty "CLOCKUP WIKI" "https://en.clockup.wiki/wiki/Main_Page" #f #f)
|
||||
(extwiki^ '("half-life") 'empty 'empty "Combine OverWiki" "https://combineoverwiki.net/wiki/Main_Page" #f #f)
|
||||
(extwiki^ '("coromon") 'empty 'empty "Coromon Wiki" "https://coromon.wiki.gg/wiki/Coromon_Wiki" #f #f)
|
||||
(extwiki^ '("cosmoteer") 'empty 'empty "Cosmoteer Wiki" "https://cosmoteer.wiki.gg/wiki/Cosmoteer_Wiki" #f #f)
|
||||
;; display name spelled "Encyclopedia Exandria"
(extwiki^ '("criticalrole") 'empty 'empty "Encyclopedia Exandria" "https://criticalrole.miraheze.org/wiki/Main_Page" #f #f)
|
||||
(extwiki^ '("cuphead") 'empty 'empty "Cuphead Wiki" "https://cuphead.wiki.gg/wiki/Cuphead_Wiki" #f #f)
|
||||
(extwiki^ '("darkdeity") 'empty 'empty "Dark Deity Wiki" "https://darkdeity.wiki.gg/wiki/Dark_Deity_Wiki" #f #f)
|
||||
(extwiki^ '("deeprockgalactic") 'empty 'empty "Deep Rock Galactic Wiki" "https://deeprockgalactic.wiki.gg/wiki/Deep_Rock_Galactic_Wiki" #f #f)
|
||||
(extwiki^ '("doom") 'empty 'empty "DoomWiki.org" "https://doomwiki.org/wiki/Entryway" #f #f)
|
||||
(extwiki^ '("dreamscaper") 'empty 'empty "Official Dreamscaper Wiki" "https://dreamscaper.wiki.gg/wiki/Dreamscaper_Wiki" #f #f)
|
||||
(extwiki^ '("elderscrolls") 'empty 'empty "UESP" "https://en.uesp.net/wiki/Main_Page" #f #f)
|
||||
(extwiki^ '("enterthegungeon" "exit-the-gungeon" "enter-the-gungeon-archive") 'empty 'empty "Official Enter The Gungeon Wiki" "https://enterthegungeon.wiki.gg/wiki/Enter_the_Gungeon_Wiki" "https://enterthegungeon.wiki.gg/images/e/e6/Site-logo.png" #f)
|
||||
(extwiki^ '("fiend-folio") 'empty 'empty "Official Fiend Folio Wiki" "https://fiendfolio.wiki.gg/wiki/Fiend_Folio_Wiki" #f #f)
|
||||
(extwiki^ '("foxhole") 'empty 'empty "Foxhole Wiki" "https://foxhole.wiki.gg/wiki/Foxhole_Wiki" #f #f)
|
||||
(extwiki^ '("have-a-nice-death") 'empty 'empty "Have a Nice Death Wiki" "https://haveanicedeath.wiki.gg/wiki/Have_a_Nice_Death_Wiki" #f #f)
|
||||
(extwiki^ '("jojo" "jojos") 'empty 'empty "JoJo's Bizarre Encyclopedia" "https://jojowiki.com/" #f #f)
|
||||
(extwiki^ '("legiontd2") 'empty 'empty "Legion TD 2 Wiki" "https://legiontd2.wiki.gg/wiki/Legion_TD_2_Wiki" #f #f)
|
||||
(extwiki^ '("noita") 'empty 'empty "Noita Wiki" "https://noita.wiki.gg/wiki/Noita_Wiki" #f #f)
|
||||
(extwiki^ '("pathofexile") 'empty 'empty "Official Path of Exile Wiki" "https://www.poewiki.net/wiki/Path_of_Exile_Wiki" #f #f)
|
||||
(extwiki^ '("projectarrhythmia") 'empty 'empty "Project Arrhythmia Wiki" "https://projectarrhythmia.wiki.gg/wiki/Project_Arrhythmia_Wiki" #f #f)
|
||||
(extwiki^ '("sandsofaura") 'empty 'empty "Official Sands of Aura Wiki" "https://sandsofaura.wiki.gg/wiki/Sands_of_Aura_Wiki" #f #f)
|
||||
(extwiki^ '("seaofthieves") 'empty 'empty "Official Sea of Thieves Wiki" "https://seaofthieves.wiki.gg/wiki/Sea_of_Thieves" #f #f)
|
||||
(extwiki^ '("sonsoftheforest") 'empty 'empty "Sons of the Forest Wiki" "https://sonsoftheforest.wiki.gg/wiki/Sons_of_the_Forest_Wiki" #f #f)
|
||||
(extwiki^ '("stardewvalley") 'empty 'empty "Official Stardew Valley Wiki" "https://www.stardewvalleywiki.com/Stardew_Valley_Wiki" #f #f)
|
||||
(extwiki^ '("steamworld") 'empty 'empty "Official SteamWorld Wiki" "https://steamworld.wiki.gg/wiki/SteamWorld_Wiki" #f #f)
|
||||
(extwiki^ '("teamfortress") 'empty 'empty "Official Team Fortress Wiki" "https://wiki.teamfortress.com/wiki/Main_Page" #f #f)
|
||||
(extwiki^ '("temtem") 'empty 'empty "Official Temtem Wiki" "https://temtem.wiki.gg/wiki/Temtem_Wiki" #f #f)
|
||||
(extwiki^ '("terrariamods") 'empty 'empty "Official Terraria Mods Wiki" "https://terrariamods.wiki.gg/wiki/Terraria_Mods_Wiki" #f #f)
|
||||
(extwiki^ '("thoriummod") 'empty 'empty "Official Thorium Mod Wiki" "https://thoriummod.wiki.gg/wiki/Thorium_Mod_Wiki" #f #f)
|
||||
(extwiki^ '("totherescue") 'empty 'empty "To The Rescue!" "https://totherescue.wiki.gg/wiki/To_The_Rescue%21_Wiki" #f #f)
|
||||
(extwiki^ '("touhou") 'empty 'empty "Touhou Wiki" "https://en.touhouwiki.net/wiki/Touhou_Wiki" #f #f)
|
||||
(extwiki^ '("undermine") 'empty 'empty "Official UnderMine Wiki" "https://undermine.wiki.gg/wiki/UnderMine_Wiki" #f #f)
|
||||
(extwiki^ '("westofloathing" "loathing") 'empty 'empty "Wiki of Loathing" "https://loathing.wiki.gg/wiki/Wiki_of_Loathing" #f #f)
|
||||
(extwiki^ '("willyousnail") 'empty 'empty "Official Will You Snail Wiki" "https://willyousnail.wiki.gg/wiki/Will_You_Snail_Wiki" #f #f)
|
||||
(extwiki^ '("yumenikki" "yume-nikki-dream-diary") 'empty 'empty "Yume Wiki" "https://yume.wiki/Main_Page" #f #f)))
|
||||
|
||||
;; get the current dataset so it can be stored above
|
||||
(module+ fetch
  (require racket/generator
           racket/list
           net/http-easy
           html-parsing
           "../lib/xexpr-utils.rkt")

  ;; Download the NIWA members page and parse the body into an x-expression.
  (define members-response (get "https://www.niwanetwork.org/members/"))
  (define members-page (html->xexp (bytes->string/utf-8 (response-body members-response))))

  ;; Only the first element whose id attribute is "content1" is searched for members.
  (define english-content
    ((query-selector (λ (e a c) (equal? (get-attribute 'id a) "content1")) members-page)))
  (define next-member (query-selector (λ (e a c) (has-class? "member" a)) english-content))

  ;; Produces one (wikinames title url icon description) list per element with class "member".
  ;; The wikinames slot is always the empty list here.
  (for/list ([member (in-producer next-member #f)])
    (define next-link (query-selector (λ (e a c) (eq? e 'a)) member))
    ;; next-link is invoked twice; keep the two calls in this order.
    ;; NOTE(review): if query-selector returns a stateful producer (as its use with
    ;; in-producer suggests), the title is read from the match after the one that
    ;; supplies the url - confirm against xexpr-utils.
    (define url (get-attribute 'href (bits->attributes (next-link))))
    (define title (third (next-link)))
    (define first-image ((query-selector (λ (e a c) (eq? e 'img)) member)))
    (define icon (get-attribute 'src (bits->attributes first-image)))
    (define first-paragraph ((query-selector (λ (e a c) (eq? e 'p)) member)))
    (define description (second first-paragraph))
    (list '() title url icon description)))
|
|
@ -1,130 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/list
|
||||
racket/match
|
||||
racket/string
|
||||
memo
|
||||
net/http-easy
|
||||
html-parsing
|
||||
"../lib/pure-utils.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
get-redirect-content)
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
|
||||
;; Each entry is:
;; (fandom wikinames) * Title * Main Page * Search page override * API endpoint override
(define wikis
  '(((gallowmere) "MediEvil Wiki" "https://medievil.wiki/w/Main_Page" #f #f)
    ((fallout) "Fallout Wiki" "https://fallout.wiki/wiki/Fallout_Wiki" #f "https://fallout.wiki/api.php")
    ((drawntolife) "Wapopedia" "https://drawntolife.wiki/en/Main_Page" #f "https://drawntolife.wiki/w/api.php")))

;; Mutable lookup table: wikiname (as a string) -> the full entry from `wikis`.
;; An entry that lists several wikinames is stored once under each of them.
(define wikis-hash
  (let ([index (make-hash)])
    (for* ([entry (in-list wikis)]
           [wikiname (in-list (car entry))])
      (hash-set! index (symbol->string wikiname) entry))
    index))

(module+ test
  (check-equal? (cadr (hash-ref wikis-hash "gallowmere"))
                "MediEvil Wiki"))
|
||||
|
||||
;; Turn a two-row table x-expression into an immutable hasheq.
;; Keys come from the <th> cells of the first <tr>: the first string inside each
;; cell, trimmed, downcased and converted to a symbol. Values come from the <td>
;; cells of the second <tr>, paired with the headers by position: each value is
;; the cell's tail filtered through element-is-content?.
(define (parse-table table)
  (define next-row (query-selector (λ (t a c) (eq? t 'tr)) table))
  ;; rows are taken in document order: header row first, then the data row
  (define header-row (next-row))
  (define next-header (query-selector (λ (t a c) (eq? t 'th)) header-row))
  (define column-names
    (for/list ([header-cell (in-producer next-header #f)])
      (define label (findf string? header-cell))
      (string->symbol (string-downcase (string-trim label)))))
  (define data-row (next-row))
  (define next-cell (query-selector (λ (t a c) (eq? t 'td)) data-row))
  (for/hasheq ([column (in-list column-names)]
               [cell (in-producer next-cell #f)])
    (values column (filter element-is-content? (cdr cell)))))

(module+ test
  (check-equal? (parse-table (html->xexp "<table> <tbody><tr> <th>Links</th></tr> <tr> <td><a target=\"_blank\" rel=\"nofollow noreferrer noopener\" class=\"external text\" href=\"https://sirdanielfortesque.proboards.com/\">Forum</a></td></tr></tbody></table>"))
                '#hasheq((links . ((a (@ (target "_blank") (rel "nofollow noreferrer noopener") (class "external text") (href "https://sirdanielfortesque.proboards.com/")) "Forum"))))))
|
||||
|
||||
;; Extract the <a> elements from the 'links column of a parsed table.
;; Returns two values: the list of links, and a list of error message strings.
;; Exactly one of the two lists is non-empty.
(define (table->links table)
  (define links-cell (hash-ref table 'links #f))
  (cond/var
   [(not links-cell) (values null '("Data table must have a \"Links\" column"))]
   ;; keep only the items that are <a> elements
   (var anchors (for/list ([item (in-list links-cell)]
                           #:when (and (pair? item) (eq? (car item) 'a)))
                  item))
   [(pair? anchors) (values anchors null)]
   [#t (values null '("Links column must have at least one link"))]))
|
||||
|
||||
;; Find the logo URL in the 'logo column of a parsed table.
;; Only the first item of the column is inspected; its href attribute is
;; preferred and its src attribute is the fallback.
;; Returns two values: the URL (or #f), and a list of error message strings.
(define (table->logo table)
  (define logo-cell (hash-ref table 'logo #f))
  (cond/var
   [(not logo-cell) (values #f '("Data table must have a \"Logo\" column"))]
   [(null? logo-cell) (values #f '("Logo table column must have a link"))]
   (var attributes (bits->attributes (car logo-cell)))
   (var true-src (or (get-attribute 'href attributes)
                     (get-attribute 'src attributes)))
   [true-src (values true-src null)]
   [#t (values #f '("Logo table column must have a link"))]))
|
||||
|
||||
;; Work out the api.php URL for a wiki entry.
;; The entry's fifth item, when not #f, is used as-is. Otherwise the URL is
;; derived from the main page (third item):
;;   .../          -> .../api.php
;;   X/wiki/...    -> X/w/api.php
;;   X/w/...       -> X/api.php
;; Raises an error for any other main page shape.
(define (get-api-endpoint wiki)
  (define home (third wiki))
  (define explicit-endpoint (fifth wiki))
  (cond
    [explicit-endpoint explicit-endpoint]
    [(regexp-match? #rx"/$" home)
     (string-append home "api.php")]
    [(regexp-match #rx"^(.*)/wiki/" home)
     => (λ (groups) (string-append (second groups) "/w/api.php"))]
    [(regexp-match #rx"^(.*)/w/" home)
     => (λ (groups) (string-append (second groups) "/api.php"))]
    [else (error 'get-api-endpoint "unknown url format: ~a" home)]))
|
||||
|
||||
;; Work out the Special:Search URL for a wiki entry.
;; The entry's fourth item, when not #f, is used as-is. Otherwise the URL is
;; derived from the main page (third item):
;;   .../                         -> .../Special:Search
;;   prefix ending in /en/ or /w*/ -> prefix + Special:Search
;; Raises an error for any other main page shape.
(define (get-search-page wiki)
  (define home (third wiki))
  (define explicit-search-page (fourth wiki))
  (cond
    [explicit-search-page explicit-search-page]
    [(regexp-match? #rx"/$" home)
     (string-append home "Special:Search")]
    [(regexp-match #rx"^(.*/(?:en|w[^./]*)/)" home)
     => (λ (groups) (string-append (second groups) "Special:Search"))]
    [else (error 'get-search-page "unknown url format: ~a" home)]))
|
||||
|
||||
;; Build the "this wiki has its own website" notice for a Fandom wikiname.
;; Returns #f when the wikiname has no entry in wikis-hash. Otherwise it requests
;; the parsed MediaWiki:BreezeWikiRedirect page from the wiki's API and returns a
;; procedure that takes a page title and produces the notice as an x-expression.
;; The notice contains the page's <p> elements, plus the links and logo read from
;; the page's first <table>; problems with that table are listed at the end.
(define/memoize (get-redirect-content wikiname) #:hash hash
  (define wiki (hash-ref wikis-hash wikiname #f))
  (and
   wiki
   (let ()
     (define display-name (second wiki))
     (define endpoint
       (string-append (get-api-endpoint wiki)
                      "?action=parse&page=MediaWiki:BreezeWikiRedirect&prop=text&formatversion=2&format=json"))
     (define response (get endpoint))
     (define page-html (jp "/parse/text" (response-json response)))
     (define content
       ((query-selector (λ (t a c) (has-class? "mw-parser-output" a))
                        (html->xexp page-html))))
     ;; every <p> found in the parser output is copied into the notice
     (define paragraphs
       (for/list ([paragraph (in-producer (query-selector (λ (t a c) (eq? t 'p)) content) #f)])
         paragraph))
     (define table (parse-table ((query-selector (λ (t a c) (eq? t 'table)) content))))
     (define-values (links links-errors) (table->links table))
     (define-values (logo logo-errors) (table->logo table))
     (define construct-errors (append links-errors logo-errors))
     ;; fragments that do not depend on the requested title
     (define logo-column
       (if logo
           `(div (@ (class "niwa__right"))
                 (img (@ (class "niwa__logo") (src ,logo))))
           ""))
     (define feedback-links
       (if (pair? links)
           `(p (@ (class "niwa__feedback"))
               ,@(add-between links " / "))
           ""))
     (define error-list
       (if (pair? construct-errors)
           `(ul
             ,@(map (λ (message) `(li ,message)) construct-errors))
           ""))
     (λ (title)
       ;; search URL on the external wiki that jumps straight to the page with this title
       (define go
         (string-append (get-search-page wiki)
                        "?"
                        (params->query `(("search" . ,title)
                                         ("go" . "Go")))))
       `(aside (@ (class "niwa__notice"))
               (h1 (@ (class "niwa__header")) ,display-name " has its own website separate from Fandom.")
               (div (@ (class "niwa__cols"))
                    (div (@ (class "niwa__left"))
                         (a (@ (class "niwa__go") (href ,go)) "Read " ,title " on " ,display-name " →")
                         ,@paragraphs
                         (p "This external wiki is a helpful alternative to Fandom. You should "
                            (a (@ (href ,go)) "check it out now!")))
                    ,logo-column)
               ,feedback-links
               ,error-list)))))

(module+ test
  (check-not-false ((get-redirect-content "gallowmere") "MediEvil Wiki")))
|
|
@ -1,74 +0,0 @@
|
|||
#lang typed/racket/base
|
||||
(require racket/format
|
||||
racket/string
|
||||
"config.rkt"
|
||||
"../lib/url-utils.rkt")
|
||||
(define-type Headers (HashTable Symbol (U Bytes String)))
|
||||
(require/typed net/http-easy
|
||||
[#:opaque Timeout-Config timeout-config?]
|
||||
[#:opaque Response response?]
|
||||
[#:opaque Session session?]
|
||||
[response-status-code (Response -> Natural)]
|
||||
[current-session (Parameter Session)]
|
||||
[current-user-agent (Parameter (U Bytes String))]
|
||||
[make-timeout-config ([#:lease Positive-Real] [#:connect Positive-Real] -> Timeout-Config)]
|
||||
[get ((U Bytes String)
|
||||
[#:close? Boolean]
|
||||
[#:headers Headers]
|
||||
[#:timeouts Timeout-Config]
|
||||
[#:max-attempts Exact-Positive-Integer]
|
||||
[#:max-redirects Exact-Nonnegative-Integer]
|
||||
[#:user-agent (U Bytes String)]
|
||||
-> Response)])
|
||||
|
||||
(provide
|
||||
fandom-get
|
||||
fandom-get-api
|
||||
timeouts)
|
||||
|
||||
;; Prefix the user agent with a BreezeWiki identifier, unless it already mentions
;; BreezeWiki. The identifier carries the configured canonical_origin, or "local"
;; when that setting is not true, followed by the previous user agent.
(let ([previous-agent (current-user-agent)])
  (unless (string-contains? (~a previous-agent) "BreezeWiki")
    (define origin
      (if (config-true? 'canonical_origin)
          (config-get 'canonical_origin)
          "local"))
    (current-user-agent
     (format "BreezeWiki/1.0 (~a) ~a" origin previous-agent))))
|
||||
|
||||
;; Timeout configuration passed as #:timeouts to the request made by fandom-get.
;; NOTE(review): the 5s are presumably seconds - confirm against net/http-easy.
(define timeouts (make-timeout-config #:lease 5 #:connect 5))
|
||||
|
||||
;; Clock reading, from (current-inexact-milliseconds), taken when fandom-get
;; last received a 403 or 406 response. Starts at 0.0.
(: last-failure Flonum)
|
||||
(define last-failure 0.0)
|
||||
;; The 403/406 response stored alongside last-failure; #f until one is seen.
(: stored-failure (Option Response))
|
||||
(define stored-failure #f)
|
||||
;; Added to last-failure and compared with the millisecond clock, so this is a
;; duration in milliseconds during which fandom-get returns stored-failure.
(define failure-persist-time 30000)
|
||||
|
||||
;; Empty immutable header table used when the caller supplies no #:headers.
(: no-headers Headers)
|
||||
(define no-headers '#hasheq())
|
||||
|
||||
;; Request `path` from a Fandom wiki and return the response.
;; The request goes to https://www.fandom.com with a Host header of
;; <wikiname>.fandom.com added to the caller's headers.
;; A 403 or 406 response is remembered; for failure-persist-time milliseconds
;; afterwards that stored response is returned without making a new request,
;; whichever wikiname or path is asked for.
(: fandom-get (String String [#:headers (Option Headers)] -> Response))
(define (fandom-get wikiname path #:headers [headers #f])
  ;; bound locally so the type checker can narrow it away from #f
  (define recent-failure
    (and (< (current-inexact-milliseconds) (+ last-failure failure-persist-time))
         stored-failure))
  (cond
    [recent-failure recent-failure]
    [else
     (define target-url (string-append "https://www.fandom.com" path))
     (define host-header (string-append wikiname ".fandom.com"))
     (log-outgoing wikiname path)
     (define response
       (get target-url
            #:timeouts timeouts
            #:headers (hash-set (or headers no-headers) 'Host host-header)))
     (when (memq (response-status-code response) '(403 406))
       (set! last-failure (current-inexact-milliseconds))
       (set! stored-failure response))
     response]))
|
||||
|
||||
;; Request /api.php on a Fandom wiki through fandom-get.
;; `params` is a list of (name . value) string pairs, encoded into the query
;; string by params->query. `headers` is passed through unchanged.
(: fandom-get-api (String (Listof (Pair String String)) [#:headers (Option Headers)] -> Response))
(define (fandom-get-api wikiname params #:headers [headers #f])
  (define api-path (string-append "/api.php?" (params->query params)))
  (fandom-get wikiname api-path #:headers headers))
|
||||
|
||||
;; Print "out: <wikiname> <path>" to the current output port, but only when the
;; log_outgoing config setting is true.
(: log-outgoing (String String -> Void))
(define (log-outgoing wikiname path)
  (if (config-true? 'log_outgoing)
      (printf "out: ~a ~a~n" wikiname path)
      (void)))
|
63
src/log.rkt
63
src/log.rkt
|
@ -1,63 +0,0 @@
|
|||
#lang typed/racket/base
|
||||
(require racket/file
|
||||
racket/path
|
||||
racket/port
|
||||
racket/string
|
||||
typed/srfi/19
|
||||
"config.rkt")
|
||||
|
||||
(provide
|
||||
log-page-request
|
||||
log-styles-request
|
||||
log-set-settings-request)
|
||||
|
||||
;; (current-milliseconds) reading taken at the most recent flush done by `log`; 0 before the first one.
(define last-flush 0)
|
||||
;; Minimum number of milliseconds between two flushes of the log port.
(define flush-every-millis 60000)
|
||||
|
||||
;; anytime-path macro expansion only works in an untyped submodule for reasons I cannot comprehend
|
||||
;; Untyped submodule whose only job is to define and export log-dir.
(module define-log-dir racket/base
|
||||
(require racket/path
|
||||
"../lib/syntax.rkt")
|
||||
(provide log-dir)
|
||||
;; NOTE(review): presumably resolves "storage/logs" one directory above this file - confirm against anytime-path.
(define log-dir (anytime-path ".." "storage/logs")))
|
||||
;; Bring log-dir back into the typed module, typed as a Path.
(require/typed (submod "." define-log-dir)
|
||||
[log-dir Path])
|
||||
|
||||
;; File that access log entries are appended to.
(define log-file (build-path log-dir "access-0.log"))

;; Output port that `log` writes to. When access_log::enabled is true, the log
;; directory is created if needed and the log file is opened in append mode;
;; otherwise entries are written to a port that discards them.
(define log-port
  (cond
    [(config-true? 'access_log::enabled)
     (make-directory* log-dir)
     (open-output-file log-file #:exists 'append)]
    [else (open-output-nowhere)]))
|
||||
|
||||
;; Current date and time as a string, taken with a time zone offset of 0 and
;; formatted with the SRFI 19 "~5" directive.
(: get-date-iso8601 (-> String))
(define (get-date-iso8601)
  (define now (current-date 0))
  (date->string now "~5"))
|
||||
|
||||
;; Three-character marker used in log entries: "---" for an offline request,
;; "ooo" otherwise.
(: offline-string (Boolean -> String))
(define (offline-string offline?)
  (cond
    [offline? "---"]
    [else "ooo"]))
|
||||
|
||||
;; Write one log line: the current timestamp followed by the given fields, all
;; joined with ";". The port is flushed only when at least flush-every-millis
;; milliseconds have passed since the previous flush.
(: log (String * -> Void))
(define (log . entry)
  ;; timestamp goes first, then the caller's fields
  (define line (string-join (cons (get-date-iso8601) entry) ";"))
  (displayln line log-port)
  ;; don't flush too frequently
  (unless (< (- (current-milliseconds) last-flush) flush-every-millis)
    (flush-output log-port)
    (set! last-flush (current-milliseconds))))
|
||||
|
||||
;; Log a "page" entry: offline marker, wikiname, page title, and theme name.
(: log-page-request (Boolean String String (U 'light 'dark 'default) -> Void))
(define (log-page-request offline? wikiname title theme)
  (define marker (offline-string offline?))
  (define theme-name (symbol->string theme))
  (log "page" marker wikiname title theme-name))
|
||||
|
||||
;; Log a "style" entry: offline marker, wikiname, and the requested basename.
(: log-styles-request (Boolean String String -> Void))
(define (log-styles-request offline? wikiname basename)
  (define marker (offline-string offline?))
  (log "style" marker wikiname basename))
|
||||
|
||||
;; Log a "settings" entry carrying the name of the chosen theme.
(: log-set-settings-request (Symbol -> Void))
(define (log-set-settings-request theme)
  (define theme-name (symbol->string theme))
  (log "settings" theme-name))
|
|
@ -15,38 +15,33 @@
|
|||
"application-globals.rkt"
|
||||
"config.rkt"
|
||||
"data.rkt"
|
||||
"fandom-request.rkt"
|
||||
"page-wiki.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/thread-utils.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
"syntax.rkt"
|
||||
"url-utils.rkt"
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
page-category)
|
||||
|
||||
(module+ test
|
||||
(require rackunit
|
||||
"test-utils.rkt")
|
||||
(require rackunit)
|
||||
(define category-json-data
|
||||
'#hasheq((batchcomplete . #t) (continue . #hasheq((cmcontinue . "page|4150504c45|41473") (continue . "-||"))) (query . #hasheq((categorymembers . (#hasheq((ns . 0) (pageid . 25049) (title . "Item (entity)")) #hasheq((ns . 0) (pageid . 128911) (title . "3D")) #hasheq((ns . 0) (pageid . 124018) (title . "A Very Fine Item")) #hasheq((ns . 0) (pageid . 142208) (title . "Amethyst Shard")) #hasheq((ns . 0) (pageid . 121612) (title . "Ankle Monitor")))))))))
|
||||
|
||||
(define (generate-results-page
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:members-data members-data
|
||||
#:page page
|
||||
#:head-data [head-data #f]
|
||||
#:body-class [body-class #f]
|
||||
#:siteinfo [siteinfo #f])
|
||||
(define members (jp "/query/categorymembers" members-data))
|
||||
(generate-wiki-page
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:head-data head-data
|
||||
#:body-class body-class
|
||||
#:siteinfo siteinfo
|
||||
`(div
|
||||
,(update-tree-wiki page wikiname)
|
||||
|
@ -57,7 +52,7 @@
|
|||
,@(map
|
||||
(λ (result)
|
||||
(define title (jp "/title" result))
|
||||
(define page-path (page-title->path title))
|
||||
(define page-path (regexp-replace* #rx" " title "_"))
|
||||
`(li
|
||||
(a (@ (href ,(format "/~a/wiki/~a" wikiname page-path)))
|
||||
,title)))
|
||||
|
@ -66,61 +61,64 @@
|
|||
(define (page-category req)
|
||||
(response-handler
|
||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||
(define prefixed-category (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/"))
|
||||
(define prefixed-category (path/param-path (caddr (url-path (request-uri req)))))
|
||||
(define origin (format "https://~a.fandom.com" wikiname))
|
||||
(define source-url (format "~a/wiki/~a" origin prefixed-category))
|
||||
|
||||
(define-values (members-data page-data siteinfo)
|
||||
(thread-values
|
||||
(λ ()
|
||||
(easy:response-json
|
||||
(fandom-get-api
|
||||
wikiname
|
||||
`(("action" . "query")
|
||||
("list" . "categorymembers")
|
||||
("cmtitle" . ,prefixed-category)
|
||||
("cmlimit" . "max")
|
||||
("formatversion" . "2")
|
||||
("format" . "json")))))
|
||||
(λ ()
|
||||
(easy:response-json
|
||||
(fandom-get-api
|
||||
wikiname
|
||||
`(("action" . "parse")
|
||||
("page" . ,prefixed-category)
|
||||
("prop" . "text|headhtml|langlinks")
|
||||
("formatversion" . "2")
|
||||
("format" . "json")))))
|
||||
(λ ()
|
||||
(siteinfo-fetch wikiname))))
|
||||
(thread-let
|
||||
([members-data (define dest-url
|
||||
(format "~a/api.php?~a"
|
||||
origin
|
||||
(params->query `(("action" . "query")
|
||||
("list" . "categorymembers")
|
||||
("cmtitle" . ,prefixed-category)
|
||||
("cmlimit" . "max")
|
||||
("formatversion" . "2")
|
||||
("format" . "json")))))
|
||||
(log-outgoing dest-url)
|
||||
(define dest-res (easy:get dest-url #:timeouts timeouts))
|
||||
(easy:response-json dest-res)]
|
||||
[page-data (define dest-url
|
||||
(format "~a/api.php?~a"
|
||||
origin
|
||||
(params->query `(("action" . "parse")
|
||||
("page" . ,prefixed-category)
|
||||
("prop" . "text|headhtml|langlinks")
|
||||
("formatversion" . "2")
|
||||
("format" . "json")))))
|
||||
(log-outgoing dest-url)
|
||||
(define dest-res (easy:get dest-url #:timeouts timeouts))
|
||||
(easy:response-json dest-res)]
|
||||
[siteinfo (siteinfo-fetch wikiname)])
|
||||
|
||||
(define title (preprocess-html-wiki (jp "/parse/title" page-data prefixed-category)))
|
||||
(define page-html (preprocess-html-wiki (jp "/parse/text" page-data "")))
|
||||
(define page (html->xexp page-html))
|
||||
(define head-data ((head-data-getter wikiname) page-data))
|
||||
(define body (generate-results-page
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:members-data members-data
|
||||
#:page page
|
||||
#:head-data head-data
|
||||
#:siteinfo siteinfo))
|
||||
(define title (preprocess-html-wiki (jp "/parse/title" page-data prefixed-category)))
|
||||
(define page-html (preprocess-html-wiki (jp "/parse/text" page-data "")))
|
||||
(define page (html->xexp page-html))
|
||||
(define head-html (jp "/parse/headhtml" page-data ""))
|
||||
(define body-class (match (regexp-match #rx"<body [^>]*class=\"([^\"]*)" head-html)
|
||||
[(list _ classes) classes]
|
||||
[_ ""]))
|
||||
(define body (generate-results-page
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:members-data members-data
|
||||
#:page page
|
||||
#:body-class body-class
|
||||
#:siteinfo siteinfo))
|
||||
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
(response/output
|
||||
#:code 200
|
||||
#:headers (build-headers always-headers)
|
||||
(λ (out)
|
||||
(write-html body out)))))
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
(response/output
|
||||
#:code 200
|
||||
#:headers (build-headers always-headers)
|
||||
(λ (out)
|
||||
(write-html body out))))))
|
||||
(module+ test
|
||||
(check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Ankle_Monitor")
|
||||
(generate-results-page
|
||||
#:req test-req
|
||||
#:source-url ""
|
||||
#:wikiname "test"
|
||||
#:title "Category:Items"
|
||||
|
|
|
@ -15,18 +15,15 @@
|
|||
"application-globals.rkt"
|
||||
"config.rkt"
|
||||
"data.rkt"
|
||||
"fandom-request.rkt"
|
||||
"page-wiki.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/thread-utils.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
"syntax.rkt"
|
||||
"url-utils.rkt"
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide page-file)
|
||||
|
||||
(module+ test
|
||||
(require rackunit
|
||||
"test-utils.rkt")
|
||||
(require rackunit)
|
||||
(define test-media-detail
|
||||
'#hasheq((fileTitle . "Example file")
|
||||
(videoEmbedCode . "")
|
||||
|
@ -40,7 +37,8 @@
|
|||
(imageDescription . #f))))
|
||||
|
||||
(define (url-content-type url)
|
||||
(define dest-res (easy:head url))
|
||||
(log-outgoing url)
|
||||
(define dest-res (easy:head url #:timeouts timeouts))
|
||||
(easy:response-headers-ref dest-res 'content-type))
|
||||
|
||||
(define (get-media-html url content-type)
|
||||
|
@ -53,8 +51,7 @@
|
|||
[(regexp-match? #rx"(?i:^video/)" content-type) `(video (@ (src ,maybe-proxied-url) (controls)))]
|
||||
[else `""]))
|
||||
|
||||
(define (generate-results-page #:req req
|
||||
#:source-url source-url
|
||||
(define (generate-results-page #:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:media-detail media-detail
|
||||
|
@ -71,7 +68,6 @@
|
|||
(define maybe-proxied-raw-image-url
|
||||
(if (config-true? 'strict_proxy) (u-proxy-url raw-image-url) raw-image-url))
|
||||
(generate-wiki-page
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
|
@ -102,49 +98,46 @@
|
|||
`""))))
|
||||
|
||||
(define (page-file req)
|
||||
(response-handler
|
||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||
(define prefixed-title (path/param-path (caddr (url-path (request-uri req)))))
|
||||
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname prefixed-title))
|
||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||
(define prefixed-title (path/param-path (caddr (url-path (request-uri req)))))
|
||||
(define origin (format "https://~a.fandom.com" wikiname))
|
||||
(define source-url (format "~a/wiki/~a" origin prefixed-title))
|
||||
|
||||
(define-values (media-detail siteinfo)
|
||||
(thread-values
|
||||
(λ ()
|
||||
(define dest-res
|
||||
(fandom-get
|
||||
wikiname
|
||||
(format "/wikia.php?~a"
|
||||
(params->query `(("format" . "json") ("controller" . "Lightbox")
|
||||
("method" . "getMediaDetail")
|
||||
("fileTitle" . ,prefixed-title))))))
|
||||
(easy:response-json dest-res))
|
||||
(λ ()
|
||||
(siteinfo-fetch wikiname))))
|
||||
(if (not (jp "/exists" media-detail #f))
|
||||
(next-dispatcher)
|
||||
(response-handler
|
||||
(define file-title (jp "/fileTitle" media-detail ""))
|
||||
(define title
|
||||
(if (non-empty-string? file-title) (format "File:~a" file-title) prefixed-title))
|
||||
(define image-content-type
|
||||
(if (non-empty-string? (jp "/videoEmbedCode" media-detail ""))
|
||||
#f
|
||||
(url-content-type (jp "/imageUrl" media-detail))))
|
||||
(define body
|
||||
(generate-results-page #:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:media-detail media-detail
|
||||
#:image-content-type image-content-type
|
||||
#:siteinfo siteinfo))
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
(response/output #:code 200
|
||||
#:headers (build-headers always-headers)
|
||||
(λ (out) (write-html body out)))))))
|
||||
(thread-let ([media-detail
|
||||
(define dest-url
|
||||
(format "~a/wikia.php?~a"
|
||||
origin
|
||||
(params->query `(("format" . "json") ("controller" . "Lightbox")
|
||||
("method" . "getMediaDetail")
|
||||
("fileTitle" . ,prefixed-title)))))
|
||||
(log-outgoing dest-url)
|
||||
(define dest-res (easy:get dest-url #:timeouts timeouts))
|
||||
(easy:response-json dest-res)]
|
||||
[siteinfo (siteinfo-fetch wikiname)])
|
||||
(if (not (jp "/exists" media-detail #f))
|
||||
(next-dispatcher)
|
||||
(response-handler
|
||||
(define file-title (jp "/fileTitle" media-detail ""))
|
||||
(define title
|
||||
(if (non-empty-string? file-title) (format "File:~a" file-title) prefixed-title))
|
||||
(define image-content-type
|
||||
(if (non-empty-string? (jp "/videoEmbedCode" media-detail ""))
|
||||
#f
|
||||
(url-content-type (jp "/imageUrl" media-detail))))
|
||||
(define body
|
||||
(generate-results-page #:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:media-detail media-detail
|
||||
#:image-content-type image-content-type
|
||||
#:siteinfo siteinfo))
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
(response/output #:code 200
|
||||
#:headers (build-headers always-headers)
|
||||
(λ (out) (write-html body out)))))))
|
||||
(module+ test
|
||||
(parameterize ([(config-parameter 'strict_proxy) "true"])
|
||||
(check-equal? (get-media-html "https://static.wikia.nocookie.net/a" "image/jpeg")
|
||||
|
@ -166,8 +159,7 @@
|
|||
(check-not-false
|
||||
((query-selector
|
||||
(attribute-selector 'src "/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fexamplefile")
|
||||
(generate-results-page #:req test-req
|
||||
#:source-url ""
|
||||
(generate-results-page #:source-url ""
|
||||
#:wikiname "test"
|
||||
#:title "File:Example file"
|
||||
#:media-detail test-media-detail
|
||||
|
|
|
@ -4,9 +4,8 @@
|
|||
net/url
|
||||
web-server/http
|
||||
"application-globals.rkt"
|
||||
"data.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
"url-utils.rkt"
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
page-global-search)
|
||||
|
@ -15,18 +14,12 @@
|
|||
(define wikiname (dict-ref (url-query (request-uri req)) 'wikiname #f))
|
||||
(define q (dict-ref (url-query (request-uri req)) 'q #f))
|
||||
(response-handler
|
||||
(cond
|
||||
[(not wikiname)
|
||||
(response/output
|
||||
#:code 400
|
||||
#:mime-type #"text/plain"
|
||||
(λ (out)
|
||||
(displayln "Requires wikiname and q parameters." out)))]
|
||||
[(or (not q) (equal? q ""))
|
||||
(define siteinfo (siteinfo-fetch wikiname))
|
||||
(define dest (format "/~a/wiki/~a" wikiname (or (siteinfo^-basepage siteinfo) "Main_Page")))
|
||||
(generate-redirect dest)]
|
||||
[#t
|
||||
(generate-redirect (format "/~a/search?~a"
|
||||
wikiname
|
||||
(params->query `(("q" . ,q)))))])))
|
||||
(if (not (and wikiname q))
|
||||
(response/output
|
||||
#:code 400
|
||||
#:mime-type "text/plain"
|
||||
(λ (out)
|
||||
(displayln "Requires wikiname and q parameters." out)))
|
||||
(generate-redirect (format "/~a/search?~a"
|
||||
wikiname
|
||||
(params->query `(("q" . ,q))))))))
|
||||
|
|
|
@ -4,10 +4,8 @@
|
|||
html-writing
|
||||
web-server/http
|
||||
"application-globals.rkt"
|
||||
"data.rkt"
|
||||
"static-data.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt"
|
||||
"url-utils.rkt"
|
||||
"xexpr-utils.rkt"
|
||||
"config.rkt")
|
||||
|
||||
(provide
|
||||
|
@ -17,27 +15,24 @@
|
|||
(require rackunit))
|
||||
|
||||
(define examples
|
||||
'(("minecraft" "Bricks")
|
||||
("crosscode" "CrossCode Wiki")
|
||||
("undertale" "Hot Dog...?")
|
||||
("tardis" "Eleanor Blake")
|
||||
("zelda" "Boomerang")))
|
||||
'(("crosscode" "CrossCode_Wiki")
|
||||
("pokemon" "Eevee")
|
||||
("minecraft" "Bricks")
|
||||
("undertale" "Hot_Dog...%3F")
|
||||
("tardis" "Eleanor_Blake")
|
||||
("fireemblem" "God-Shattering_Star")
|
||||
("fallout" "Pip-Boy_3000")))
|
||||
|
||||
(define content
|
||||
`((h2 "BreezeWiki makes wiki pages on Fandom readable")
|
||||
(p "It removes ads, videos, and suggested content, leaving you with a clean page that doesn't slow down your device or use up your data.")
|
||||
(p "BreezeWiki can also be called an \"alternative frontend for Fandom\".")
|
||||
(p ,(format "To use BreezeWiki, just replace \"fandom.com\" with \"~a\", and you'll instantly be teleported to a better world."
|
||||
(if (config-true? 'canonical_origin)
|
||||
(url-host (string->url (config-get 'canonical_origin)))
|
||||
"breezewiki.com")))
|
||||
(p "If you'd like to be automatically sent to BreezeWiki every time in the future, "
|
||||
,@(if (config-member? 'promotions::indie_wiki_buddy "home")
|
||||
`((a (@ (href "https://getindie.wiki")) "get our affiliated browser extension (NEW!)")
|
||||
" or ")
|
||||
null)
|
||||
(a (@ (href "https://docs.breezewiki.com/Automatic_Redirection.html")) "check out the tutorial in the manual."))
|
||||
(p "BreezeWiki is available on several different websites called " (a (@ (href "https://en.wikipedia.org/wiki/Mirror_site")) "mirrors") ". Each is independently run. If one mirror is offline, the others still work. "
|
||||
(a (@ (href "https://docs.breezewiki.com/Links.html#%28part._.Mirrors%29")) "See the list."))
|
||||
(h2 "Find a page")
|
||||
(form (@ (action "/search"))
|
||||
(label (@ (class "paired__label"))
|
||||
|
@ -45,47 +40,42 @@
|
|||
(input (@ (name "wikiname") (class "paired__input") (type "text") (placeholder "pokemon") (required))))
|
||||
(label (@ (class "paired__label"))
|
||||
"Search query"
|
||||
(input (@ (name "q") (class "paired__input") (type "text") (placeholder "Eevee"))))
|
||||
(input (@ (name "q") (class "paired__input") (type "text") (placeholder "Eevee") (required))))
|
||||
(button "Search"))
|
||||
(h2 "Example pages")
|
||||
(ul
|
||||
,@(map (λ (x)
|
||||
`(li (a (@ (href ,(format "/~a/wiki/~a" (car x) (page-title->path (cadr x)))))
|
||||
`(li (a (@ (href ,(apply format "/~a/wiki/~a" x)))
|
||||
,(apply format "~a: ~a" x))))
|
||||
examples))
|
||||
(h2 "Testimonials")
|
||||
(p (@ (class "testimonial")) ">so glad someone introduced me to a F*ndom alternative (BreezeWiki) because that x-factorized spillway of an ad-infested radioactive dumpsite can go die in a fire —RB")
|
||||
(p (@ (class "testimonial")) ">apparently there are thousands of people essentially running our company " (em "for free") " right now, creating tons of content, and we just put ads on top of it and they're not even employees. thousands of people we can't lay off. thousands! —" (a (@ (href "https://hard-drive.net/fandom-ceo-frustrated-its-impossible-to-lay-off-unpaid-users-who-update-wikias-for-fun/?utm_source=breezewiki") (target "_blank")) "Perkins Miller, Fandom CEO"))
|
||||
(p (@ (class "testimonial")) ">So glad to never have to touch fandom's garbage platform directly ever again —RNL")
|
||||
(p (@ (class "testimonial")) ">you are so right that fandom still sucks even with adblock somehow. even zapping all the stupid padding it still sucks —Minimus")
|
||||
(p (@ (class "testimonial")) ">attempting to go to a wiki's forum page with breezewiki doesn't work, which is based honestly —Tom Skeleton")
|
||||
(p (@ (class "testimonial")) ">Fandom pages crashing and closing, taking forever to load and locking up as they load the ads on the site... they are causing the site to crash because they are trying to load video ads both at the top and bottom of the site as well as two or three banner ads, then a massive top of site ad and eventually my anti-virus shuts the whole site down because it's literally pulling more resources than WoW in ultra settings... —Anonymous")
|
||||
(p (@ (class "testimonial")) ">reblogs EXTREMELY appreciated I want that twink* (*fandom wiki) obliterated —footlong")
|
||||
|
||||
(h2 "What BreezeWiki isn't")
|
||||
(p "BreezeWiki isn't an \"alternative\" to Fandom, and it doesn't let you edit or write new pages.")
|
||||
(p "If you want to create your own wiki, try Miraheze!")))
|
||||
|
||||
(define body
|
||||
`(*TOP*
|
||||
(*DECL* DOCTYPE html)
|
||||
(html
|
||||
(head
|
||||
(meta (@ (name "viewport") (content "width=device-width, initial-scale=1")))
|
||||
(title "About | BreezeWiki")
|
||||
(link (@ (rel "stylesheet") (type "text/css") (href ,(get-static-url "internal.css"))))
|
||||
(link (@ (rel "stylesheet") (type "text/css") (href ,(get-static-url "main.css"))))
|
||||
(link (@ (rel "icon") (href ,(head-data^-icon-url head-data-default)))))
|
||||
(body (@ (class "skin-fandomdesktop theme-fandomdesktop-light internal"))
|
||||
(div (@ (class "main-container"))
|
||||
(div (@ (class "fandom-community-header__background tileBoth header")))
|
||||
(div (@ (class "page"))
|
||||
(main (@ (class "page__main"))
|
||||
(div (@ (class "custom-top"))
|
||||
(h1 (@ (class "page-title"))
|
||||
"About BreezeWiki"))
|
||||
(div (@ (id "content") #;(class "page-content"))
|
||||
(div (@ (id "mw-content-text"))
|
||||
,@content))
|
||||
,(application-footer #f))))))))
|
||||
`(html
|
||||
(head
|
||||
(meta (@ (name "viewport") (content "width=device-width, initial-scale=1")))
|
||||
(title "About | BreezeWiki")
|
||||
(link (@ (rel "stylesheet") (type "text/css") (href "/static/internal.css")))
|
||||
(link (@ (rel "stylesheet") (type "text/css") (href "/static/main.css"))))
|
||||
(body (@ (class "skin-fandomdesktop theme-fandomdesktop-light internal"))
|
||||
(div (@ (class "main-container"))
|
||||
(div (@ (class "fandom-community-header__background tileBoth header")))
|
||||
(div (@ (class "page"))
|
||||
(main (@ (class "page__main"))
|
||||
(div (@ (class "custom-top"))
|
||||
(h1 (@ (class "page-title"))
|
||||
"About BreezeWiki"))
|
||||
(div (@ (id "content") #;(class "page-content"))
|
||||
(div (@ (id "mw-content-text"))
|
||||
,@content))
|
||||
,(application-footer #f)))))))
|
||||
(module+ test
|
||||
(check-not-false (xexp->html body)))
|
||||
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/dict
|
||||
net/url
|
||||
web-server/http
|
||||
web-server/dispatchers/dispatch
|
||||
"application-globals.rkt")
|
||||
|
||||
(provide
|
||||
page-it-works)
|
||||
|
||||
;; Redirects to the "It Works" page on the stampylongnose wiki when the request
;; has a truthy `b` query parameter; otherwise hands the request to the next
;; dispatcher.
(define (page-it-works req)
  (define query (url-query (request-uri req)))
  (cond
    [(dict-ref query 'b #f)
     (generate-redirect "/stampylongnose/wiki/It_Works")]
    [else
     (next-dispatcher)]))
|
|
@ -9,8 +9,8 @@
|
|||
web-server/http
|
||||
(only-in web-server/dispatchers/dispatch next-dispatcher)
|
||||
"application-globals.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
"url-utils.rkt"
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
page-proxy)
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
web-server/http
|
||||
"application-globals.rkt"
|
||||
"data.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
"url-utils.rkt"
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
redirect-wiki-home)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
(require racket/dict
|
||||
racket/list
|
||||
racket/string
|
||||
(prefix-in easy: net/http-easy)
|
||||
; html libs
|
||||
html-writing
|
||||
; web server libs
|
||||
|
@ -12,75 +13,77 @@
|
|||
"application-globals.rkt"
|
||||
"config.rkt"
|
||||
"data.rkt"
|
||||
"search-provider-fandom.rkt"
|
||||
"search-provider-solr.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/thread-utils.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
"syntax.rkt"
|
||||
"url-utils.rkt"
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
page-search)
|
||||
|
||||
(define search-providers
|
||||
(hash "fandom" search-fandom
|
||||
"solr" search-solr))
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(define search-json-data
|
||||
'#hasheq((batchcomplete . #t) (query . #hasheq((search . (#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))))))
|
||||
|
||||
;; this takes the info we gathered from fandom and makes the big fat x-expression page
|
||||
(define (generate-results-page req source-url wikiname query results-content #:siteinfo [siteinfo #f])
|
||||
;; this is *another* helper that builds the wiki page UI and lets me put the search results (or whatever else) in the middle
|
||||
(define (generate-results-page dest-url wikiname query data #:siteinfo [siteinfo #f])
|
||||
(define search-results (jp "/query/search" data))
|
||||
(generate-wiki-page
|
||||
;; so I provide my helper function with the necessary context...
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:source-url dest-url
|
||||
#:wikiname wikiname
|
||||
#:title query
|
||||
#:title "Search Results"
|
||||
#:siteinfo siteinfo
|
||||
;; and here's the actual results to display in the wiki page layout
|
||||
results-content))
|
||||
`(div (@ (class "mw-parser-output"))
|
||||
(p ,(format "~a results found for " (length search-results))
|
||||
(strong ,query))
|
||||
(ul ,@(map
|
||||
(λ (result)
|
||||
(let* ([title (jp "/title" result)]
|
||||
[page-path (regexp-replace* #rx" " title "_")]
|
||||
[timestamp (jp "/timestamp" result)]
|
||||
[wordcount (jp "/wordcount" result)]
|
||||
[size (jp "/size" result)])
|
||||
`(li (@ (class "my-result"))
|
||||
(a (@ (class "my-result__link") (href ,(format "/~a/wiki/~a" wikiname page-path)))
|
||||
,title)
|
||||
(div (@ (class "my-result__info"))
|
||||
"last edited "
|
||||
(time (@ (datetime ,timestamp)) ,(list-ref (string-split timestamp "T") 0))
|
||||
,(format ", ~a words, ~a kb"
|
||||
wordcount
|
||||
(exact->inexact (/ (round (/ size 100)) 10)))))))
|
||||
search-results)))))
|
||||
|
||||
;; will be called when the web browser asks to load the page
|
||||
(define (page-search req)
|
||||
;; this just means, catch any errors and display them in the browser. it's a function somewhere else
|
||||
(response-handler
|
||||
;; the URL will look like "/minecraft/wiki/Special:Search?q=Spawner"
|
||||
;; grab the first part to use as the wikiname, in this case, "minecraft"
|
||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||
;; grab a dict of url search params
|
||||
(define params (url-query (request-uri req)))
|
||||
;; grab the part after ?q= which is the search terms
|
||||
(define query (dict-ref params 'q #f))
|
||||
;; figure out which search provider we're going to use
|
||||
(define search-provider (hash-ref search-providers (config-get 'feature_offline::search)
|
||||
(λ () (error 'search-provider "unknown search provider configured"))))
|
||||
(define query (dict-ref (url-query (request-uri req)) 'q #f))
|
||||
(define origin (format "https://~a.fandom.com" wikiname))
|
||||
(define dest-url
|
||||
(format "~a/api.php?~a"
|
||||
origin
|
||||
(params->query `(("action" . "query")
|
||||
("list" . "search")
|
||||
("srsearch" . ,query)
|
||||
("formatversion" . "2")
|
||||
("format" . "json")))))
|
||||
|
||||
;; external special:search url to link at the bottom of the page as the upstream source
|
||||
(define external-search-url
|
||||
(format "https://~a.fandom.com/wiki/Special:Search?~a"
|
||||
wikiname
|
||||
(params->query `(("query" . ,query)
|
||||
("search" . "internal")))))
|
||||
(thread-let
|
||||
([dest-res (log-outgoing dest-url)
|
||||
(easy:get dest-url #:timeouts timeouts)]
|
||||
[siteinfo (siteinfo-fetch wikiname)])
|
||||
|
||||
;; simultaneously get the search results, as well as information about the wiki as a whole (its license, icon, name)
|
||||
(define-values (results-content siteinfo)
|
||||
(thread-values
|
||||
(λ ()
|
||||
(search-provider wikiname query params)) ;; call the search provider (see file "search-provider-fandom.rkt")
|
||||
(λ ()
|
||||
(siteinfo-fetch wikiname)))) ;; helper function in another file to get information about the wiki
|
||||
|
||||
;; calling my generate-results-page function with the information so far in order to get a big fat x-expression
|
||||
;; big fat x-expression goes into the body variable
|
||||
(define body (generate-results-page req external-search-url wikiname query results-content #:siteinfo siteinfo))
|
||||
;; error checking
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
;; convert body to HTML and send to browser
|
||||
(response/output
|
||||
#:code 200
|
||||
#:headers (build-headers always-headers)
|
||||
(λ (out)
|
||||
(write-html body out)))))
|
||||
(define data (easy:response-json dest-res))
|
||||
|
||||
(define body (generate-results-page dest-url wikiname query data #:siteinfo siteinfo))
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
(response/output
|
||||
#:code 200
|
||||
#:headers (build-headers always-headers)
|
||||
(λ (out)
|
||||
(write-html body out))))))
|
||||
(module+ test
|
||||
(check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
|
||||
(generate-results-page "" "test" "Gacha" search-json-data)))))
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/dict
|
||||
net/url
|
||||
web-server/http
|
||||
"application-globals.rkt"
|
||||
"data.rkt"
|
||||
"log.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
page-set-user-settings)
|
||||
|
||||
;; Applies the settings passed in the `new_settings` query parameter and then
;; redirects to the `next_location` query parameter. Both parameters are
;; required: dict-ref is called without a default, so a missing one raises.
(define (page-set-user-settings req)
  (response-handler
   (define query (url-query (request-uri req)))
   (define next-location (dict-ref query 'next_location))
   (define settings-text (dict-ref query 'new_settings))
   ;; NOTE(review): new_settings comes straight from the request and is parsed with
   ;; `read`, and next_location is used unchecked as the redirect target. Confirm
   ;; that both are acceptable for untrusted input.
   (define new-settings (read (open-input-string settings-text)))
   ;; record the chosen theme in the access log
   (log-set-settings-request (user-cookies^-theme new-settings))
   (generate-redirect next-location #:headers (user-cookies-setter new-settings))))
|
|
@ -1,94 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/file
|
||||
racket/path
|
||||
racket/port
|
||||
racket/string
|
||||
net/url
|
||||
web-server/http
|
||||
web-server/servlet-dispatch
|
||||
web-server/dispatchers/filesystem-map
|
||||
(only-in web-server/dispatchers/dispatch next-dispatcher)
|
||||
"../archiver/archiver.rkt"
|
||||
"../lib/mime-types.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/xexpr-utils.rkt"
|
||||
"config.rkt"
|
||||
"log.rkt")
|
||||
|
||||
(provide
|
||||
page-static-archive)
|
||||
|
||||
(define path-archive (anytime-path ".." "storage/archive"))
|
||||
|
||||
;; Curried helper for regexp-replace*: ((replacer wikiname) whole url) returns
;; the replacement text for one CSS url(...) occurrence.
;;   whole - the full matched text (unused)
;;   url   - the captured URL
;; URLs on the pass-through lists are emitted unchanged. Every other URL is
;; normalised to an absolute https URL and replaced by a local
;; /archive/<wikiname>/images/<...> path taken from image-url->values.
;; Raises an error when the URL has none of the recognised prefixes.
(define ((replacer wikiname) whole url)
  ;; URLs containing any of these substrings are left untouched
  (define passthrough-substrings
    '("/resources-ucp/"
      "/fonts/"
      "/drm_fonts/"
      "//db.onlinewebfonts.com/"
      "//bits.wikimedia.org/"
      "mygamercard.net/"
      "dropbox"
      "only=styles"
      "https://https://"))
  ;; URLs matching any of these patterns are left untouched
  (define passthrough-patterns
    (list #rx"^%20|^'"
          #rx"^\"?data:"
          #rx"^file:"))
  (define (passthrough?)
    (or (member url '("" "'"))
        (ormap (λ (needle) (string-contains? url needle)) passthrough-substrings)
        (ormap (λ (pattern) (regexp-match? pattern url)) passthrough-patterns)))
  ;; turn the URL into an absolute https:// URL
  (define (normalise)
    (cond
      [(string-prefix? url "https://") url]
      [(string-prefix? url "http://") (regexp-replace #rx"http:" url "https:")]
      [(string-prefix? url "httpshttps://") (regexp-replace #rx"httpshttps://" url "https://")]
      [(string-prefix? url "//") (string-append "https:" url)]
      [(string-prefix? url "/") (format "https://~a.fandom.com~a" wikiname url)]
      [else (error 'replace-style-for-images "unknown URL format: ~a" url)]))
  (define rewritten
    (cond
      [(passthrough?) url]
      [else
       (define p (image-url->values (normalise)))
       (format "/archive/~a/images/~a" wikiname (cdr p))]))
  (format "url(~a)" rewritten))
|
||||
|
||||
;; Reads the stylesheet at `path` and returns its text with every url(...)
;; occurrence rewritten by (replacer wikiname).
(define (replace-style-for-images wikiname path)
  (let* ([css (file->string path)]
         [rewrite-url (replacer wikiname)])
    (regexp-replace* #rx"url\\(\"?'?([^)]*)'?\"?\\)" css rewrite-url)))
|
||||
|
||||
;; Serves an archived stylesheet for `wikiname`.
;; `dest` is the file name inside <path-archive>/<wikiname>/styles.
;; The request is logged first; if the file does not exist the request is
;; passed to the next dispatcher. Otherwise the stylesheet is returned as
;; text/css with its url(...) references rewritten.
(define (handle-style wikiname dest)
  (when (config-true? 'debug)
    (printf "using offline mode for style ~a ~a~n" wikiname dest))
  (log-styles-request #t wikiname dest)
  (define fs-path (build-path path-archive wikiname "styles" dest))
  (cond
    [(file-exists? fs-path)
     (response-handler
      (define new-content (replace-style-for-images wikiname fs-path))
      (define css-headers
        (list (header #"Content-Type" #"text/css")
              (header #"Referrer-Policy" #"same-origin")))
      (response/output
       #:code 200
       #:headers css-headers
       (λ (out) (displayln new-content out))))]
    [else
     (next-dispatcher)]))
|
||||
|
||||
;; Serves an archived image for `wikiname`.
;; `dest` is the image hash with no extension. Images are looked up in
;; <path-archive>/<wikiname>/images/<first char of dest>/<first two chars of dest>/
;; by finding the first file whose name starts with `dest`.
;; The request is passed to the next dispatcher when `dest` is shorter than 40
;; characters, when the directory does not exist, or when no file matches.
(define (handle-image wikiname dest) ;; dest is the hash with no extension
  (unless ((string-length dest) . >= . 40) (next-dispatcher))
  (response-handler
   (define dir (build-path path-archive wikiname "images" (substring dest 0 1) (substring dest 0 2)))
   (unless (directory-exists? dir) (next-dispatcher))
   (define candidates (directory-list dir))
   ;; findf returns #f when nothing matches, so the result must be checked
   ;; before converting it with path->string (which would raise on #f)
   (define found (findf (λ (f) (string-prefix? (path->string f) dest)) candidates))
   (unless found (next-dispatcher))
   (define target (path->string found))
   ;; assumes file names are a 40-character hash, one separator character, then
   ;; the extension — TODO confirm against the archiver
   (define ext (substring target 41))
   (response/output
    #:code 200
    #:headers (list (header #"Content-Type" (ext->mime-type (string->bytes/latin-1 ext))))
    (λ (out)
      (call-with-input-file (build-path dir target)
        (λ (in)
          (copy-port in out)))))))
|
||||
|
||||
;; Entry point for archive requests. The request path is split into exactly
;; four segments (the first is ignored, then wikiname, kind and dest); "styles"
;; and "images" kinds are forwarded to their handlers, anything else raises a
;; user error inside response-handler.
(define (page-static-archive req)
  (define segments (map path/param-path (url-path (request-uri req))))
  (define-values (_ wikiname kind dest) (apply values segments))
  (case kind
    [("styles") (handle-style wikiname dest)]
    [("images") (handle-image wikiname dest)]
    [else (response-handler (raise-user-error "page-static-archive: how did we get here?" kind))]))
|
|
@ -7,8 +7,6 @@
|
|||
web-server/dispatchers/filesystem-map
|
||||
(only-in web-server/dispatchers/dispatch next-dispatcher)
|
||||
(prefix-in files: web-server/dispatchers/dispatch-files)
|
||||
"../lib/mime-types.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"config.rkt")
|
||||
|
||||
(provide
|
||||
|
@ -18,61 +16,52 @@
|
|||
(require rackunit))
|
||||
|
||||
(define-runtime-path path-static "../static")
|
||||
(define path-archive (anytime-path ".." "storage/archive"))
|
||||
|
||||
;; Known file extensions (as bytes, including the leading dot) and the MIME
;; type each one is served with.
(define hash-ext-mime-type
  (hash #".css" #"text/css"
        #".js" #"text/javascript"
        #".png" #"image/png"
        #".svg" #"image/svg+xml"
        #".woff2" #"font/woff2"
        #".txt" #"text/plain"))

;; Look up the MIME type for the extension `ext`.
;; Returns the MIME type as bytes, or #f when `ext` is not in the table.
;; `ext` may itself be #f (path-get-extension returns #f for a path without an
;; extension); that also yields #f instead of raising from hash-ref.
(define (ext->mime-type ext)
  (hash-ref hash-ext-mime-type ext #f))
|
||||
(module+ test
|
||||
(check-equal? (ext->mime-type #".png") #"image/png"))
|
||||
|
||||
;; Turn a list of path segments into a URL path: a list of path/param
;; structs, each carrying one segment and no parameters.
(define (make-path segments)
  (for/list ([segment (in-list segments)])
    (path/param segment '())))
|
||||
(module+ test
|
||||
(check-equal? (make-path '("static" "main.css"))
|
||||
(list (path/param "static" '()) (path/param "main.css" '()))))
|
||||
|
||||
;; given a request path, return the rewritten request path and the filesystem directory to serve it from
|
||||
(define (path-rewriter p)
|
||||
(cond
|
||||
; url is ^/static/... ?
|
||||
[(equal? (path/param-path (car p)) "static")
|
||||
; rewrite to ^/... which will be treated as relative to static/ on the filesystem
|
||||
(values (cdr p) path-static)]
|
||||
; url is ^/archive/... ?
|
||||
[(equal? (path/param-path (car p)) "archive")
|
||||
; rewrite req to ^/<wikiname> and dir to /storage/archive
|
||||
(values (cdr p) path-archive)]
|
||||
(cdr p)]
|
||||
; url is literally ^/robots.txt
|
||||
[(equal? p (make-path '("robots.txt")))
|
||||
; rewrite to ^/... -- it already is!
|
||||
(values p path-static)]
|
||||
p]
|
||||
; not going to use the static file dispatcher
|
||||
[#t (next-dispatcher)]))
|
||||
(module+ test
|
||||
(check-equal? (call-with-values (λ () (path-rewriter (make-path '("static" "main.css")))) cons)
|
||||
(cons (make-path '("main.css")) path-static))
|
||||
(check-equal? (call-with-values (λ () (path-rewriter (make-path '("static" "robots.txt")))) cons)
|
||||
(cons (make-path '("robots.txt")) path-static))
|
||||
(check-equal? (call-with-values (λ () (path-rewriter (make-path '("robots.txt")))) cons)
|
||||
(cons (make-path '("robots.txt")) path-static))
|
||||
(check-equal? (call-with-values (λ () (path-rewriter (make-path '("archive" "minecraft" "styles" "main.css")))) cons)
|
||||
(cons (make-path '("minecraft" "styles" "main.css")) path-archive)))
|
||||
(check-equal? (path-rewriter (make-path '("static" "main.css")))
|
||||
(make-path '("main.css")))
|
||||
(check-equal? (path-rewriter (make-path '("static" "robots.txt")))
|
||||
(make-path '("robots.txt")))
|
||||
(check-equal? (path-rewriter (make-path '("robots.txt")))
|
||||
(make-path '("robots.txt"))))
|
||||
|
||||
(define (static-dispatcher conn old-req)
|
||||
(define old-uri (request-uri old-req))
|
||||
(define old-path (url-path old-uri))
|
||||
(define-values (new-path source-dir) (path-rewriter old-path))
|
||||
(define new-path (path-rewriter old-path))
|
||||
(define new-uri (struct-copy url old-uri [path new-path]))
|
||||
(define new-req (struct-copy request old-req [uri new-uri]))
|
||||
((files:make
|
||||
#:url->path (lambda (u) ((make-url->path source-dir) u))
|
||||
#:path->headers (lambda (p) (list (header #"Access-Control-Allow-Origin" #"*")
|
||||
(header #"Referrer-Policy" #"same-origin")))
|
||||
#:url->path (lambda (u) ((make-url->path path-static) u))
|
||||
#:path->mime-type (lambda (u) (ext->mime-type (path-get-extension u)))
|
||||
#:cache-no-cache (config-true? 'debug)
|
||||
#:cache-immutable (not (config-true? 'debug))
|
||||
#:cache-max-age (if (config-true? 'debug) #f 604800))
|
||||
#:cache-no-cache (config-true? 'debug) #;"browser applies heuristics if unset")
|
||||
conn new-req))
|
||||
|
|
|
@ -1,65 +1,22 @@
|
|||
#lang racket/base
|
||||
(require racket/match
|
||||
racket/path
|
||||
(require racket/path
|
||||
racket/string
|
||||
net/url
|
||||
web-server/http
|
||||
web-server/dispatchers/dispatch
|
||||
(only-in racket/promise delay)
|
||||
(prefix-in lift: web-server/dispatchers/dispatch-lift)
|
||||
"application-globals.rkt"
|
||||
"config.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
"xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
subdomain-dispatcher)
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
|
||||
(define (do-redirect:make subdomain canonical-origin)
|
||||
(define (subdomain-dispatcher subdomain)
|
||||
(lift:make
|
||||
(λ (req)
|
||||
(response-handler
|
||||
(define uri (request-uri req))
|
||||
(define path (url-path uri))
|
||||
(define path-string (string-join (map (λ (p) (path/param-path p)) path) "/"))
|
||||
(define dest (format "~a/~a/~a" canonical-origin subdomain path-string))
|
||||
(define dest (format "~a/~a/~a" (config-get 'canonical_origin) subdomain path-string))
|
||||
(generate-redirect dest)))))
|
||||
|
||||
;; Decide whether a request was addressed to a wiki subdomain of the canonical origin.
;; The canonical origin is taken from the X-Canonical-Origin request header when
;; present, otherwise from the canonical_origin config value when that is set.
;; Returns (list 'redirect prefix canonical-origin) when the Host header is
;; "<prefix>.<host of the canonical origin>", and 'next-dispatcher otherwise —
;; including when the request carries no Host header at all.
(define (router req)
  (define raw-headers (request-headers/raw req))
  ;; headers-assq* returns #f when the header is absent; guard before header-value,
  ;; which would raise on #f
  (define host-header (headers-assq* #"host" raw-headers))
  (define host (and host-header (bytes->string/utf-8 (header-value host-header))))
  (define x-canonical-origin (headers-assq* #"x-canonical-origin" raw-headers))
  (define canonical-origin
    (cond
      ; without a Host header there is no subdomain to inspect
      [(not host) #f]
      [x-canonical-origin (bytes->string/utf-8 (header-value x-canonical-origin))]
      [(config-true? 'canonical_origin) (config-get 'canonical_origin)]
      [#t #f]))
  ;; NOTE(review): if/out and if/in are macros defined outside this file; presumably
  ;; any failed condition falls through to the final 'next-dispatcher — confirm
  (if/out canonical-origin
          (let* ([canonical-origin-host (url-host (string->url canonical-origin))])
            (if/in canonical-origin-host
                   (let* ([splitter (string-append "." canonical-origin-host)]
                          ; "magic.example.com" split on ".example.com" gives '("magic" "")
                          [s (string-split host splitter #:trim? #f)])
                     (if/in (and (eq? 2 (length s)) (equal? "" (cadr s)))
                            (list 'redirect (car s) canonical-origin)))))
          'next-dispatcher))
|
||||
(module+ test
|
||||
(define (qr url headers)
|
||||
(request #"GET" (string->url url) (map (λ (h) (header (car h) (cadr h))) headers) (delay '()) #f "127.0.0.1" 10416 "127.0.0.1"))
|
||||
(parameterize ([(config-parameter 'canonical_origin) "https://breezewiki.com"])
|
||||
(check-equal? (router (qr "/" '((#"Host" #"breezewiki.com"))))
|
||||
'next-dispatcher)
|
||||
(check-equal? (router (qr "/wiki/Spell" '((#"Host" #"magic.breezewiki.com"))))
|
||||
'(redirect "magic" "https://breezewiki.com"))
|
||||
(check-equal? (router (qr "/" '((#"Host" #"magic.bw.breezewiki.com")
|
||||
(#"X-Canonical-Origin" #"https://bw.breezewiki.com"))))
|
||||
'(redirect "magic" "https://bw.breezewiki.com"))
|
||||
(check-equal? (router (qr "/" '((#"Host" #"magic.bwxxxxx.onion")
|
||||
(#"X-Canonical-Origin" #"http://bwxxxxx.onion"))))
|
||||
'(redirect "magic" "http://bwxxxxx.onion"))))
|
||||
|
||||
;; Dispatcher entry point: ask `router` what to do with the request, then either
;; run the redirect dispatcher built for that subdomain and canonical origin,
;; or hand the request to the next dispatcher.
(define (subdomain-dispatcher conn req)
  (define decision (router req))
  (match decision
    [(list 'redirect wiki-subdomain origin)
     (define redirect-dispatcher (do-redirect:make wiki-subdomain origin))
     (redirect-dispatcher conn req)]
    [_ (next-dispatcher)]))
|
||||
|
|
|
@ -1,165 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/dict
|
||||
racket/file
|
||||
racket/function
|
||||
racket/list
|
||||
racket/match
|
||||
racket/path
|
||||
racket/string
|
||||
; libs
|
||||
(prefix-in easy: net/http-easy)
|
||||
file/sha1
|
||||
file/gunzip
|
||||
json
|
||||
; html libs
|
||||
"../lib/html-parsing/main.rkt"
|
||||
html-writing
|
||||
; web server libs
|
||||
net/url
|
||||
web-server/http
|
||||
web-server/dispatchers/dispatch
|
||||
; my libs
|
||||
"application-globals.rkt"
|
||||
"../archiver/archiver-database.rkt"
|
||||
"config.rkt"
|
||||
"data.rkt"
|
||||
"log.rkt"
|
||||
"page-wiki.rkt"
|
||||
"../lib/archive-file-mappings.rkt"
|
||||
"../lib/pure-utils.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/tree-updater.rkt"
|
||||
"../lib/xexpr-utils.rkt"
|
||||
"../lib/url-utils.rkt")
|
||||
|
||||
(provide
|
||||
; used by the web server
|
||||
page-wiki-offline)
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
|
||||
(define path-archive (anytime-path ".." "storage/archive"))
|
||||
|
||||
(when (config-true? 'feature_offline::only)
|
||||
(void (get-slc)))
|
||||
|
||||
(define (page-wiki-offline req)
|
||||
(response-handler
|
||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||
(define segments (map path/param-path (cdr (url-path (request-uri req)))))
|
||||
(define basename (url-segments->basename segments))
|
||||
(define maybe-hashed-basename (if ((string-length basename) . > . 240)
|
||||
(sha1 (string->bytes/latin-1 basename))
|
||||
basename))
|
||||
|
||||
(define user-cookies (user-cookies-getter req))
|
||||
(define theme (user-cookies^-theme user-cookies))
|
||||
|
||||
(log-page-request #t wikiname maybe-hashed-basename theme)
|
||||
|
||||
(define archive-format
|
||||
(case (config-get 'feature_offline::format)
|
||||
[(".json" "json") (cons "~a.json" (λ () (read-json)))]
|
||||
[(".json.gz" "json.gz") (cons "~a.json.gz" (λ ()
|
||||
(define-values (in out) (make-pipe))
|
||||
(gunzip-through-ports (current-input-port) out)
|
||||
(read-json in)))]
|
||||
[else (error 'archive-format "unknown archive format configured")]))
|
||||
(define fs-path (build-path path-archive wikiname (format (car archive-format) maybe-hashed-basename)))
|
||||
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname (basename->name-for-query basename)))
|
||||
(cond/var
|
||||
|
||||
[(file-exists? fs-path)
|
||||
(when (config-true? 'debug)
|
||||
(printf "using offline mode for ~v~n" fs-path))
|
||||
(response-handler
|
||||
(define data (with-input-from-file fs-path (cdr archive-format)))
|
||||
(define article-title (jp "/parse/title" data))
|
||||
(define original-page (html->xexp (preprocess-html-wiki (jp "/parse/text" data))))
|
||||
(define page ((query-selector (λ (t a c) (has-class? "mw-parser-output" a)) original-page)))
|
||||
(define initial-head-data ((head-data-getter wikiname) data))
|
||||
(define head-data
|
||||
(case theme
|
||||
[(light dark)
|
||||
(struct-copy head-data^ initial-head-data
|
||||
[body-class (regexp-replace #rx"(theme-fandomdesktop-)(light|dark)"
|
||||
(head-data^-body-class initial-head-data)
|
||||
(format "\\1~a" theme))])]
|
||||
[else initial-head-data]))
|
||||
(define body
|
||||
(generate-wiki-page
|
||||
(update-tree-wiki page wikiname)
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title article-title
|
||||
#:online-styles #f
|
||||
#:head-data head-data
|
||||
#:siteinfo (siteinfo-fetch wikiname)
|
||||
))
|
||||
(define redirect-query-parameter (dict-ref (url-query (request-uri req)) 'redirect "yes"))
|
||||
(define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body)))
|
||||
(define redirect-msg-a (if redirect-msg
|
||||
((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))
|
||||
#f))
|
||||
(define headers
|
||||
(build-headers
|
||||
always-headers
|
||||
; redirect-query-parameter: only the string "no" is significant:
|
||||
; https://github.com/Wikia/app/blob/fe60579a53f16816d65dad1644363160a63206a6/includes/Wiki.php#L367
|
||||
(when (and redirect-msg-a
|
||||
(not (equal? redirect-query-parameter "no")))
|
||||
(let* ([dest (get-attribute 'href (bits->attributes redirect-msg-a))]
|
||||
[value (bytes-append #"0;url=" (string->bytes/utf-8 dest))])
|
||||
(header #"Refresh" value)))))
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
(response/output
|
||||
#:code 200
|
||||
#:headers headers
|
||||
(λ (out)
|
||||
(write-html body out))))]
|
||||
|
||||
;; page not found on disk, perhaps it's a redirect? redirects are stored in the database
|
||||
(var target (query-maybe-value* "select redirect from page where wikiname = ? and basename = ?" wikiname basename))
|
||||
[target
|
||||
; don't url decode the target, or Category: pages will be interpreted as a protocol
|
||||
(generate-redirect (format "/~a/wiki/~a" wikiname (regexp-replace* #rx"#" target "/")))]
|
||||
|
||||
;; breezewiki doesn't have the page archived, see if we can make a network request for it
|
||||
[(not (config-true? 'feature_offline::only))
|
||||
(next-dispatcher)]
|
||||
|
||||
;; no possible way to provide the page
|
||||
[else
|
||||
(define mirror-path (url->string (request-uri req)))
|
||||
(define body
|
||||
(generate-wiki-page
|
||||
`(div (@ (class "unsaved-page"))
|
||||
(style ".unsaved-page a { text-decoration: underline !important }")
|
||||
(p "breezewiki.com doesn't have this page saved.")
|
||||
(p "You can see this page by visiting a BreezeWiki mirror:")
|
||||
(ul
|
||||
(li (a (@ (href ,(format "https://antifandom.com~a" mirror-path))) "View on antifandom.com"))
|
||||
(li (a (@ (href ,(format "https://bw.artemislena.eu~a" mirror-path))) "View on artemislena.eu"))
|
||||
(li (a (@ (href ,source-url)) "or, you can see the original page on Fandom (ugh)")))
|
||||
(p "If you'd like " ,wikiname ".fandom.com to be added to breezewiki.com, " (a (@ (href "https://lists.sr.ht/~cadence/breezewiki-requests")) "let me know about it!")))
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title (url-segments->guess-title segments)
|
||||
#:online-styles #f
|
||||
#:siteinfo (siteinfo-fetch wikiname)
|
||||
))
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
(response/output
|
||||
#:code 200
|
||||
#:headers always-headers
|
||||
(λ (out)
|
||||
(write-html body out)))])))
|
|
@ -7,7 +7,7 @@
|
|||
; libs
|
||||
(prefix-in easy: net/http-easy)
|
||||
; html libs
|
||||
"../lib/html-parsing/main.rkt"
|
||||
html-parsing
|
||||
html-writing
|
||||
; web server libs
|
||||
net/url
|
||||
|
@ -17,14 +17,10 @@
|
|||
"application-globals.rkt"
|
||||
"config.rkt"
|
||||
"data.rkt"
|
||||
"fandom-request.rkt"
|
||||
"../lib/archive-file-mappings.rkt"
|
||||
"../lib/pure-utils.rkt"
|
||||
"../lib/syntax.rkt"
|
||||
"../lib/thread-utils.rkt"
|
||||
"../lib/tree-updater.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
"pure-utils.rkt"
|
||||
"syntax.rkt"
|
||||
"xexpr-utils.rkt"
|
||||
"url-utils.rkt")
|
||||
|
||||
(provide
|
||||
; used by the web server
|
||||
|
@ -34,101 +30,288 @@
|
|||
preprocess-html-wiki)
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
(require rackunit)
|
||||
(define wiki-document
|
||||
'(*TOP*
|
||||
(div (@ (class "mw-parser-output"))
|
||||
(aside (@ (role "region") (class "portable-infobox pi-theme-wikia pi-layout-default"))
|
||||
(h2 (@ (class "pi-item pi-title") (data-source "title"))
|
||||
"Infobox Title")
|
||||
(figure (@ (class "pi-item pi-image") (data-source "image"))
|
||||
(a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image image-thumbnail") (title ""))
|
||||
(img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png") (class "pi-image-thumbnail")))))
|
||||
(div (@ (class "pi-item pi-data") (data-source "description"))
|
||||
(h3 (@ (class "pi-data-label"))
|
||||
"Description")
|
||||
(div (@ (class "pi-data-value"))
|
||||
"Mystery infobox!")))
|
||||
(div (@ (data-test-collapsesection) (class "collapsible collapsetoggle-inline collapsed"))
|
||||
(i (b "This section is hidden for dramatic effect."))
|
||||
(div (@ (class "collapsible-content"))
|
||||
(p "Another page link: "
|
||||
(a (@ (data-test-wikilink) (href "https://test.fandom.com/wiki/Another_Page") (title "Another Page"))
|
||||
"Another Page"))))
|
||||
(figure (@ (class "thumb tnone"))
|
||||
(a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image"))
|
||||
(img (@ (src "data:image/gif;base64,R0lGODlhAQABAIABAAAAAP///yH5BAEAAAEALAAAAAABAAEAQAICTAEAOw%3D%3D")
|
||||
(data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png")
|
||||
(class "thumbimage lazyload"))))
|
||||
(noscript
|
||||
(a (@ (href "https://static.wikia.nocookie.net/nice-image.png") (class "image"))
|
||||
(img (@ (src "https://static.wikia.nocookie.net/nice-image-thumbnail.png")
|
||||
(data-src "https://static.wikia.nocookie.net/nice-image-thumbnail.png")
|
||||
(class "thumbimage")))))
|
||||
(figcaption "Test figure!"))
|
||||
(iframe (@ (src "https://example.com/iframe-src")))))))
|
||||
|
||||
;; Patch known-bad markup in a wiki page's HTML string before it is parsed.
;; Two textual rewrites are applied, in this order:
;;   1. <figcaption><p class="caption">…</p> becomes <figcaption><span class="caption">…</span>
;;      to make the parser happy
;;   2. a <li> directly inside a <td> gets a <ul> inserted in front of it
;;      (navbox on the right of the page has incorrect html "<td ...><li>" and the
;;      xexpr parser puts the <li> much further up the tree)
;;      usage: /fallout/wiki/Fallout:_New_Vegas_achievements_and_trophies
;; Returns the rewritten string.
(define (preprocess-html-wiki html)
  (let* ([captions-fixed
          (regexp-replace* #rx"(<figcaption[^>]*>)[ \t]*<p class=\"caption\">([^<]*)</p>"
                           html
                           "\\1<span class=\"caption\">\\2</span>")]
         [navbox-fixed
          (regexp-replace* #rx"(<td[^>]*>\n?)(<li>)"
                           captions-fixed
                           "\\1<ul>\\2")])
    navbox-fixed))
|
||||
(module+ test
|
||||
(check-equal? (preprocess-html-wiki "<td class=\"va-navbox-column\" style=\"width: 33%\">\n<li>Hey</li>")
|
||||
"<td class=\"va-navbox-column\" style=\"width: 33%\">\n<ul><li>Hey</li>")
|
||||
(check-equal? (preprocess-html-wiki "<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"> <p class=\"caption\">Caption text.</p></figcaption></figure>")
|
||||
"<figure class=\"thumb tright\" style=\"width: 150px\"><a class=\"image\"><img></a><noscript><a><img></a></noscript><figcaption class=\"thumbcaption\"><span class=\"caption\">Caption text.</span></figcaption></figure>"))
|
||||
|
||||
(define (update-tree-wiki tree wikiname)
|
||||
(update-tree
|
||||
(λ (element element-type attributes children)
|
||||
;; replace whole element?
|
||||
(cond
|
||||
; wrap tables in a div.table-scroller
|
||||
[(and (eq? element-type 'table)
|
||||
(has-class? "wikitable" attributes)
|
||||
(not (dict-has-key? attributes 'data-scrolling)))
|
||||
`(div
|
||||
((class "table-scroller"))
|
||||
((,element-type (@ (data-scrolling) ,@attributes)
|
||||
,@children)))]
|
||||
; exclude empty figcaptions
|
||||
[(and (eq? element-type 'figcaption)
|
||||
(or (eq? (length (filter element-is-element? children)) 0)
|
||||
((query-selector (λ (element-type attributes children)
|
||||
(eq? element-type 'use))
|
||||
element))))
|
||||
return-no-element]
|
||||
; exclude infobox items that are videos, and gallery items that are videos
|
||||
[(and (or (has-class? "pi-item" attributes)
|
||||
(has-class? "wikia-gallery-item" attributes))
|
||||
((query-selector (λ (element-type attributes children)
|
||||
(has-class? "video-thumbnail" attributes))
|
||||
element)))
|
||||
return-no-element]
|
||||
; exclude the invisible brackets after headings
|
||||
[(and (eq? element-type 'span)
|
||||
(has-class? "mw-editsection" attributes))
|
||||
return-no-element]
|
||||
; display a link instead of an iframe
|
||||
[(eq? element-type 'iframe)
|
||||
(define src (car (dict-ref attributes 'src null)))
|
||||
`(a
|
||||
((class "iframe-alternative") (href ,src))
|
||||
(,(format "Embedded media: ~a" src)))]
|
||||
; remove noscript versions of images because they are likely lower quality than the script versions
|
||||
[(and (eq? element-type 'noscript)
|
||||
(match children
|
||||
; either the noscript has a.image as a first child...
|
||||
[(list (list 'a (list '@ a-att ...) _)) (has-class? "image" a-att)]
|
||||
; or the noscript has img as a first child
|
||||
[(list (list 'img _)) #t]
|
||||
[_ #f]))
|
||||
return-no-element]
|
||||
[#t
|
||||
(list element-type
|
||||
;; attributes
|
||||
((compose1
|
||||
; uncollapsing
|
||||
(curry attribute-maybe-update 'class
|
||||
(λ (class)
|
||||
(string-join
|
||||
((compose1
|
||||
; uncollapse all navbox items (bottom of page mass navigation)
|
||||
(curry u
|
||||
(λ (classlist) (and (eq? element-type 'table)
|
||||
(member "navbox" classlist)
|
||||
(member "collapsed" classlist)))
|
||||
(λ (classlist) (filter (curry (negate equal?) "collapsed") classlist)))
|
||||
; uncollapse portable-infobox sections
|
||||
(curry u
|
||||
(λ (classlist) (and (eq? element-type 'section)
|
||||
(member "pi-collapse" classlist)))
|
||||
(λ (classlist) (filter (λ (v)
|
||||
(and (not (equal? v "pi-collapse-closed"))
|
||||
(not (equal? v "pi-collapse"))))
|
||||
classlist)))
|
||||
; generic: includes article sections and tables, probably more
|
||||
(curry u
|
||||
(λ (classlist) (and (member "collapsible" classlist)
|
||||
(member "collapsed" classlist)))
|
||||
(λ (classlist) (filter (curry (negate equal?) "collapsed") classlist))))
|
||||
(string-split class " "))
|
||||
" ")))
|
||||
; change links to stay on the same wiki
|
||||
(curry attribute-maybe-update 'href
|
||||
(λ (href)
|
||||
((compose1
|
||||
(λ (href) (regexp-replace #rx"^(/wiki/.*)" href (format "/~a\\1" wikiname)))
|
||||
(λ (href) (regexp-replace (pregexp (format "^https://(~a)\\.fandom\\.com(/wiki/.*)" px-wikiname)) href "/\\1\\2")))
|
||||
href)))
|
||||
; add noreferrer to a.image
|
||||
(curry u
|
||||
(λ (v) (and (eq? element-type 'a)
|
||||
(has-class? "image" v)))
|
||||
(λ (v) (dict-update v 'rel (λ (s)
|
||||
(list (string-append (car s) " noreferrer")))
|
||||
'(""))))
|
||||
; proxy images from inline styles, if strict_proxy is set
|
||||
(curry u
|
||||
(λ (v) (config-true? 'strict_proxy))
|
||||
(λ (v) (attribute-maybe-update 'style
|
||||
(λ (style)
|
||||
(regexp-replace #rx"url\\(['\"]?(.*?)['\"]?\\)" style
|
||||
(λ (whole url)
|
||||
(string-append
|
||||
"url("
|
||||
(u-proxy-url url)
|
||||
")")))) v)))
|
||||
; and also their links, if strict_proxy is set
|
||||
(curry u
|
||||
(λ (v)
|
||||
(and (config-true? 'strict_proxy)
|
||||
(eq? element-type 'a)
|
||||
(has-class? "image-thumbnail" v)))
|
||||
(λ (v) (attribute-maybe-update 'href u-proxy-url v)))
|
||||
; proxy images from src attributes, if strict_proxy is set
|
||||
(curry u
|
||||
(λ (v) (config-true? 'strict_proxy))
|
||||
(λ (v) (attribute-maybe-update 'src u-proxy-url v)))
|
||||
; don't lazyload images
|
||||
(curry u
|
||||
(λ (v) (dict-has-key? v 'data-src))
|
||||
(λ (v) (attribute-maybe-update 'src (λ (_) (car (dict-ref v 'data-src))) v)))
|
||||
; don't use srcset - TODO: use srcset?
|
||||
(λ (v) (dict-remove v 'srcset)))
|
||||
attributes)
|
||||
;; children
|
||||
((compose1
|
||||
; wrap blinking animated images in a slot so they can be animated with CSS
|
||||
(curry u
|
||||
(λ (v) (and (has-class? "animated" attributes)
|
||||
((length v) . > . 1)))
|
||||
(λ (v)
|
||||
`((span (@ (class "animated-slot__outer") (style ,(format "--steps: ~a" (length v))))
|
||||
(span (@ (class "animated-slot__inner"))
|
||||
,@v))))))
|
||||
children))]))
|
||||
tree))
|
||||
(module+ test
|
||||
(define transformed
|
||||
(parameterize ([(config-parameter 'strict_proxy) "true"])
|
||||
(update-tree-wiki wiki-document "test")))
|
||||
; check that wikilinks are changed to be local
|
||||
(check-equal? (get-attribute 'href (bits->attributes
|
||||
((query-selector
|
||||
(λ (t a c) (dict-has-key? a 'data-test-wikilink))
|
||||
transformed))))
|
||||
"/test/wiki/Another_Page")
|
||||
; check that a.image has noreferrer
|
||||
(check-equal? (get-attribute 'rel (bits->attributes
|
||||
((query-selector
|
||||
(λ (t a c) (and (eq? t 'a)
|
||||
(has-class? "image" a)))
|
||||
transformed))))
|
||||
" noreferrer")
|
||||
; check that article collapse sections become uncollapsed
|
||||
(check-equal? (get-attribute 'class (bits->attributes
|
||||
((query-selector
|
||||
(λ (t a c) (dict-has-key? a 'data-test-collapsesection))
|
||||
transformed))))
|
||||
"collapsible collapsetoggle-inline")
|
||||
; check that iframes are gone
|
||||
(check-false ((query-selector (λ (t a c) (eq? t 'iframe)) transformed)))
|
||||
(check-equal? (let* ([alternative ((query-selector (λ (t a c) (has-class? "iframe-alternative" a)) transformed))]
|
||||
[link ((query-selector (λ (t a c) (eq? t 'a)) alternative))])
|
||||
(get-attribute 'href (bits->attributes link)))
|
||||
"https://example.com/iframe-src")
|
||||
; check that images are proxied
|
||||
(check-equal? (get-attribute 'src (bits->attributes
|
||||
((query-selector
|
||||
(λ (t a c) (eq? t 'img))
|
||||
transformed))))
|
||||
"/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image-thumbnail.png")
|
||||
; check that links to images are proxied
|
||||
(check-equal? (get-attribute 'href (bits->attributes
|
||||
((query-selector
|
||||
(λ (t a c) (and (eq? t 'a) (has-class? "image-thumbnail" a)))
|
||||
transformed))))
|
||||
"/proxy?dest=https%3A%2F%2Fstatic.wikia.nocookie.net%2Fnice-image.png")
|
||||
; check that noscript images are removed
|
||||
(check-equal? ((query-selector (λ (t a c) (eq? t 'noscript)) transformed)) #f))
|
||||
|
||||
(define (page-wiki req)
|
||||
(define wikiname (path/param-path (first (url-path (request-uri req)))))
|
||||
(define segments (map path/param-path (cdr (url-path (request-uri req)))))
|
||||
(define user-cookies (user-cookies-getter req))
|
||||
(define path (string-join (cdr segments) "/"))
|
||||
(define origin (format "https://~a.fandom.com" wikiname))
|
||||
(define path (string-join (map path/param-path (cddr (url-path (request-uri req)))) "/"))
|
||||
(define source-url (format "https://~a.fandom.com/wiki/~a" wikiname path))
|
||||
|
||||
(define-values (dest-res siteinfo)
|
||||
(thread-values
|
||||
(λ ()
|
||||
(fandom-get-api
|
||||
wikiname
|
||||
`(("action" . "parse")
|
||||
("page" . ,path)
|
||||
("prop" . "text|headhtml|langlinks")
|
||||
("formatversion" . "2")
|
||||
("format" . "json"))
|
||||
#:headers `#hasheq((cookie . ,(format "theme=~a" (user-cookies^-theme user-cookies))))))
|
||||
(λ ()
|
||||
(siteinfo-fetch wikiname))))
|
||||
(thread-let
|
||||
([dest-res (define dest-url
|
||||
(format "~a/api.php?~a"
|
||||
origin
|
||||
(params->query `(("action" . "parse")
|
||||
("page" . ,path)
|
||||
("prop" . "text|headhtml|langlinks")
|
||||
("formatversion" . "2")
|
||||
("format" . "json")))))
|
||||
(log-outgoing dest-url)
|
||||
(easy:get dest-url #:timeouts timeouts)]
|
||||
[siteinfo (siteinfo-fetch wikiname)])
|
||||
|
||||
(cond
|
||||
[(eq? 200 (easy:response-status-code dest-res))
|
||||
(let* ([data (easy:response-json dest-res)]
|
||||
[title (jp "/parse/title" data "")]
|
||||
[page-html (jp "/parse/text" data "")]
|
||||
[page-html (preprocess-html-wiki page-html)]
|
||||
[page (html->xexp page-html)]
|
||||
[head-data ((head-data-getter wikiname) data)])
|
||||
(if (equal? "missingtitle" (jp "/error/code" data #f))
|
||||
(next-dispatcher)
|
||||
(response-handler
|
||||
(define body
|
||||
(generate-wiki-page
|
||||
(update-tree-wiki page wikiname)
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:head-data head-data
|
||||
#:siteinfo siteinfo))
|
||||
(define redirect-query-parameter (dict-ref (url-query (request-uri req)) 'redirect "yes"))
|
||||
(define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body)))
|
||||
(define redirect-msg-a (if redirect-msg
|
||||
((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))
|
||||
#f))
|
||||
(define headers
|
||||
(build-headers
|
||||
always-headers
|
||||
; redirect-query-parameter: only the string "no" is significant:
|
||||
; https://github.com/Wikia/app/blob/fe60579a53f16816d65dad1644363160a63206a6/includes/Wiki.php#L367
|
||||
(when (and redirect-msg-a
|
||||
(not (equal? redirect-query-parameter "no")))
|
||||
(let* ([dest (get-attribute 'href (bits->attributes redirect-msg-a))]
|
||||
[value (bytes-append #"0;url=" (string->bytes/utf-8 dest))])
|
||||
(header #"Refresh" value)))))
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
(response/output
|
||||
#:code 200
|
||||
#:headers headers
|
||||
(λ (out)
|
||||
(write-html body out))))))]
|
||||
[(eq? 404 (easy:response-status-code dest-res))
|
||||
(next-dispatcher)]
|
||||
[(memq (easy:response-status-code dest-res) '(403 406))
|
||||
(response-handler
|
||||
(define body
|
||||
(generate-wiki-page
|
||||
`(div
|
||||
(p "Sorry! Fandom isn't allowing BreezeWiki to show pages right now.")
|
||||
(p "We'll automatically try again in 30 seconds, so please stay on this page and be patient.")
|
||||
(p (small "In a hurry? " (a (@ (href ,source-url)) "Click here to read the page on Fandom."))))
|
||||
#:req req
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title (url-segments->guess-title segments)
|
||||
#:siteinfo siteinfo))
|
||||
(response/output
|
||||
#:code 503
|
||||
#:headers (build-headers
|
||||
always-headers
|
||||
(header #"Retry-After" #"30")
|
||||
(header #"Cache-Control" #"max-age=30, public")
|
||||
(header #"Refresh" #"35"))
|
||||
(λ (out)
|
||||
(write-html body out))))]
|
||||
[else
|
||||
(response-handler
|
||||
(error 'page-wiki "Tried to load page ~a/~a~nSadly, the page didn't load because Fandom returned status code ~a with response:~n~a"
|
||||
wikiname
|
||||
path
|
||||
(easy:response-status-code dest-res)
|
||||
(easy:response-body dest-res)))]))
|
||||
(cond
|
||||
[(eq? 200 (easy:response-status-code dest-res))
|
||||
(let* ([data (easy:response-json dest-res)]
|
||||
[title (jp "/parse/title" data "")]
|
||||
[page-html (jp "/parse/text" data "")]
|
||||
[page-html (preprocess-html-wiki page-html)]
|
||||
[page (html->xexp page-html)]
|
||||
[head-html (jp "/parse/headhtml" data "")]
|
||||
[body-class (match (regexp-match #rx"<body [^>]*class=\"([^\"]*)" head-html)
|
||||
[(list _ classes) classes]
|
||||
[_ ""])])
|
||||
(if (equal? "missingtitle" (jp "/error/code" data #f))
|
||||
(next-dispatcher)
|
||||
(response-handler
|
||||
(define body
|
||||
(generate-wiki-page
|
||||
(update-tree-wiki page wikiname)
|
||||
#:source-url source-url
|
||||
#:wikiname wikiname
|
||||
#:title title
|
||||
#:body-class body-class
|
||||
#:siteinfo siteinfo))
|
||||
(define redirect-msg ((query-selector (attribute-selector 'class "redirectMsg") body)))
|
||||
(define headers
|
||||
(build-headers
|
||||
always-headers
|
||||
(when redirect-msg
|
||||
(let* ([dest (get-attribute 'href (bits->attributes ((query-selector (λ (t a c) (eq? t 'a)) redirect-msg))))]
|
||||
[value (bytes-append #"0;url=" (string->bytes/utf-8 dest))])
|
||||
(header #"Refresh" value)))))
|
||||
(when (config-true? 'debug)
|
||||
; used for its side effects
|
||||
; convert to string with error checking, error will be raised if xexp is invalid
|
||||
(xexp->html body))
|
||||
(response/output
|
||||
#:code 200
|
||||
#:headers headers
|
||||
(λ (out)
|
||||
(write-html body out))))))])))
|
||||
|
|
|
@ -4,19 +4,13 @@
|
|||
; call the updater on the dictionary key only if it has that key
|
||||
alist-maybe-update
|
||||
; update a value only if a condition succeeds on it
|
||||
u
|
||||
; like string-join, but for lists
|
||||
list-join
|
||||
u-counter)
|
||||
u)
|
||||
|
||||
(module+ test
|
||||
(require "typed-rackunit.rkt"))
|
||||
|
||||
(define u-counter (box 0))
|
||||
|
||||
(: alist-maybe-update (∀ (A B) ((Listof (Pairof A B)) A (B -> B) -> (Listof (Pairof A B)))))
|
||||
(define (alist-maybe-update alist key updater)
|
||||
(set-box! u-counter (add1 (unbox u-counter)))
|
||||
(map (λ ([p : (Pairof A B)])
|
||||
(if (eq? (car p) key)
|
||||
(cons (car p) (updater (cdr p)))
|
||||
|
@ -30,16 +24,7 @@
|
|||
|
||||
;; Conditionally transform a value.
;;   condition : predicate applied to `value`
;;   updater   : applied to `value` only when the predicate holds
;;   value     : the value under consideration
;; Returns `(updater value)` when `(condition value)` is true, otherwise
;; `value` unchanged. Every call also increments the `u-counter` box by one.
(: u (∀ (A) ((A -> Any) (A -> A) A -> A)))
(define (u condition updater value)
  (let ([calls-so-far (unbox u-counter)])
    (set-box! u-counter (add1 calls-so-far)))
  (cond
    [(condition value) (updater value)]
    [else value]))
(module+ test
  ;; predicate holds -> updater is applied
  (check-equal? (u (λ ([x : Integer]) (< x 5)) (λ ([x : Integer]) (* x -1)) 4) -4)
  ;; predicate fails -> value passes through untouched
  (check-equal? (u (λ ([x : Integer]) (< x 5)) (λ ([x : Integer]) (* x -1)) 8) 8))
|
||||
|
||||
;; Like string-join, but for lists: returns a new list with `element`
;; inserted between every two neighbouring items of `ls`.
;;   element : the separator to insert
;;   ls      : the list to interleave (may be empty)
;; An empty `ls` yields an empty list and a one-item `ls` yields a one-item
;; list; the separator never appears at the start or the end.
(: list-join (∀ (A B) (A (Listof B) -> (Listof (U A B)))))
(define (list-join element ls)
  (cond
    ; nothing to join; checking this first avoids taking the cdr of '()
    [(null? ls) null]
    ; last item: no trailing separator
    [(null? (cdr ls)) (list (car ls))]
    [else (list* (car ls) element (list-join element (cdr ls)))]))
(module+ test
  (check-equal? (list-join "h" '(2 3 4 5)) '(2 "h" 3 "h" 4 "h" 5))
  (check-equal? (list-join "h" '(2)) '(2))
  (check-equal? (list-join "h" '()) '()))
|
|
@ -3,7 +3,6 @@
|
|||
;;; Source: https://github.com/tonyg/racket-reloadable/blob/master/reloadable/main.rkt
|
||||
;;; Source commit: cae2a14 from 24 May 2015
|
||||
;;; Source license: LGPL 3 or later
|
||||
;;; Further modifications by Cadence as seen in this repo's git history.
|
||||
|
||||
(provide (struct-out reloadable-entry-point)
|
||||
reload-poll-interval
|
||||
|
@ -20,8 +19,8 @@
|
|||
(require racket/match)
|
||||
(require racket/rerequire)
|
||||
|
||||
(define reload-poll-interval 0.5) ; seconds
|
||||
(define reload-failure-retry-delay (make-parameter 5)) ; seconds
|
||||
(define reload-poll-interval 0.5) ;; seconds
|
||||
(define reload-failure-retry-delay (make-parameter 5)) ;; seconds
|
||||
|
||||
(struct reloadable-entry-point (name
|
||||
module-path
|
||||
|
|
|
@ -1,59 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/string
|
||||
(prefix-in easy: net/http-easy)
|
||||
"application-globals.rkt"
|
||||
"config.rkt"
|
||||
"fandom-request.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
search-fandom)
|
||||
|
||||
(module+ test
|
||||
(require rackunit
|
||||
"test-utils.rkt")
|
||||
(define search-results-data
|
||||
'(#hasheq((ns . 0) (pageid . 219) (size . 1482) (snippet . "") (timestamp . "2022-08-21T08:54:23Z") (title . "Gacha Capsule") (wordcount . 214)) #hasheq((ns . 0) (pageid . 201) (size . 1198) (snippet . "") (timestamp . "2022-07-11T17:52:47Z") (title . "Badges") (wordcount . 181)))))
|
||||
|
||||
;; Search a wiki through the API reached by `fandom-get-api` and render the
;; matches for display.
;;   wikiname : the wiki to search
;;   query    : the user's search string, sent as `srsearch`
;;   params   : accepted but not read by this function
;; Returns the x-expression built by `generate-results-content-fandom`.
(define (search-fandom wikiname query params)
  (let* ([api-params `(("action" . "query")
                       ("list" . "search")
                       ("srsearch" . ,query)
                       ("formatversion" . "2")
                       ("format" . "json"))]
         [response (fandom-get-api wikiname api-params)]
         [payload (easy:response-json response)]
         [hits (jp "/query/search" payload)])
    (generate-results-content-fandom wikiname query hits)))
|
||||
|
||||
;;; Build the x-expression shown inside the wiki page layout for a list of
;;; search hits.
;;;   wikiname       : used as the first path segment of every result link
;;;   query          : echoed back in the "results found for" header
;;;   search-results : list of result hashes; "/title", "/timestamp",
;;;                    "/wordcount" and "/size" are read from each
(define (generate-results-content-fandom wikiname query search-results)
  ;; divide by 1000 and round to the nearest 0.1
  (define (size->kb size)
    (exact->inexact (/ (round (/ size 100)) 10)))

  ;; one search hit -> one <li>
  (define (result->li result)
    (let* ([title (jp "/title" result)]
           [page-path (page-title->path title)]
           [timestamp (jp "/timestamp" result)]
           [wordcount (jp "/wordcount" result)]
           [size (jp "/size" result)]
           [href (format "/~a/wiki/~a" wikiname page-path)]
           ;; keep only the part of the timestamp before the "T"
           [edit-date (list-ref (string-split timestamp "T") 0)])
      `(li (@ (class "my-result"))
           (a (@ (class "my-result__link") (href ,href))
              ,title)
           ;; the line under the search result
           (div (@ (class "my-result__info"))
                "last edited "
                (time (@ (datetime ,timestamp)) ,edit-date)
                ,(format ", ~a words, ~a kb"
                         wordcount
                         (size->kb size))))))

  `(div (@ (class "mw-parser-output"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results found for " (length search-results))
           (strong ,query))
        ;; unordered list of matching search results
        (ul ,@(map result->li search-results))))
|
||||
|
||||
(module+ test
|
||||
(parameterize ([(config-parameter 'feature_offline::only) "false"])
|
||||
(check-not-false ((query-selector (attribute-selector 'href "/test/wiki/Gacha_Capsule")
|
||||
(generate-results-content-fandom "test" "Gacha" search-results-data))))))
|
|
@ -1,89 +0,0 @@
|
|||
#lang racket/base
|
||||
(require racket/dict
|
||||
racket/string
|
||||
(prefix-in easy: net/http-easy)
|
||||
"application-globals.rkt"
|
||||
"../lib/html-parsing/main.rkt"
|
||||
"../lib/url-utils.rkt"
|
||||
"../lib/xexpr-utils.rkt")
|
||||
|
||||
(provide
|
||||
search-solr)
|
||||
|
||||
(struct result^ (hl-title hl-body kb words page-path) #:transparent)
|
||||
|
||||
;; Query the Solr core named after `wikiname` on localhost:8983 and render the
;; hits, plus a small filter form, as an x-expression for the wiki page layout.
;;   wikiname : Solr core name, also used as the first segment of result links
;;   query    : the user's search string
;;   params   : dictionary of request parameters; 'op ("or", otherwise "and")
;;              and 'sort ("len", otherwise relevance) are read from it
(define (search-solr wikiname query params)
  ;; each option has the value used in the form and the value sent to Solr
  (define-values (op-form op-solr)
    (if (equal? (dict-ref params 'op #f) "or")
        (values "or" "OR")
        (values "and" "AND")))
  (define-values (sort-form sort-solr)
    (if (equal? (dict-ref params 'sort #f) "len")
        (values "len" "len desc")
        (values "relevance" "score desc")))

  ;; edismax query over title/body/table with <mark> highlighting
  (define solr-params
    `(("defType" . "edismax")
      ("q" . ,query)
      ("q.op" . ,op-solr)
      ("qf" . "title_prefix title^2.0 body table^0.3")
      ("hl" . "true")
      ("hl.method" . "unified")
      ("hl.defaultSummary" . "true")
      ("hl.fl" . "title,body")
      ("fl" . "id,len,title")
      ("hl.encoder" . "html")
      ("hl.tag.pre" . "<mark>")
      ("hl.tag.post" . "</mark>")
      ("sort" . ,sort-solr)))
  (define dest-url
    (format "http://localhost:8983/solr/~a/select?~a"
            wikiname
            (params->query solr-params)))

  ;; HTTP request to dest-url for search results
  (define timeouts (easy:make-timeout-config #:lease 5 #:connect 5))
  (define response (easy:get dest-url #:timeouts timeouts))
  (define json (easy:response-json response))

  ;; highlighted snippets, looked up per document by id
  (define highlighting (jp "/highlighting" json))

  ;; one Solr document -> one result^ record
  (define (doc->result doc)
    (let* ([id (jp "/id" doc)]
           [len (jp "/len" doc)]
           [raw-title (jp "/title" doc)]
           ;; when the title comes back as a list, take its first entry
           [title (if (list? raw-title) (car raw-title) raw-title)]
           [page-path (page-title->path title)]
           ;; divide by 1000 and round to nearest 0.1
           [kb (exact->inexact (/ (round (/ len 100)) 10))]
           ;; divide by 6 and round to nearest 10
           [words (* (round (/ len 60)) 10)]
           [hl (hash-ref highlighting (string->symbol id))]
           [hl-title (cdr (html->xexp (jp "/title/0" hl)))]
           [hl-body (cdr (html->xexp (string-trim (jp "/body/0" hl))))])
      (result^ hl-title hl-body kb words page-path)))
  (define results
    (for/list ([doc (jp "/response/docs" json)])
      (doc->result doc)))

  ;; QTime divided by 1000 and rounded to two decimals, shown as seconds
  (define qtime
    (exact->inexact (/ (round (/ (jp "/responseHeader/QTime" json) 10)) 100)))

  ;; attributes for an <option>: always a value, plus `selected` when current
  (define (option-attributes value current-value)
    (if (equal? value current-value)
        `((value ,value) (selected))
        `((value ,value))))

  ;; one result^ record -> one <li>
  (define (result->li result)
    `(li (@ (class "my-result"))
         (a (@ (class "my-result__link")
               (href ,(format "/~a/wiki/~a" wikiname (result^-page-path result))))
            ,@(result^-hl-title result))
         ;; result preview
         (p (@ (class "my-result__description")) ,@(result^-hl-body result))
         ;; line under the search result
         (div (@ (class "my-result__info"))
              ,(format "~a words, ~a kb of readable stuff"
                       (result^-words result)
                       (result^-kb result)))))

  ;; generate content for display in the wiki page layout
  `(div (@ (class "mw-parser-output"))
        (form (@ (class "my-result__filter"))
              (input (@ (type "hidden") (name "q") (value ,query)))
              (select (@ (name "op"))
                      (option (@ ,@(option-attributes "and" op-form)) "All words must match")
                      (option (@ ,@(option-attributes "or" op-form)) "Some words must match"))
              (select (@ (name "sort"))
                      (option (@ ,@(option-attributes "relevance" sort-form)) "Relevant articles")
                      (option (@ ,@(option-attributes "len" sort-form)) "Wordiest articles"))
              (button "Filter results"))
        ;; header before the search results showing how many we found
        (p ,(format "~a results (~a seconds) found for " (jp "/response/numFound" json) qtime)
           (strong ,query))
        ;; unordered list of matching search results
        (ul ,@(for/list ([result results])
                (result->li result)))))
|
|
@ -1,37 +0,0 @@
|
|||
#lang typed/racket/base
|
||||
(require racket/path
|
||||
racket/runtime-path
|
||||
racket/string)
|
||||
|
||||
(provide
|
||||
get-static-url
|
||||
link-header)
|
||||
|
||||
(define-runtime-path path-static "../static")
|
||||
|
||||
;; Table of every entry in the static directory, built once when the module is
;; instantiated: simplified complete path -> modification time in seconds.
(define static-data
  (for/hash : (Immutable-HashTable Path Nonnegative-Integer) ([entry (directory-list path-static)])
    (let ([resolved (simple-form-path (build-path path-static entry))])
      (values resolved (file-or-directory-modify-seconds resolved)))))
|
||||
|
||||
;; Build the public URL of a static file, with its modification time appended
;; as a `t` query parameter.
;;   path-or-filename : a Path is used as given; a String is taken relative
;;                      to the static directory
;; Raises (from hash-ref) when the resolved path is not a key of `static-data`.
(: get-static-url (Path-String -> String))
(define (get-static-url path-or-filename)
  (define candidate : Path-String
    (cond
      [(path? path-or-filename) path-or-filename]
      [else (build-path path-static path-or-filename)]))
  (define resolved (simple-form-path candidate))
  (format "/static/~a?t=~a"
          (file-name-from-path resolved)
          (hash-ref static-data resolved)))
|
||||
|
||||
; https://developer.mozilla.org/en-US/docs/Web/HTML/Link_types/preload
|
||||
;; Value for a `Link` response header that preloads the main stylesheet, the
;; preact script and the regular web font. Each entry is a list whose first
;; item is a file name and whose remaining items are extra link attributes.
(: link-header String)
(define link-header
  (let ()
    ;; served through get-static-url, so the URL carries a ?t= parameter
    (define versioned : (Listof (Listof String))
      '(("main.css" "as=style")))
    ;; served from a plain /static/ URL
    (define unversioned : (Listof (Listof String))
      '(("preact.js" "as=script")
        ("source-sans-pro-v21-vietnamese_latin-ext_latin_greek-ext_greek_cyrillic-ext_cyrillic-regular.woff2" "as=font" "crossorigin" "type=font/woff2")))

    ;; (url attribute ...) -> "<url>; rel=preload; attribute..."
    (: render-link ((Listof String) -> String))
    (define (render-link entry)
      (define attributes
        (map (λ ([attr : String]) (format "; ~a" attr)) (cdr entry)))
      (format "<~a>; rel=preload~a" (car entry) (string-join attributes "")))

    (define entries : (Listof (Listof String))
      (append
       (map (λ ([e : (Listof String)]) (cons (get-static-url (car e)) (cdr e))) versioned)
       (map (λ ([e : (Listof String)]) (cons (format "/static/~a" (car e)) (cdr e))) unversioned)))

    (string-join (map render-link entries) ", ")))
|
108
src/syntax.rkt
Normal file
108
src/syntax.rkt
Normal file
|
@ -0,0 +1,108 @@
|
|||
#lang racket/base
|
||||
(require (for-syntax racket/base))
|
||||
|
||||
(provide
|
||||
; help make a nested if. if/in will gain the same false form of its containing if/out.
|
||||
if/out
|
||||
; let, but the value for each variable is evaluated within a thread
|
||||
thread-let)
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(define (check-syntax-equal? s1 s2)
|
||||
(check-equal? (syntax->datum s1)
|
||||
(syntax->datum s2))))
|
||||
|
||||
;; actual transforming goes on in here.
|
||||
;; it's in a submodule so that it can be required in both levels, for testing
|
||||
|
||||
(module transform racket/base
  (provide
   transform-if/out
   transform-thread-let)

  ;; Expand (if/out condition true-form false-form) into a plain `if`.
  ;; Every (if/in c t) found inside is rewritten to (if c t false-form), so the
  ;; inner test shares the outer false branch. Nested if/out forms are left
  ;; alone and handle their own if/in forms when they are expanded.
  ;;   stx : syntax object for the whole if/out use
  ;; Returns a syntax object carrying stx's lexical context.
  ;; Raises a syntax error unless exactly three sub-forms are given.
  (define (transform-if/out stx)
    (define tree (cdr (syntax->datum stx))) ; condition true false
    (unless (and (list? tree) (= (length tree) 3))
      (raise-syntax-error 'if/out
                          "bad syntax (expected a condition, a true form and a false form)"
                          stx))
    (define else-forms (cddr tree)) ; the else branch cons cell
    (define result
      (let walk ([node tree])
        (cond
          ; don't go replacing through nested if/out
          [(and (pair? node) (eq? 'if/out (car node))) node]
          ; replace if/in, giving it the outer else branch
          [(and (pair? node) (eq? 'if/in (car node)))
           (append '(if) (cdr node) else-forms)]
          ; recurse down pair head and tail
          [(pair? node) (cons (walk (car node)) (walk (cdr node)))]
          ; end of a list, a symbol, or any other atom: pass it through
          [else node])))
    (datum->syntax stx (cons 'if result)))

  ;; Expand (thread-let ([id expr ...+] ...) body ...+).
  ;; The expansion creates one channel per binding, starts one thread per
  ;; binding that evaluates its expressions and puts the last value on its
  ;; channel, then binds each id to the value read from its channel and runs
  ;; the body forms.
  ;;   stx : syntax object for the whole thread-let use
  ;; Returns a syntax object carrying stx's lexical context.
  ;; Raises a syntax error for a malformed binding list or a missing body.
  (define (transform-thread-let stx)
    (define tree (cdr (syntax->datum stx)))
    (unless (and (pair? tree) (list? tree) (list? (car tree)))
      (raise-syntax-error 'thread-let
                          "bad syntax (expected a list of bindings followed by body forms)"
                          stx))
    (define defs (car tree))
    (define forms (cdr tree))
    (when (null? forms)
      (raise-syntax-error 'thread-let
                          "bad syntax (need some forms to execute after the threads)"
                          stx))
    (for ([def (in-list defs)])
      (unless (and (list? def) (>= (length def) 2) (symbol? (car def)))
        (raise-syntax-error 'thread-let
                            (format "bad syntax (each binding needs an identifier and at least one expression): ~a" def)
                            stx)))
    (datum->syntax
     stx
     `(let ([chv (build-vector ,(length defs) (λ (_) (make-channel)))])
        ; one producer thread per binding; n is the binding's channel index
        ,@(for/list ([def (in-list defs)]
                     [n (in-naturals)])
            `(thread (λ () (channel-put (vector-ref chv ,n) (let _ () ,@(cdr def))))))
        ; collect each value from the channel with the matching index
        (let ,(for/list ([def (in-list defs)]
                         [n (in-naturals)])
                `(,(car def) (channel-get (vector-ref chv ,n))))
          ,@forms)))))
|
||||
|
||||
;; the syntax definitions and their tests go below here
|
||||
|
||||
(require 'transform (for-syntax 'transform))
|
||||
|
||||
;; Macro form of if/out: the whole use is handed to transform-if/out, which
;; returns the replacement syntax.
(define-syntax (if/out stx)
|
||||
(transform-if/out stx))
|
||||
(module+ test
|
||||
(check-syntax-equal? (transform-if/out #'(if/out (condition 1) (if/in (condition 2) (do-yes)) (do-no)))
|
||||
#'(if (condition 1) (if (condition 2) (do-yes) (do-no)) (do-no)))
|
||||
(check-equal? (if/out #t (if/in #t 'yes) 'no) 'yes)
|
||||
(check-equal? (if/out #f (if/in #t 'yes) 'no) 'no)
|
||||
(check-equal? (if/out #t (if/in #f 'yes) 'no) 'no)
|
||||
(check-equal? (if/out #f (if/in #f 'yes) 'no) 'no))
|
||||
|
||||
;; Macro form of thread-let: the whole use is handed to transform-thread-let,
;; which returns the replacement syntax.
(define-syntax (thread-let stx)
|
||||
(transform-thread-let stx))
|
||||
(module+ test
|
||||
; check that it is transformed as expected
|
||||
(check-syntax-equal?
|
||||
(transform-thread-let
|
||||
#'(thread-let ([a (hey "this is a")]
|
||||
[b (hey "this is b")])
|
||||
(list a b)))
|
||||
#'(let ([chv (build-vector 2 (λ (_) (make-channel)))])
|
||||
(thread (λ () (channel-put (vector-ref chv 0) (let _ () (hey "this is a")))))
|
||||
(thread (λ () (channel-put (vector-ref chv 1) (let _ () (hey "this is b")))))
|
||||
(let ([a (channel-get (vector-ref chv 0))]
|
||||
[b (channel-get (vector-ref chv 1))])
|
||||
(list a b))))
|
||||
; check that they actually execute concurrently
|
||||
(define ch (make-channel))
|
||||
(check-equal? (thread-let ([a (begin
|
||||
(channel-put ch 'a)
|
||||
(channel-get ch))]
|
||||
[b (begin0
|
||||
(channel-get ch)
|
||||
(channel-put ch 'b))])
|
||||
(list a b))
|
||||
'(b a))
|
||||
; check that it assigns the correct value to the correct variable
|
||||
(check-equal? (thread-let ([a (sleep 0) 'a] [b 'b]) (list a b))
|
||||
'(a b)))
|
|
@ -1,8 +0,0 @@
|
|||
#lang racket/base
|
||||
(require web-server/http/request-structs
|
||||
net/url-structs
|
||||
(only-in racket/promise delay))
|
||||
(provide
|
||||
test-req)
|
||||
|
||||
;; A canned request for tests: GET https://breezewiki.com/ with no headers,
;; no bindings and no post data, using 127.0.0.1 for both the host and the
;; client address.
(define test-req
  (let ([root-url (url "https" #f "breezewiki.com" #f #t (list (path/param "" '())) '() #f)])
    (request #"GET"
             root-url
             '()          ; headers
             (delay '())  ; bindings promise
             #f           ; post data
             "127.0.0.1"  ; host ip
             0            ; host port
             "127.0.0.1"))) ; client ip
|
|
@ -1,6 +1,6 @@
|
|||
#lang typed/racket/base
|
||||
(require racket/string
|
||||
typed/net/url-structs
|
||||
"config.rkt"
|
||||
"pure-utils.rkt")
|
||||
(require/typed web-server/http/request-structs
|
||||
[#:opaque Header header?])
|
||||
|
@ -10,43 +10,30 @@
|
|||
px-wikiname
|
||||
; make a query string from an association list of strings
|
||||
params->query
|
||||
; custom percent encoding (you probably want params->query instead)
|
||||
percent-encode
|
||||
; sets for custom percent encoding
|
||||
path-set urlencoded-set filename-set
|
||||
; make a proxied version of a fandom url
|
||||
u-proxy-url
|
||||
; check whether a url is on a domain controlled by fandom
|
||||
is-fandom-url?
|
||||
; prints "out: <url>"
|
||||
log-outgoing
|
||||
; pass in a header, headers, or something useless. they'll all combine into a list
|
||||
build-headers
|
||||
; try to follow wikimedia's format for which characters should be encoded/replaced in page titles for the url
|
||||
page-title->path
|
||||
; path/param eats semicolons into params, which need to be fixed back into semicolons
|
||||
fix-semicolons-url-path
|
||||
fix-semicolons-url)
|
||||
build-headers)
|
||||
|
||||
(module+ test
|
||||
(require "typed-rackunit.rkt"))
|
||||
|
||||
(define px-wikiname "[a-zA-Z0-9-]{1,50}")
|
||||
(define px-wikiname "[a-zA-Z0-9-]{3,50}")
|
||||
|
||||
;; https://url.spec.whatwg.org/#urlencoded-serializing
|
||||
|
||||
(define path-set '(#\; ; semicolon is part of the userinfo set in the URL standard, but I'm putting it here
|
||||
#\? #\` #\{ #\} ; path set
|
||||
#\ #\" #\# #\< #\> ; query set
|
||||
; c0 controls included elsewhere
|
||||
; higher ranges included elsewhere
|
||||
))
|
||||
(define urlencoded-set (append
|
||||
'(#\! #\' #\( #\) #\~ ; urlencoded set
|
||||
#\$ #\% #\& #\+ #\, ; component set
|
||||
#\/ #\: #\= #\@ #\[ #\\ #\] #\^ #\| ; userinfo set
|
||||
)
|
||||
path-set))
|
||||
|
||||
(define filename-set '(#\< #\> #\: #\" #\/ #\\ #\| #\? #\* #\# #\~ #\&))
|
||||
(define urlencoded-set '(#\! #\' #\( #\) #\~ ; urlencoded set
|
||||
#\$ #\% #\& #\+ #\, ; component set
|
||||
#\/ #\: #\; #\= #\@ #\[ #\\ #\] #\^ #\| ; userinfo set
|
||||
#\? #\` #\{ #\} ; path set
|
||||
#\ #\" #\# #\< #\> ; query set
|
||||
; c0 controls included elsewhere
|
||||
; higher ranges included elsewhere
|
||||
))
|
||||
|
||||
(: percent-encode (String (Listof Char) Boolean -> Bytes))
|
||||
(define (percent-encode value set space-as-plus)
|
||||
|
@ -94,6 +81,11 @@
|
|||
(λ ([v : String]) (string-append "/proxy?" (params->query `(("dest" . ,url)))))
|
||||
url))
|
||||
|
||||
(: log-outgoing (String -> Void))
|
||||
(define (log-outgoing url-string)
|
||||
(when (config-true? 'log_outgoing)
|
||||
(printf "out: ~a~n" url-string)))
|
||||
|
||||
(: build-headers ((U Header (Listof Header) False Void) * -> (Listof Header)))
|
||||
(define (build-headers . fs)
|
||||
(apply
|
||||
|
@ -106,24 +98,3 @@
|
|||
[(header? f) (list f)]
|
||||
[(pair? f) f]))
|
||||
fs)))
|
||||
|
||||
(: page-title->path (String -> Bytes))
|
||||
(define (page-title->path title)
|
||||
(percent-encode (regexp-replace* " " title "_") path-set #f))
|
||||
|
||||
(: fix-semicolons-url-path ((Listof Path/Param) -> (Listof Path/Param)))
|
||||
(define (fix-semicolons-url-path pps)
|
||||
(for/list ([pp pps])
|
||||
(define path (path/param-path pp))
|
||||
(if (or (null? (path/param-param pp))
|
||||
(symbol? path))
|
||||
pp
|
||||
;; path/param does have params, which need to be fixed into a semicolon.
|
||||
(path/param
|
||||
(string-append path ";" (string-join (path/param-param pp) ";"))
|
||||
null))))
|
||||
|
||||
(: fix-semicolons-url (URL -> URL))
|
||||
(define (fix-semicolons-url orig-url)
|
||||
(struct-copy url orig-url [path (fix-semicolons-url-path (url-path orig-url))]))
|
||||
|
|
@ -86,16 +86,15 @@
|
|||
|
||||
; "element" is a real element with a type and everything (non-string, non-attributes)
|
||||
(define (element-is-element? element)
|
||||
(and (element-is-bits? element) (not (eq? (car element) '&))(not (element-is-xattributes? element))))
|
||||
(and (element-is-bits? element) (not (element-is-xattributes? element))))
|
||||
(module+ test
|
||||
(check-true (element-is-element? '(span "hi")))
|
||||
(check-false (element-is-element? '(@ (alt "Cute cat."))))
|
||||
(check-false (element-is-element? "hi"))
|
||||
(check-false (element-is-element? '(& ndash))))
|
||||
(check-false (element-is-element? "hi")))
|
||||
|
||||
; "element content" is a real element or a string or a (& x) sequence
|
||||
; "element content" is a real element or a string
|
||||
(define (element-is-content? element)
|
||||
(or (string? element) (element-is-element? element) (and (pair? element) (eq? (car element) '&))))
|
||||
(or (string? element) (element-is-element? element)))
|
||||
(module+ test
|
||||
(check-true (element-is-content? '(span "hi")))
|
||||
(check-false (element-is-content? '(@ (alt "Cute cat."))))
|
||||
|
@ -130,7 +129,7 @@
|
|||
(λ (element-type attributes children)
|
||||
(equal? (get-attribute name attributes) value)))
|
||||
|
||||
(define (query-selector selector element #:include-text? [include-text? #f])
|
||||
(define (query-selector selector element)
|
||||
(generator
|
||||
()
|
||||
(let loop ([element element])
|
||||
|
@ -141,9 +140,7 @@
|
|||
[(equal? element-type '*DECL*) #f]
|
||||
[(equal? element-type '@) #f]
|
||||
[#t
|
||||
(when (if include-text?
|
||||
(selector element-type attributes children (filter string? (cdr element)))
|
||||
(selector element-type attributes children))
|
||||
(when (selector element-type attributes children)
|
||||
(yield element))
|
||||
(for ([child children]) (loop child))]))
|
||||
#f))
|
||||
|
@ -160,40 +157,26 @@
|
|||
(define element-type (car element))
|
||||
(define attributes (bits->attributes (cdr element)))
|
||||
(define contents (filter element-is-content? (cdr element))) ; provide elements and strings
|
||||
(cond
|
||||
[(equal? element-type '*DECL*)
|
||||
; declarations like <!DOCTYPE html> get mapped as attributes as if the element were (*DECL* (@ (DOCTYPE) (html)))
|
||||
(match (transformer element element-type (map list (cdr element)) null)
|
||||
[(list element-type attributes contents)
|
||||
`(*DECL* ,@(map car attributes))]
|
||||
[#f ""])]
|
||||
[(member element-type '(@ &))
|
||||
; special element, do nothing
|
||||
element]
|
||||
[#t
|
||||
; regular element, transform it
|
||||
(match (transformer element element-type attributes contents)
|
||||
[(list element-type attributes contents)
|
||||
(append (list element-type)
|
||||
(if (pair? attributes) (list (append '(@) attributes)) (list))
|
||||
(map (λ (content)
|
||||
(if (element-is-element? content) (loop content) content))
|
||||
contents))])])))
|
||||
(if (or (equal? element-type '*DECL)
|
||||
(equal? element-type '@)
|
||||
(equal? element-type '&))
|
||||
; special element, do nothing
|
||||
element
|
||||
; regular element, transform it
|
||||
(match (transformer element element-type attributes contents)
|
||||
[(list element-type attributes contents)
|
||||
(append (list element-type)
|
||||
(if (pair? attributes) (list (append '(@) attributes)) (list))
|
||||
(map (λ (content)
|
||||
(if (element-is-element? content) (loop content) content))
|
||||
contents))]))))
|
||||
(module+ test
|
||||
; check doctype is preserved when present
|
||||
(check-equal? (update-tree (λ (e t a c) (list t a c)) '(*TOP* (*DECL* DOCTYPE html) (html (body "Hey"))))
|
||||
'(*TOP* (*DECL* DOCTYPE html) (html (body "Hey"))))
|
||||
; check doctype can be removed if desirable
|
||||
(check-equal? (update-tree (λ (e t a c) (if (eq? t '*DECL*) #f (list t a c))) '(*TOP* (*DECL* DOCTYPE html) (html (body "Hey"))))
|
||||
'(*TOP* "" (html (body "Hey"))))
|
||||
; check (& x) sequences are preserved
|
||||
(check-equal? (update-tree (λ (e t a c) (list t a c)) '(body "Hey" (& nbsp) (a (@ (href "/")))))
|
||||
'(body "Hey" (& nbsp) (a (@ (href "/"))))))
|
||||
|
||||
(define (has-class? name attributes)
|
||||
;; splitting without specifying separator or splitting on #px"\\s+" makes
|
||||
;; string-split use a faster whitespace-specialized implementation.
|
||||
(and (member name (string-split (or (get-attribute 'class attributes) "") #px"\\s+")) #t))
|
||||
(and (member name (string-split (or (get-attribute 'class attributes) "") " ")) #t))
|
||||
(module+ test
|
||||
(check-true (has-class? "red" '((class "yellow red blue"))))
|
||||
(check-false (has-class? "red" '((class "yellow blue"))))
|
|
@ -1,136 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg:svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
height="36.41787"
|
||||
width="224.9014"
|
||||
version="1.1"
|
||||
id="svg912"
|
||||
sodipodi:docname="breezewiki-color.svg"
|
||||
inkscape:version="0.92.5 (2060ec1f9f, 2020-04-08)">
|
||||
<svg:metadata
|
||||
id="metadata918">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</svg:metadata>
|
||||
<svg:defs
|
||||
id="defs916" />
|
||||
<sodipodi:namedview
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1"
|
||||
objecttolerance="10"
|
||||
gridtolerance="10"
|
||||
guidetolerance="10"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:window-width="1882"
|
||||
inkscape:window-height="1059"
|
||||
id="namedview914"
|
||||
showgrid="false"
|
||||
showguides="false"
|
||||
inkscape:zoom="2.8284271"
|
||||
inkscape:cx="123.17581"
|
||||
inkscape:cy="7.7496502"
|
||||
inkscape:window-x="38"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="svg912"
|
||||
showborder="true"
|
||||
inkscape:pagecheckerboard="true"
|
||||
inkscape:snap-bbox="true"
|
||||
inkscape:bbox-nodes="true"
|
||||
inkscape:snap-smooth-nodes="true" />
|
||||
<div
|
||||
id="saka-gui-root">
|
||||
<div>
|
||||
<div>
|
||||
<style />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<svg:g
|
||||
aria-label="wiki"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;line-height:2.67644334px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="text926"
|
||||
transform="translate(97.9947,1.1875002)">
|
||||
<svg:path
|
||||
d="m 81.588122,13.020001 q 0,0.08533 0,0.298666 -0.04267,0.256001 -0.04267,0.384001 0,0.128 -0.341333,0.213333 -0.810667,0.170667 -0.981334,0.256 -0.341333,0.170667 -0.725333,0.725333 -0.512,0.725334 -1.237333,2.517334 l -4.480001,11.008 q -0.128,0.298667 -0.725333,0.298667 -0.554667,0 -0.682667,-0.298667 l -3.584,-8.874667 -3.626667,8.874667 q -0.128,0.298667 -0.725333,0.298667 -0.554667,0 -0.682667,-0.298667 L 58.846788,16.561334 q -0.554667,-1.322666 -1.066667,-1.92 -0.469333,-0.64 -1.706667,-0.896 -0.341333,-0.08533 -0.341333,-0.256 v -0.725333 q 0,-0.298667 0.298667,-0.298667 0.896,0 2.474666,0.08533 1.749334,0.08533 2.432,0.08533 0,0 4.821334,0 1.152,0 3.456,-0.08533 2.304,-0.08533 3.456,-0.08533 0.341333,0 0.341333,0.469333 0,0.08533 0,0.298667 -0.04267,0.170667 -0.04267,0.256 0,0.256 -0.981334,0.384 -0.981333,0.128 -0.981333,0.682667 0,0.256 0.128,0.597333 l 3.157333,8.064 2.986667,-7.552 q 0.213333,-0.554666 0.213333,-0.938666 0,-0.554667 -0.810666,-0.682667 -0.810667,-0.128 -0.810667,-0.384 v -0.725334 q 0,-0.298666 0.469333,-0.298666 0.384,0 1.194667,0.128 0.810667,0.08533 1.194667,0.08533 0.426667,0 1.194667,-0.08533 0.768,-0.128 1.024,-0.128 0.64,0 0.64,0.384 z m -13.525334,4.693333 -0.853333,-2.133333 q -0.554667,-1.408 -2.517334,-1.408 h -1.536 q -0.810667,0 -0.810667,0.64 0,0.256 0.128,0.597333 l 3.242667,8.149334 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';text-align:start;text-anchor:start;fill:#000000;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="path842"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 91.700066,5.6813339 q 0,1.1093334 -0.768,1.8773334 -0.768,0.768 -1.877333,0.768 -1.109333,0 -1.877333,-0.768 -0.768,-0.768 -0.768,-1.8773334 0,-1.1093333 0.768,-1.8773334 0.768,-0.768 1.877333,-0.768 1.109333,0 1.877333,0.768 0.768,0.7680001 0.768,1.8773334 z m 1.706667,22.3573341 q 0,0.298667 -0.298667,0.298667 -0.298666,0 -0.298666,0 -1.962667,-0.170667 -3.669334,-0.170667 -1.621333,0 -3.584,0.170667 0,0 -0.170666,0 -0.469334,0 -0.469334,-0.426667 0,-0.08533 0.04267,-0.298667 0,-0.213333 0,-0.298666 0,-0.256 0.853333,-0.384 0.853334,-0.128 1.066667,-0.554667 0.256,-0.469333 0.256,-1.834667 v -6.528 q 0,-1.322667 -0.256,-1.749333 -0.170667,-0.298667 -1.109333,-0.682667 -0.896,-0.384 -0.896,-0.768 0,-0.256 0.128,-0.469333 0.04267,-0.08533 2.858666,-1.194667 2.816,-1.152 3.029334,-1.152 0.341333,0 0.341333,0.341333 v 11.904 q 0,1.749334 0.170667,2.090667 0.256,0.469334 1.109333,0.597334 0.896,0.128 0.896,0.384 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';text-align:start;text-anchor:start;fill:#000000;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="path844"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 114.52672,27.526668 q 0,0.810667 -0.29867,0.810667 -0.59733,0 -1.87733,-0.08533 -1.23733,-0.08533 -1.87733,-0.08533 -0.59734,0 -1.83467,0.08533 -1.19467,0.08533 -1.792,0.08533 -0.384,0 -0.384,-0.469334 0,-0.08533 0.0427,-0.298666 0,-0.170667 0,-0.256 0,-0.256 0.55466,-0.341334 0.59734,-0.128 0.59734,-0.512 0,-0.298666 -0.512,-1.152 l -3.24267,-5.418667 -1.87733,1.408 v 3.2 q 0,1.408001 0.21333,1.877334 0.21333,0.426667 1.152,0.554667 0.81067,0.128 0.81067,0.384 v 0.725333 q 0,0.298667 -0.59734,0.298667 -0.59733,0 -1.83466,-0.08533 -1.19467,-0.08533 -1.792004,-0.08533 -0.597333,0 -1.834667,0.08533 -1.194666,0.08533 -1.792,0.08533 -0.597333,0 -0.597333,-0.426667 v -0.597333 q 0,-0.256 0.853333,-0.384 0.853334,-0.128 1.066667,-0.554667 0.256,-0.469333 0.256,-1.834667 V 6.4920006 q 0,-1.3226667 -0.256,-1.7493334 -0.170667,-0.2986666 -1.109333,-0.6826666 -0.896001,-0.3840001 -0.896001,-0.7680001 0,-0.256 0.128,-0.4693333 0.04267,-0.085333 2.858667,-1.1946667 2.816001,-1.15200005 3.029331,-1.15200005 0.34134,0 0.34134,0.34133334 V 19.718668 q 5.71733,-4.309334 5.71733,-5.290667 0,-0.682667 -1.792,-0.682667 -0.46933,0 -0.46933,-0.512 0,-0.768 0.42666,-0.768 0.55467,0 1.70667,0.08533 1.19467,0.08533 1.74933,0.08533 0.55467,0 1.70667,-0.08533 1.152,-0.08533 1.70667,-0.08533 0.42666,0 0.42666,0.469333 0,0.08533 0,0.298667 -0.0427,0.170667 -0.0427,0.298667 0,0.128 -0.384,0.213333 -0.64,0.08533 -1.83467,0.426667 -0.512,0.213333 -2.048,1.578667 -1.024,0.896 -2.09067,1.834666 l 4.224,6.656 q 1.57867,2.474667 2.60267,2.688001 0.29867,0.04267 0.59733,0.128 0.29867,0.08533 0.29867,0.469333 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';text-align:start;text-anchor:start;fill:#000000;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="path846"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 123.69999,5.6813339 q 0,1.1093334 -0.768,1.8773334 -0.768,0.768 -1.87734,0.768 -1.10933,0 -1.87733,-0.768 -0.768,-0.768 -0.768,-1.8773334 0,-1.1093333 0.768,-1.8773334 0.768,-0.768 1.87733,-0.768 1.10934,0 1.87734,0.768 0.768,0.7680001 0.768,1.8773334 z m 1.70666,22.3573341 q 0,0.298667 -0.29866,0.298667 -0.29867,0 -0.29867,0 -1.96267,-0.170667 -3.66933,-0.170667 -1.62134,0 -3.584,0.170667 0,0 -0.17067,0 -0.46933,0 -0.46933,-0.426667 0,-0.08533 0.0427,-0.298667 0,-0.213333 0,-0.298666 0,-0.256 0.85334,-0.384 0.85333,-0.128 1.06666,-0.554667 0.256,-0.469333 0.256,-1.834667 v -6.528 q 0,-1.322667 -0.256,-1.749333 -0.17066,-0.298667 -1.10933,-0.682667 -0.896,-0.384 -0.896,-0.768 0,-0.256 0.128,-0.469333 0.0427,-0.08533 2.85867,-1.194667 2.816,-1.152 3.02933,-1.152 0.34133,0 0.34133,0.341333 v 11.904 q 0,1.749334 0.17067,2.090667 0.256,0.469334 1.10933,0.597334 0.896,0.128 0.896,0.384 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';text-align:start;text-anchor:start;fill:#000000;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="path848"
|
||||
inkscape:connector-curvature="0" />
|
||||
</svg:g>
|
||||
<svg:g
|
||||
aria-label="breeze"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;line-height:2.57638931px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;opacity:1;fill:#252525;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="text930"
|
||||
transform="translate(-69.005304,1.5000002)">
|
||||
<svg:path
|
||||
d="m 162.59026,13.697235 q 0,0.782126 -0.48131,1.363707 -0.46126,0.581581 -1.22333,0.581581 -0.46125,0 -0.92251,-0.421145 -0.44119,-0.421144 -0.80218,-0.421144 -0.16043,0 -0.9225,0.80218 -1.28349,1.323598 -2.68731,3.690031 -1.34365,2.266161 -2.00545,4.111176 -0.48131,1.504089 -1.52414,4.452103 -0.28077,0.701908 -0.80219,0.701908 -0.50136,0 -0.98267,-0.240654 -0.58158,-0.280763 -0.58158,-0.721962 0,-0.180491 0.10028,-0.461254 l 2.52686,-7.259736 q 0.8824,-2.526869 0.8824,-4.111176 0,-1.383761 -0.80218,-1.383761 -1.103,0 -2.08567,1.263434 -0.96261,1.263435 -0.98267,1.263435 -0.14038,0 -0.46125,-0.2206 -0.32087,-0.240654 -0.32087,-0.381036 0,-0.140381 0.0802,-0.280763 0.80218,-1.383762 1.88513,-2.607087 1.50409,-1.664525 2.52687,-1.664525 2.12578,0 2.12578,3.10845 0,1.443925 -0.42115,3.288941 3.9708,-6.4375 6.05647,-6.4375 0.76207,0 1.28348,0.601635 0.54148,0.601636 0.54148,1.383762 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path853"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 148.05073,15.502142 q 0,4.672702 -5.71554,9.485786 -4.25155,3.569704 -7.74104,3.569704 -1.38376,0 -2.24611,-0.661799 -0.96261,-0.721962 -0.96261,-2.065615 0,-0.561526 0.20054,-1.24338 l 5.85592,-20.1748444 q 0.12033,-0.4211449 0.12033,-0.7420171 0,-0.8823988 -1.12305,-1.1832166 -1.12306,-0.3008177 -1.12306,-0.5013629 0,-0.7821262 0.62169,-1.04283489 0.30082,-0.0200545 0.78213,-0.0601636 0.56153,-0.0601635 2.28621,-0.4612539 Q 140.73083,-2.0861626e-7 140.791,-2.0861626e-7 q 0.42114,0 0.42114,0.38103582861626 0,0.18049066 -1.20327,4.33177578 L 136.09824,18.06912 q 5.03368,-6.357282 9.06464,-6.357282 1.34365,0 2.16589,1.263434 0.72196,1.102999 0.72196,2.52687 z m -2.92796,0.200545 q 0,-1.764798 -1.48403,-1.764798 -2.54693,0 -6.05647,4.331776 -3.42932,4.211449 -3.42932,6.83859 0,1.965343 1.36371,1.965343 2.78758,0 6.25701,-4.311721 3.3491,-4.17134 3.3491,-7.05919 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path851"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 175.36498,14.760125 q 0,3.028232 -3.66997,4.572429 -2.18595,0.902454 -6.29712,1.403817 -0.16044,1.123053 -0.16044,2.145833 0,3.54965 3.46943,3.54965 1.12306,0 2.4266,-1.183217 1.30354,-1.183216 1.22332,-1.183216 0.14039,0 0.46126,0.320872 0.32087,0.300818 0.32087,0.441199 0,0.140382 -0.12033,0.300818 -2.54692,3.429322 -5.89602,3.429322 -2.18595,0 -3.5296,-1.544197 -1.28349,-1.46398 -1.28349,-3.669977 0,-4.612539 2.82769,-8.12208 2.84774,-3.50954 6.61799,-3.50954 1.50409,0 2.50681,0.762072 1.103,0.842289 1.103,2.286215 z m -2.54692,0.220599 q 0,-1.784852 -1.46398,-1.784852 -1.84502,0 -3.54965,2.205997 -1.3236,1.704634 -2.04556,3.890576 7.05919,-0.822235 7.05919,-4.311721 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path855"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 188.96195,14.760125 q 0,3.028232 -3.66998,4.572429 -2.18594,0.902454 -6.29712,1.403817 -0.16043,1.123053 -0.16043,2.145833 0,3.54965 3.46943,3.54965 1.12305,0 2.4266,-1.183217 1.30354,-1.183216 1.22332,-1.183216 0.14038,0 0.46125,0.320872 0.32088,0.300818 0.32088,0.441199 0,0.140382 -0.12033,0.300818 -2.54692,3.429322 -5.89603,3.429322 -2.18594,0 -3.52959,-1.544197 -1.28349,-1.46398 -1.28349,-3.669977 0,-4.612539 2.82768,-8.12208 2.84775,-3.50954 6.618,-3.50954 1.50408,0 2.50681,0.762072 1.103,0.842289 1.103,2.286215 z m -2.54693,0.220599 q 0,-1.784852 -1.46398,-1.784852 -1.84501,0 -3.54964,2.205997 -1.3236,1.704634 -2.04557,3.890576 7.05919,-0.822235 7.05919,-4.311721 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path857"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 204.80502,12.514019 q 0,0.721962 -0.92251,1.704634 -0.36098,0.40109 -6.39739,6.477609 -1.34366,1.283489 -3.7502,3.850467 l 0.36098,0.180491 q 2.72742,1.363707 4.17134,4.411993 0.80218,1.68458 1.48404,1.68458 0.58158,0 1.103,-1.042835 0.54147,-1.042835 1.22332,-1.042835 0.94256,0 0.94256,0.942562 0,1.102999 -1.40381,1.865071 -1.20327,0.641744 -2.4266,0.641744 -1.58431,0 -3.16861,-1.363707 -0.42115,-0.381036 -2.36644,-2.546924 -1.22332,-1.363707 -1.96534,-1.363707 -0.48131,0 -1.12305,0.742017 -0.62169,0.742017 -1.103,0.742017 -0.68185,0 -0.68185,-0.561526 0,-0.4412 1.90518,-2.226052 0.0802,-0.08022 3.32905,-3.369159 1.8049,-1.764797 5.27433,-5.394665 -0.84229,-0.160436 -1.60436,-0.421145 -0.40109,-0.140381 -1.98539,-1.123052 -1.34366,-0.822236 -1.90518,-0.822236 -0.56153,0 -0.94257,0.461254 -0.38103,0.4412 -0.38103,1.002726 0,0.340927 0.34092,0.862344 0.34093,0.501363 0.34093,0.762072 0,0.481308 -0.58158,0.481308 -0.64174,0 -1.103,-0.982671 -0.38103,-0.802181 -0.38103,-1.544198 0,-1.504089 1.12305,-2.647196 1.12305,-1.163162 2.60709,-1.163162 1.20327,0 3.40926,1.24338 2.206,1.223326 3.38922,1.223326 0.70191,0 1.28349,-1.203272 0.58158,-1.203271 1.24338,-1.203271 0.6618,0 0.6618,0.742018 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path859"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 217.6399,14.760125 q 0,3.028232 -3.66997,4.572429 -2.18595,0.902454 -6.29712,1.403817 -0.16044,1.123053 -0.16044,2.145833 0,3.54965 3.46943,3.54965 1.12306,0 2.4266,-1.183217 1.30354,-1.183216 1.22333,-1.183216 0.14038,0 0.46125,0.320872 0.32087,0.300818 0.32087,0.441199 0,0.140382 -0.12032,0.300818 -2.54693,3.429322 -5.89603,3.429322 -2.18595,0 -3.5296,-1.544197 -1.28349,-1.46398 -1.28349,-3.669977 0,-4.612539 2.82769,-8.12208 2.84774,-3.50954 6.61799,-3.50954 1.50409,0 2.50682,0.762072 1.10299,0.842289 1.10299,2.286215 z m -2.54692,0.220599 q 0,-1.784852 -1.46398,-1.784852 -1.84502,0 -3.54965,2.205997 -1.3236,1.704634 -2.04556,3.890576 7.05919,-0.822235 7.05919,-4.311721 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path861"
|
||||
inkscape:connector-curvature="0" />
|
||||
</svg:g>
|
||||
<svg:path
|
||||
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#fa005a;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:0.72772277;paint-order:stroke fill markers;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
||||
d="m 16.886254,1.9938812 c -3.910133,-0.022008 -6.9535917,2.245681 -9.0800783,4.662109 -2.0973298,2.383295 -3.833062,5.3337288 -5.6621092,8.9804688 -2.05699735,4.100418 1.0356001,9.111984 5.6230472,9.111328 H 22.208519 c 1.140931,2.016544 2.30672,3.900631 3.666016,5.457031 2.103842,2.40891 5.109808,4.690926 9.011718,4.712891 3.908789,0.02201 6.953047,-2.243435 9.080078,-4.660157 2.097507,-2.383176 3.833076,-5.333755 5.66211,-8.980468 C 51.685439,17.176666 48.59284,12.1651 44.005394,12.165756 H 29.563988 C 28.422971,10.14915 27.258741,8.2650182 25.899925,6.7087242 23.796753,4.2998952 20.789506,2.0158522 16.886254,1.9938812 Z m -0.02344,4.4375 c 2.180149,0.012272 4.038227,1.299637 5.693359,3.1953118 1.245218,1.426187 2.495246,3.44536 3.78711,5.818359 a 2.2186407,2.2186407 0 0 0 1.949218,1.158204 h 15.712891 c 1.472457,-2.11e-4 2.318449,1.369414 1.658203,2.685546 -1.751077,3.491284 -3.337367,6.116702 -5.029297,8.039063 -1.662405,1.888817 -3.551482,3.164578 -5.724609,3.152344 -2.180003,-0.01227 -4.040284,-1.298343 -5.695313,-3.19336 C 27.969301,25.86123 26.721297,23.841706 25.429222,21.46849 A 2.2186407,2.2186407 0 0 0 23.480003,20.310287 H 7.7671137 C 6.2946566,20.310498 5.448665,18.940873 6.1089102,17.62474 7.8599547,14.133521 9.4461247,11.50847 11.138206,9.585677 12.801131,7.6960182 14.689541,6.4191492 16.862816,6.4313812 Z"
|
||||
id="path819"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path837"
|
||||
d="m 43.966331,30.257552 c -29.310887,4.106878 -14.655444,2.053439 0,0 z M 16.862814,6.4313812 c 2.180149,0.012272 4.038227,1.299637 5.693359,3.1953118 1.245218,1.426187 2.495246,3.44536 3.78711,5.818359 0.388515,0.713988 1.136369,1.158355 1.949218,1.158204 h 15.712891 c 1.472457,-2.11e-4 2.318449,1.369414 1.658203,2.685546 -1.751077,3.491284 -3.337367,6.116702 -5.029297,8.039063 -1.662405,1.888817 -3.551482,3.164578 -5.724609,3.152344 -2.180003,-0.01227 -4.040284,-1.298343 -5.695313,-3.19336 -1.245075,-1.425619 -2.493079,-3.445143 -3.785154,-5.818359 -0.388515,-0.713988 -1.13637,-1.158355 -1.949219,-1.158203 H 7.7671137 C 6.2946566,20.310498 5.448665,18.940873 6.1089102,17.62474 7.8599547,14.133521 9.4461247,11.50847 11.138206,9.585677 12.801131,7.6960182 14.689541,6.4191492 16.862816,6.4313812 Z"
|
||||
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffc500;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:0.72772277;paint-order:stroke fill markers;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
||||
sodipodi:nodetypes="cccsccccscsccccscc" />
|
||||
</svg:svg>
|
Before Width: | Height: | Size: 22 KiB |
File diff suppressed because one or more lines are too long
Before Width: | Height: | Size: 5.6 KiB |
|
@ -1,83 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg:svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
height="68.119537"
|
||||
width="68.119537"
|
||||
version="1.1"
|
||||
id="svg912"
|
||||
sodipodi:docname="breezewiki-icon.svg"
|
||||
inkscape:version="0.92.5 (2060ec1f9f, 2020-04-08)"
|
||||
inkscape:export-filename="/tmp/breezewiki-icon.png"
|
||||
inkscape:export-xdpi="721.55511"
|
||||
inkscape:export-ydpi="721.55511">
|
||||
<svg:metadata
|
||||
id="metadata918">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title />
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</svg:metadata>
|
||||
<svg:defs
|
||||
id="defs916" />
|
||||
<sodipodi:namedview
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1"
|
||||
objecttolerance="10"
|
||||
gridtolerance="10"
|
||||
guidetolerance="10"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:window-width="1882"
|
||||
inkscape:window-height="1059"
|
||||
id="namedview914"
|
||||
showgrid="false"
|
||||
showguides="false"
|
||||
inkscape:zoom="8"
|
||||
inkscape:cx="40.321257"
|
||||
inkscape:cy="38.370751"
|
||||
inkscape:window-x="38"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="svg912"
|
||||
showborder="true"
|
||||
inkscape:pagecheckerboard="true"
|
||||
inkscape:snap-bbox="true"
|
||||
inkscape:bbox-nodes="true"
|
||||
inkscape:snap-smooth-nodes="true" />
|
||||
<svg:circle
|
||||
style="opacity:1;fill:#520044;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:8;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:0.75294118;paint-order:stroke fill markers"
|
||||
id="path835"
|
||||
cx="34.059769"
|
||||
cy="34.059769"
|
||||
r="34.059769" />
|
||||
<div
|
||||
id="saka-gui-root">
|
||||
<div>
|
||||
<div>
|
||||
<style />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<svg:path
|
||||
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#fa005a;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:0.72772277;paint-order:stroke fill markers;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
||||
d="m 25.016194,17.638372 c -3.910133,-0.02201 -6.953592,2.245681 -9.080078,4.662109 -2.09733,2.383295 -3.833062,5.333729 -5.662109,8.980469 -2.0569978,4.100418 1.0356,9.111984 5.623047,9.111328 h 14.441405 c 1.140931,2.016544 2.30672,3.900631 3.666016,5.457031 2.103842,2.40891 5.109808,4.690926 9.011718,4.712891 3.908789,0.02201 6.953047,-2.243435 9.080078,-4.660157 2.097507,-2.383176 3.833076,-5.333755 5.66211,-8.980468 2.056998,-4.100418 -1.035601,-9.111984 -5.623047,-9.111328 H 37.693928 c -1.141017,-2.016606 -2.305247,-3.900738 -3.664063,-5.457032 -2.103172,-2.408829 -5.110419,-4.692872 -9.013671,-4.714843 z m -0.02344,4.4375 c 2.180149,0.01227 4.038227,1.299637 5.693359,3.195312 1.245218,1.426187 2.495246,3.44536 3.78711,5.818359 a 2.2186407,2.2186407 0 0 0 1.949218,1.158204 h 15.712891 c 1.472457,-2.11e-4 2.318449,1.369414 1.658203,2.685546 -1.751077,3.491284 -3.337367,6.116702 -5.029297,8.039063 -1.662405,1.888817 -3.551482,3.164578 -5.724609,3.152344 -2.180003,-0.01227 -4.040284,-1.298343 -5.695313,-3.19336 -1.245075,-1.425619 -2.493079,-3.445143 -3.785154,-5.818359 A 2.2186407,2.2186407 0 0 0 31.609943,35.954778 H 15.897054 c -1.472457,2.11e-4 -2.318449,-1.369414 -1.658204,-2.685547 1.751045,-3.491219 3.337215,-6.11627 5.029296,-8.039063 1.662925,-1.889659 3.551335,-3.166528 5.72461,-3.154296 z"
|
||||
id="path819"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path837"
|
||||
d="m 52.096271,45.902043 c -29.310887,4.106878 -14.655444,2.053439 0,0 z M 24.992754,22.075872 c 2.180149,0.01227 4.038227,1.299637 5.693359,3.195312 1.245218,1.426187 2.495246,3.44536 3.78711,5.818359 0.388515,0.713988 1.136369,1.158355 1.949218,1.158204 h 15.712891 c 1.472457,-2.11e-4 2.318449,1.369414 1.658203,2.685546 -1.751077,3.491284 -3.337367,6.116702 -5.029297,8.039063 -1.662405,1.888817 -3.551482,3.164578 -5.724609,3.152344 -2.180003,-0.01227 -4.040284,-1.298343 -5.695313,-3.19336 -1.245075,-1.425619 -2.493079,-3.445143 -3.785154,-5.818359 -0.388515,-0.713988 -1.13637,-1.158355 -1.949219,-1.158203 H 15.897054 c -1.472457,2.11e-4 -2.318449,-1.369414 -1.658204,-2.685547 1.751045,-3.491219 3.337215,-6.11627 5.029296,-8.039063 1.662925,-1.889659 3.551335,-3.166528 5.72461,-3.154296 z"
|
||||
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffc500;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:0.72772277;paint-order:stroke fill markers;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
||||
sodipodi:nodetypes="cccsccccscsccccscc" />
|
||||
</svg:svg>
|
Before Width: | Height: | Size: 6.9 KiB |
|
@ -1,142 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg:svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
height="36.41787"
|
||||
width="224.9014"
|
||||
version="1.1"
|
||||
id="svg912"
|
||||
sodipodi:docname="breezewiki-master.svg"
|
||||
inkscape:version="0.92.5 (2060ec1f9f, 2020-04-08)">
|
||||
<svg:metadata
|
||||
id="metadata918">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</svg:metadata>
|
||||
<svg:defs
|
||||
id="defs916" />
|
||||
<sodipodi:namedview
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1"
|
||||
objecttolerance="10"
|
||||
gridtolerance="10"
|
||||
guidetolerance="10"
|
||||
inkscape:pageopacity="0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:window-width="1882"
|
||||
inkscape:window-height="1059"
|
||||
id="namedview914"
|
||||
showgrid="false"
|
||||
showguides="false"
|
||||
inkscape:zoom="2.8284271"
|
||||
inkscape:cx="123.17581"
|
||||
inkscape:cy="7.7496502"
|
||||
inkscape:window-x="38"
|
||||
inkscape:window-y="0"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:current-layer="svg912"
|
||||
showborder="true"
|
||||
inkscape:pagecheckerboard="true"
|
||||
inkscape:snap-bbox="true"
|
||||
inkscape:bbox-nodes="true"
|
||||
inkscape:snap-smooth-nodes="true" />
|
||||
<svg:circle
|
||||
style="opacity:1;fill:#520044;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:8;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:0.75294118;paint-order:stroke fill markers"
|
||||
id="path835"
|
||||
cx="25.929829"
|
||||
cy="18.415277"
|
||||
r="34.059769" />
|
||||
<div
|
||||
id="saka-gui-root">
|
||||
<div>
|
||||
<div>
|
||||
<style />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<svg:g
|
||||
aria-label="wiki"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;line-height:2.67644334px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;opacity:1;fill:#000000;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="text926"
|
||||
transform="translate(97.9947,1.1875002)">
|
||||
<svg:path
|
||||
d="m 81.588122,13.020001 q 0,0.08533 0,0.298666 -0.04267,0.256001 -0.04267,0.384001 0,0.128 -0.341333,0.213333 -0.810667,0.170667 -0.981334,0.256 -0.341333,0.170667 -0.725333,0.725333 -0.512,0.725334 -1.237333,2.517334 l -4.480001,11.008 q -0.128,0.298667 -0.725333,0.298667 -0.554667,0 -0.682667,-0.298667 l -3.584,-8.874667 -3.626667,8.874667 q -0.128,0.298667 -0.725333,0.298667 -0.554667,0 -0.682667,-0.298667 L 58.846788,16.561334 q -0.554667,-1.322666 -1.066667,-1.92 -0.469333,-0.64 -1.706667,-0.896 -0.341333,-0.08533 -0.341333,-0.256 v -0.725333 q 0,-0.298667 0.298667,-0.298667 0.896,0 2.474666,0.08533 1.749334,0.08533 2.432,0.08533 0,0 4.821334,0 1.152,0 3.456,-0.08533 2.304,-0.08533 3.456,-0.08533 0.341333,0 0.341333,0.469333 0,0.08533 0,0.298667 -0.04267,0.170667 -0.04267,0.256 0,0.256 -0.981334,0.384 -0.981333,0.128 -0.981333,0.682667 0,0.256 0.128,0.597333 l 3.157333,8.064 2.986667,-7.552 q 0.213333,-0.554666 0.213333,-0.938666 0,-0.554667 -0.810666,-0.682667 -0.810667,-0.128 -0.810667,-0.384 v -0.725334 q 0,-0.298666 0.469333,-0.298666 0.384,0 1.194667,0.128 0.810667,0.08533 1.194667,0.08533 0.426667,0 1.194667,-0.08533 0.768,-0.128 1.024,-0.128 0.64,0 0.64,0.384 z m -13.525334,4.693333 -0.853333,-2.133333 q -0.554667,-1.408 -2.517334,-1.408 h -1.536 q -0.810667,0 -0.810667,0.64 0,0.256 0.128,0.597333 l 3.242667,8.149334 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';text-align:start;text-anchor:start;fill:#000000;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="path842"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 91.700066,5.6813339 q 0,1.1093334 -0.768,1.8773334 -0.768,0.768 -1.877333,0.768 -1.109333,0 -1.877333,-0.768 -0.768,-0.768 -0.768,-1.8773334 0,-1.1093333 0.768,-1.8773334 0.768,-0.768 1.877333,-0.768 1.109333,0 1.877333,0.768 0.768,0.7680001 0.768,1.8773334 z m 1.706667,22.3573341 q 0,0.298667 -0.298667,0.298667 -0.298666,0 -0.298666,0 -1.962667,-0.170667 -3.669334,-0.170667 -1.621333,0 -3.584,0.170667 0,0 -0.170666,0 -0.469334,0 -0.469334,-0.426667 0,-0.08533 0.04267,-0.298667 0,-0.213333 0,-0.298666 0,-0.256 0.853333,-0.384 0.853334,-0.128 1.066667,-0.554667 0.256,-0.469333 0.256,-1.834667 v -6.528 q 0,-1.322667 -0.256,-1.749333 -0.170667,-0.298667 -1.109333,-0.682667 -0.896,-0.384 -0.896,-0.768 0,-0.256 0.128,-0.469333 0.04267,-0.08533 2.858666,-1.194667 2.816,-1.152 3.029334,-1.152 0.341333,0 0.341333,0.341333 v 11.904 q 0,1.749334 0.170667,2.090667 0.256,0.469334 1.109333,0.597334 0.896,0.128 0.896,0.384 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';text-align:start;text-anchor:start;fill:#000000;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="path844"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 114.52672,27.526668 q 0,0.810667 -0.29867,0.810667 -0.59733,0 -1.87733,-0.08533 -1.23733,-0.08533 -1.87733,-0.08533 -0.59734,0 -1.83467,0.08533 -1.19467,0.08533 -1.792,0.08533 -0.384,0 -0.384,-0.469334 0,-0.08533 0.0427,-0.298666 0,-0.170667 0,-0.256 0,-0.256 0.55466,-0.341334 0.59734,-0.128 0.59734,-0.512 0,-0.298666 -0.512,-1.152 l -3.24267,-5.418667 -1.87733,1.408 v 3.2 q 0,1.408001 0.21333,1.877334 0.21333,0.426667 1.152,0.554667 0.81067,0.128 0.81067,0.384 v 0.725333 q 0,0.298667 -0.59734,0.298667 -0.59733,0 -1.83466,-0.08533 -1.19467,-0.08533 -1.792004,-0.08533 -0.597333,0 -1.834667,0.08533 -1.194666,0.08533 -1.792,0.08533 -0.597333,0 -0.597333,-0.426667 v -0.597333 q 0,-0.256 0.853333,-0.384 0.853334,-0.128 1.066667,-0.554667 0.256,-0.469333 0.256,-1.834667 V 6.4920006 q 0,-1.3226667 -0.256,-1.7493334 -0.170667,-0.2986666 -1.109333,-0.6826666 -0.896001,-0.3840001 -0.896001,-0.7680001 0,-0.256 0.128,-0.4693333 0.04267,-0.085333 2.858667,-1.1946667 2.816001,-1.15200005 3.029331,-1.15200005 0.34134,0 0.34134,0.34133334 V 19.718668 q 5.71733,-4.309334 5.71733,-5.290667 0,-0.682667 -1.792,-0.682667 -0.46933,0 -0.46933,-0.512 0,-0.768 0.42666,-0.768 0.55467,0 1.70667,0.08533 1.19467,0.08533 1.74933,0.08533 0.55467,0 1.70667,-0.08533 1.152,-0.08533 1.70667,-0.08533 0.42666,0 0.42666,0.469333 0,0.08533 0,0.298667 -0.0427,0.170667 -0.0427,0.298667 0,0.128 -0.384,0.213333 -0.64,0.08533 -1.83467,0.426667 -0.512,0.213333 -2.048,1.578667 -1.024,0.896 -2.09067,1.834666 l 4.224,6.656 q 1.57867,2.474667 2.60267,2.688001 0.29867,0.04267 0.59733,0.128 0.29867,0.08533 0.29867,0.469333 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';text-align:start;text-anchor:start;fill:#000000;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="path846"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 123.69999,5.6813339 q 0,1.1093334 -0.768,1.8773334 -0.768,0.768 -1.87734,0.768 -1.10933,0 -1.87733,-0.768 -0.768,-0.768 -0.768,-1.8773334 0,-1.1093333 0.768,-1.8773334 0.768,-0.768 1.87733,-0.768 1.10934,0 1.87734,0.768 0.768,0.7680001 0.768,1.8773334 z m 1.70666,22.3573341 q 0,0.298667 -0.29866,0.298667 -0.29867,0 -0.29867,0 -1.96267,-0.170667 -3.66933,-0.170667 -1.62134,0 -3.584,0.170667 0,0 -0.17067,0 -0.46933,0 -0.46933,-0.426667 0,-0.08533 0.0427,-0.298667 0,-0.213333 0,-0.298666 0,-0.256 0.85334,-0.384 0.85333,-0.128 1.06666,-0.554667 0.256,-0.469333 0.256,-1.834667 v -6.528 q 0,-1.322667 -0.256,-1.749333 -0.17066,-0.298667 -1.10933,-0.682667 -0.896,-0.384 -0.896,-0.768 0,-0.256 0.128,-0.469333 0.0427,-0.08533 2.85867,-1.194667 2.816,-1.152 3.02933,-1.152 0.34133,0 0.34133,0.341333 v 11.904 q 0,1.749334 0.17067,2.090667 0.256,0.469334 1.10933,0.597334 0.896,0.128 0.896,0.384 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:42.66666794px;font-family:Alfios;-inkscape-font-specification:'Alfios Bold';text-align:start;text-anchor:start;fill:#000000;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="path848"
|
||||
inkscape:connector-curvature="0" />
|
||||
</svg:g>
|
||||
<svg:g
|
||||
aria-label="breeze"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;line-height:2.57638931px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;opacity:1;fill:#252525;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:0.72772277;paint-order:stroke fill markers"
|
||||
id="text930"
|
||||
transform="translate(-69.005304,1.5000002)">
|
||||
<svg:path
|
||||
d="m 162.59026,13.697235 q 0,0.782126 -0.48131,1.363707 -0.46126,0.581581 -1.22333,0.581581 -0.46125,0 -0.92251,-0.421145 -0.44119,-0.421144 -0.80218,-0.421144 -0.16043,0 -0.9225,0.80218 -1.28349,1.323598 -2.68731,3.690031 -1.34365,2.266161 -2.00545,4.111176 -0.48131,1.504089 -1.52414,4.452103 -0.28077,0.701908 -0.80219,0.701908 -0.50136,0 -0.98267,-0.240654 -0.58158,-0.280763 -0.58158,-0.721962 0,-0.180491 0.10028,-0.461254 l 2.52686,-7.259736 q 0.8824,-2.526869 0.8824,-4.111176 0,-1.383761 -0.80218,-1.383761 -1.103,0 -2.08567,1.263434 -0.96261,1.263435 -0.98267,1.263435 -0.14038,0 -0.46125,-0.2206 -0.32087,-0.240654 -0.32087,-0.381036 0,-0.140381 0.0802,-0.280763 0.80218,-1.383762 1.88513,-2.607087 1.50409,-1.664525 2.52687,-1.664525 2.12578,0 2.12578,3.10845 0,1.443925 -0.42115,3.288941 3.9708,-6.4375 6.05647,-6.4375 0.76207,0 1.28348,0.601635 0.54148,0.601636 0.54148,1.383762 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path853"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 148.05073,15.502142 q 0,4.672702 -5.71554,9.485786 -4.25155,3.569704 -7.74104,3.569704 -1.38376,0 -2.24611,-0.661799 -0.96261,-0.721962 -0.96261,-2.065615 0,-0.561526 0.20054,-1.24338 l 5.85592,-20.1748444 q 0.12033,-0.4211449 0.12033,-0.7420171 0,-0.8823988 -1.12305,-1.1832166 -1.12306,-0.3008177 -1.12306,-0.5013629 0,-0.7821262 0.62169,-1.04283489 0.30082,-0.0200545 0.78213,-0.0601636 0.56153,-0.0601635 2.28621,-0.4612539 Q 140.73083,-2.0861626e-7 140.791,-2.0861626e-7 q 0.42114,0 0.42114,0.38103582861626 0,0.18049066 -1.20327,4.33177578 L 136.09824,18.06912 q 5.03368,-6.357282 9.06464,-6.357282 1.34365,0 2.16589,1.263434 0.72196,1.102999 0.72196,2.52687 z m -2.92796,0.200545 q 0,-1.764798 -1.48403,-1.764798 -2.54693,0 -6.05647,4.331776 -3.42932,4.211449 -3.42932,6.83859 0,1.965343 1.36371,1.965343 2.78758,0 6.25701,-4.311721 3.3491,-4.17134 3.3491,-7.05919 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path851"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 175.36498,14.760125 q 0,3.028232 -3.66997,4.572429 -2.18595,0.902454 -6.29712,1.403817 -0.16044,1.123053 -0.16044,2.145833 0,3.54965 3.46943,3.54965 1.12306,0 2.4266,-1.183217 1.30354,-1.183216 1.22332,-1.183216 0.14039,0 0.46126,0.320872 0.32087,0.300818 0.32087,0.441199 0,0.140382 -0.12033,0.300818 -2.54692,3.429322 -5.89602,3.429322 -2.18595,0 -3.5296,-1.544197 -1.28349,-1.46398 -1.28349,-3.669977 0,-4.612539 2.82769,-8.12208 2.84774,-3.50954 6.61799,-3.50954 1.50409,0 2.50681,0.762072 1.103,0.842289 1.103,2.286215 z m -2.54692,0.220599 q 0,-1.784852 -1.46398,-1.784852 -1.84502,0 -3.54965,2.205997 -1.3236,1.704634 -2.04556,3.890576 7.05919,-0.822235 7.05919,-4.311721 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path855"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 188.96195,14.760125 q 0,3.028232 -3.66998,4.572429 -2.18594,0.902454 -6.29712,1.403817 -0.16043,1.123053 -0.16043,2.145833 0,3.54965 3.46943,3.54965 1.12305,0 2.4266,-1.183217 1.30354,-1.183216 1.22332,-1.183216 0.14038,0 0.46125,0.320872 0.32088,0.300818 0.32088,0.441199 0,0.140382 -0.12033,0.300818 -2.54692,3.429322 -5.89603,3.429322 -2.18594,0 -3.52959,-1.544197 -1.28349,-1.46398 -1.28349,-3.669977 0,-4.612539 2.82768,-8.12208 2.84775,-3.50954 6.618,-3.50954 1.50408,0 2.50681,0.762072 1.103,0.842289 1.103,2.286215 z m -2.54693,0.220599 q 0,-1.784852 -1.46398,-1.784852 -1.84501,0 -3.54964,2.205997 -1.3236,1.704634 -2.04557,3.890576 7.05919,-0.822235 7.05919,-4.311721 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path857"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 204.80502,12.514019 q 0,0.721962 -0.92251,1.704634 -0.36098,0.40109 -6.39739,6.477609 -1.34366,1.283489 -3.7502,3.850467 l 0.36098,0.180491 q 2.72742,1.363707 4.17134,4.411993 0.80218,1.68458 1.48404,1.68458 0.58158,0 1.103,-1.042835 0.54147,-1.042835 1.22332,-1.042835 0.94256,0 0.94256,0.942562 0,1.102999 -1.40381,1.865071 -1.20327,0.641744 -2.4266,0.641744 -1.58431,0 -3.16861,-1.363707 -0.42115,-0.381036 -2.36644,-2.546924 -1.22332,-1.363707 -1.96534,-1.363707 -0.48131,0 -1.12305,0.742017 -0.62169,0.742017 -1.103,0.742017 -0.68185,0 -0.68185,-0.561526 0,-0.4412 1.90518,-2.226052 0.0802,-0.08022 3.32905,-3.369159 1.8049,-1.764797 5.27433,-5.394665 -0.84229,-0.160436 -1.60436,-0.421145 -0.40109,-0.140381 -1.98539,-1.123052 -1.34366,-0.822236 -1.90518,-0.822236 -0.56153,0 -0.94257,0.461254 -0.38103,0.4412 -0.38103,1.002726 0,0.340927 0.34092,0.862344 0.34093,0.501363 0.34093,0.762072 0,0.481308 -0.58158,0.481308 -0.64174,0 -1.103,-0.982671 -0.38103,-0.802181 -0.38103,-1.544198 0,-1.504089 1.12305,-2.647196 1.12305,-1.163162 2.60709,-1.163162 1.20327,0 3.40926,1.24338 2.206,1.223326 3.38922,1.223326 0.70191,0 1.28349,-1.203272 0.58158,-1.203271 1.24338,-1.203271 0.6618,0 0.6618,0.742018 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path859"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
d="m 217.6399,14.760125 q 0,3.028232 -3.66997,4.572429 -2.18595,0.902454 -6.29712,1.403817 -0.16044,1.123053 -0.16044,2.145833 0,3.54965 3.46943,3.54965 1.12306,0 2.4266,-1.183217 1.30354,-1.183216 1.22333,-1.183216 0.14038,0 0.46125,0.320872 0.32087,0.300818 0.32087,0.441199 0,0.140382 -0.12032,0.300818 -2.54693,3.429322 -5.89603,3.429322 -2.18595,0 -3.5296,-1.544197 -1.28349,-1.46398 -1.28349,-3.669977 0,-4.612539 2.82769,-8.12208 2.84774,-3.50954 6.61799,-3.50954 1.50409,0 2.50682,0.762072 1.10299,0.842289 1.10299,2.286215 z m -2.54692,0.220599 q 0,-1.784852 -1.46398,-1.784852 -1.84502,0 -3.54965,2.205997 -1.3236,1.704634 -2.04556,3.890576 7.05919,-0.822235 7.05919,-4.311721 z"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:41.07165146px;font-family:Alexander;-inkscape-font-specification:'Alexander Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#252525;fill-opacity:1;stroke:#ffffff;stroke-width:3;stroke-opacity:0.72772277"
|
||||
id="path861"
|
||||
inkscape:connector-curvature="0" />
|
||||
</svg:g>
|
||||
<svg:path
|
||||
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#fa005a;fill-opacity:1;fill-rule:nonzero;stroke:#ffffff;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:0.72772277;paint-order:stroke fill markers;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
||||
d="m 16.886254,1.9938812 c -3.910133,-0.022008 -6.9535917,2.245681 -9.0800783,4.662109 -2.0973298,2.383295 -3.833062,5.3337288 -5.6621092,8.9804688 -2.05699735,4.100418 1.0356001,9.111984 5.6230472,9.111328 H 22.208519 c 1.140931,2.016544 2.30672,3.900631 3.666016,5.457031 2.103842,2.40891 5.109808,4.690926 9.011718,4.712891 3.908789,0.02201 6.953047,-2.243435 9.080078,-4.660157 2.097507,-2.383176 3.833076,-5.333755 5.66211,-8.980468 C 51.685439,17.176666 48.59284,12.1651 44.005394,12.165756 H 29.563988 C 28.422971,10.14915 27.258741,8.2650182 25.899925,6.7087242 23.796753,4.2998952 20.789506,2.0158522 16.886254,1.9938812 Z m -0.02344,4.4375 c 2.180149,0.012272 4.038227,1.299637 5.693359,3.1953118 1.245218,1.426187 2.495246,3.44536 3.78711,5.818359 a 2.2186407,2.2186407 0 0 0 1.949218,1.158204 h 15.712891 c 1.472457,-2.11e-4 2.318449,1.369414 1.658203,2.685546 -1.751077,3.491284 -3.337367,6.116702 -5.029297,8.039063 -1.662405,1.888817 -3.551482,3.164578 -5.724609,3.152344 -2.180003,-0.01227 -4.040284,-1.298343 -5.695313,-3.19336 C 27.969301,25.86123 26.721297,23.841706 25.429222,21.46849 A 2.2186407,2.2186407 0 0 0 23.480003,20.310287 H 7.7671137 C 6.2946566,20.310498 5.448665,18.940873 6.1089102,17.62474 7.8599547,14.133521 9.4461247,11.50847 11.138206,9.585677 12.801131,7.6960182 14.689541,6.4191492 16.862816,6.4313812 Z"
|
||||
id="path819"
|
||||
inkscape:connector-curvature="0" />
|
||||
<svg:path
|
||||
inkscape:connector-curvature="0"
|
||||
id="path837"
|
||||
d="m 43.966331,30.257552 c -29.310887,4.106878 -14.655444,2.053439 0,0 z M 16.862814,6.4313812 c 2.180149,0.012272 4.038227,1.299637 5.693359,3.1953118 1.245218,1.426187 2.495246,3.44536 3.78711,5.818359 0.388515,0.713988 1.136369,1.158355 1.949218,1.158204 h 15.712891 c 1.472457,-2.11e-4 2.318449,1.369414 1.658203,2.685546 -1.751077,3.491284 -3.337367,6.116702 -5.029297,8.039063 -1.662405,1.888817 -3.551482,3.164578 -5.724609,3.152344 -2.180003,-0.01227 -4.040284,-1.298343 -5.695313,-3.19336 -1.245075,-1.425619 -2.493079,-3.445143 -3.785154,-5.818359 -0.388515,-0.713988 -1.13637,-1.158355 -1.949219,-1.158203 H 7.7671137 C 6.2946566,20.310498 5.448665,18.940873 6.1089102,17.62474 7.8599547,14.133521 9.4461247,11.50847 11.138206,9.585677 12.801131,7.6960182 14.689541,6.4191492 16.862816,6.4313812 Z"
|
||||
style="color:#000000;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:medium;line-height:normal;font-family:sans-serif;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration:none;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;letter-spacing:normal;word-spacing:normal;text-transform:none;writing-mode:lr-tb;direction:ltr;text-orientation:mixed;dominant-baseline:auto;baseline-shift:baseline;text-anchor:start;white-space:normal;shape-padding:0;clip-rule:nonzero;display:inline;overflow:visible;visibility:visible;opacity:1;isolation:auto;mix-blend-mode:normal;color-interpolation:sRGB;color-interpolation-filters:linearRGB;solid-color:#000000;solid-opacity:1;vector-effect:none;fill:#ffc500;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:0.72772277;paint-order:stroke fill markers;color-rendering:auto;image-rendering:auto;shape-rendering:auto;text-rendering:auto;enable-background:accumulate"
|
||||
sodipodi:nodetypes="cccsccccscsccccscc" />
|
||||
</svg:svg>
|
Before Width: | Height: | Size: 22 KiB |
|
@ -1,53 +0,0 @@
|
|||
// countdown timer for gacha enthusiasts
|
||||
// sample: bandori/wiki/BanG_Dream!_Wikia
|
||||
// sample: ensemble-stars/wiki/The_English_Ensemble_Stars_Wiki
|
||||
|
||||
import {h, htm, render, signal, computed, effect} from "./preact.js"
|
||||
// Tagged-template helper: htm bound to the imported `h` function.
const html = htm.bind(h)

// Shared clock signal holding the current time in epoch milliseconds.
const now = signal(Date.now())

// Refresh the clock once per second so everything derived from `now` updates.
setInterval(() => {
  now.value = Date.now()
}, 1000)
|
||||
|
||||
// Countdown units, largest first: [suffix letter, length in milliseconds].
const units = [
  ["w", 7*24*60*60*1000],
  ["d", 24*60*60*1000],
  ["h", 60*60*1000],
  ["m", 60*1000],
  ["s", 1000]
]

/**
 * Format the time remaining until `datetime` as text such as "1d 0h 5m 3s".
 *
 * Leading units whose value is zero are omitted; once a non-zero unit has
 * been emitted, every smaller unit is shown even when it is zero.
 *
 * @param {number} datetime target time, epoch milliseconds
 * @param {number} now current time, epoch milliseconds
 * @param {string} [or] text to return once the target time has passed
 * @returns {string} the remaining time, or `or` (when truthy), or a
 *   "[timer ended on ...]" message when the target is not in the future
 */
function getDisplayTime(datetime, now, or) {
  let difference = datetime - now
  if (difference > 0) {
    const parts = []
    for (const [letter, duration] of units) {
      const multiplier = Math.floor(difference / duration)
      difference -= multiplier * duration
      // Skip leading zero fields, but keep zeros after the first shown field.
      if (multiplier > 0 || parts.length > 0) {
        parts.push(multiplier + letter)
      }
    }
    // Under one second left every field is zero; show "0s" instead of
    // returning an empty string, which would render a blank countdown.
    return parts.length > 0 ? parts.join(" ") : "0s"
  } else if (or) {
    return or
  } else {
    return `[timer ended on ${new Date(datetime).toLocaleString()}]`
  }
}
|
||||
|
||||
// Component that wraps the value of its `display` prop in a <span>.
function Countdown({display}) {
  return html`<span>${display}</span>`
}
|
||||
|
||||
// Turn every ".countdown" element on the page into a live, self-updating timer.
document.querySelectorAll(".countdown").forEach(eCountdown => {
  // The target date is read from the text of the nested ".countdowndate" element.
  const eDate = eCountdown.querySelector(".countdowndate")
  // Without a date element there is nothing to render into. Skip this one
  // rather than throwing, which would abort the remaining countdowns too.
  if (!eDate) return
  // Text of the element right after the countdown (if any); passed to
  // getDisplayTime as the text to show once the timer is over.
  const or = eCountdown.nextElementSibling?.textContent
  const datetime = new Date(eDate.textContent).getTime()
  // Derived value that is recomputed whenever the shared `now` signal changes.
  const display = computed(() => getDisplayTime(datetime, now.value, or))
  // Clear the original date text, then render the live countdown in its place.
  eDate.textContent = ""
  render(html`<${Countdown} display=${display} />`, eDate)
})
|
|
@ -1,2 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<svg fill="currentColor" width="32" height="32" version="1.1" viewBox="0 0 8.46667 8.46667" xmlns="http://www.w3.org/2000/svg"><g transform="translate(0 -288.533)"><path transform="matrix(.264583 0 0 .264583 0 288.533)" d="m23.0039 3.875-1.31445 2.27344a11 11 0 013.31055 7.85156 11 11 0 01-11 11 11 11 0 01-7.85938-3.30664l-2.26172 1.30859c2.93591 5.08516 8.77013 7.80476 14.5527 6.78516 5.78263-1.01964 10.3378-5.57482 11.3574-11.3574 1.01965-5.7826-1.7-11.6188-6.78516-14.5547z" color-rendering="auto" dominant-baseline="auto" image-rendering="auto" shape-rendering="auto" style="font-feature-settings:normal;font-variant-alternates:normal;font-variant-caps:normal;font-variant-ligatures:normal;font-variant-numeric:normal;font-variant-position:normal;isolation:auto;mix-blend-mode:normal;paint-order:stroke fill markers;shape-padding:0;text-decoration-line:none;text-decoration-style:solid;text-indent:0;text-orientation:mixed;text-transform:none;white-space:normal"/><circle cx="2.11667" cy="290.65" r=".529167" style="paint-order:stroke fill markers"/><circle cx="1.5875" cy="292.237" r=".529167" style="paint-order:stroke fill markers"/><circle cx="3.70417" cy="290.121" r=".529167" style="paint-order:stroke fill markers"/></g></svg>
|
Before Width: | Height: | Size: 1.3 KiB |
|
@ -1,2 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<svg width="32" height="32" fill="currentColor" version="1.1" viewBox="0 0 8.46667 8.46667" xmlns="http://www.w3.org/2000/svg"><path d="m4.23242.5295c-2.03949 0-3.70313 1.66559-3.70313 3.70508 0 2.03948 1.66364 3.70312 3.70313 3.70312 2.03949 0 3.70508-1.66364 3.70508-3.70312 0-2.03949-1.66559-3.70508-3.70508-3.70508zm0 1.05859c1.46752 0 2.64648 1.17897 2.64648 2.64649s-1.17896 2.64453-2.64648 2.64453c-1.46752 0-2.64453-1.17701-2.64453-2.64453s1.17701-2.64649 2.64453-2.64649z" color-rendering="auto" dominant-baseline="auto" image-rendering="auto" shape-rendering="auto" style="font-feature-settings:normal;font-variant-alternates:normal;font-variant-caps:normal;font-variant-ligatures:normal;font-variant-numeric:normal;font-variant-position:normal;isolation:auto;mix-blend-mode:normal;paint-order:stroke fill markers;shape-padding:0;text-decoration-color:#000000;text-decoration-line:none;text-decoration-style:solid;text-indent:0;text-orientation:mixed;text-transform:none;white-space:normal"/><path d="m4.23333 2.64583a1.5875 1.5875 0 00-1.495 1.05937h2.99a1.5875 1.5875 0 00-1.495-1.05937zm-1.495 2.11563a1.5875 1.5875 0 001.495 1.05937 1.5875 1.5875 0 001.495-1.05937h-2.99z" style="paint-order:stroke fill markers"/></svg>
|
Before Width: | Height: | Size: 1.2 KiB |
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue