forked from cadence/breezewiki
		
	Add more indexers and Solr configuration
This commit is contained in:
		
							parent
							
								
									43c3f70736
								
							
						
					
					
						commit
						57e0d20657
					
				
					 42 changed files with 9016 additions and 33 deletions
				
			
		
							
								
								
									
										179
									
								
								archiver/fts.rkt
									
										
									
									
									
								
							
							
						
						
									
										179
									
								
								archiver/fts.rkt
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -1,4 +1,4 @@
 | 
			
		|||
#lang racket/base
 | 
			
		||||
#lang racket
 | 
			
		||||
(require racket/function
 | 
			
		||||
         racket/future
 | 
			
		||||
         racket/match
 | 
			
		||||
| 
						 | 
				
			
			@ -9,12 +9,35 @@
 | 
			
		|||
         file/gunzip
 | 
			
		||||
         db
 | 
			
		||||
         db/unsafe/sqlite3
 | 
			
		||||
         net/http-easy
 | 
			
		||||
         json
 | 
			
		||||
         json-pointer
 | 
			
		||||
         "../lib/html-parsing/main.rkt"
 | 
			
		||||
         "../lib/xexpr-utils.rkt"
 | 
			
		||||
         "../lib/tree-updater.rkt")
 | 
			
		||||
 | 
			
		||||
;; (seq op-expr ...)
;;
;; Runs each `op-expr` in order. Every expression must evaluate to an http-easy
;; response whose JSON body carries a "/taskUid" field. After each request the
;; macro polls http://localhost:7700/tasks/<taskUid> once per second until the
;; task leaves the "enqueued"/"processing" states.
;;
;; Raises an error (via `error 'seq`) when:
;;   - the initial response has an HTTP status code >= 400, or
;;   - the polled task ends in any status other than "succeeded".
;; In both cases the decoded JSON body is included in the message so the
;; server-side error details are visible.
;;
;; NOTE(review): polling has no upper bound; a task that never finishes will
;; block forever.
(define-syntax (seq stx)
  (syntax-case stx ()
    [(_ body ...)
     ;; Each body is wrapped in a thunk so that it is only sent once the
     ;; previous operation's task has completed.
     #`(for ([op (list (lambda () body) ...)]
             [i (in-naturals)])
         (define res (op))
         (when (>= (response-status-code res) 400)
           (error 'seq "op #~a: status code was ~a: ~v" i (response-status-code res) (response-json res)))
         (define taskuid (json-pointer-value "/taskUid" (response-json res)))
         ;; for/or stops at the first iteration that returns a true value.
         (for/or ([ticks (in-naturals)]
                  [res2 (in-producer (lambda () (get (format "http://localhost:7700/tasks/~a" taskuid))))])
           ;; Decode once; used both for the status and for error reporting.
           (define task (response-json res2))
           (define status (json-pointer-value "/status" task))
           (case status
             [("enqueued" "processing")
              (sleep 1)
              #f]
             [("succeeded")
              (printf "op #~a: ~a (~a ticks)~n" i status ticks)
              #t]
             [else
              ;; Report the task JSON rather than the opaque response struct.
              (error 'seq "op #~a: task status was ~a: ~v" i status task)])))]))
 | 
			
		||||
 | 
			
		||||
;; Returns a true value when the `class` attribute found in `attributes`
;; contains at least one of the strings in `substrs`; #f otherwise.
;; A missing class attribute is treated as the empty string.
(define (class-has? attributes substrs)
  (let ([class-string (or (get-attribute 'class attributes) "")])
    (for/or ([needle (in-list substrs)])
      (string-contains? class-string needle))))
 | 
			
		||||
| 
						 | 
				
			
			@ -29,17 +52,27 @@
 | 
			
		|||
(define slc (sqlite3-connect #:database "../storage/fts-separate.db"))
 | 
			
		||||
(sqlite3-load-extension slc "fts5")
 | 
			
		||||
 | 
			
		||||
(define (writer page)
 | 
			
		||||
  (for ([bit page])
 | 
			
		||||
    (cond
 | 
			
		||||
      [(memq bit '(div p li td)) (displayln "")]
 | 
			
		||||
      [(symbol? bit) (void)]
 | 
			
		||||
      [(and (pair? bit) (eq? (car bit) '*COMMENT*)) (void)]
 | 
			
		||||
      [(and (pair? bit) (eq? (car bit) '@)) (void)]
 | 
			
		||||
      [(pair? bit) (writer bit)]
 | 
			
		||||
      [(string? bit) (display bit)])))
 | 
			
		||||
;; Prints the text content of the xexp tree `page` to the current output port.
;;
;; tables-mode? selects which subtrees are left out:
;;   - true:  h1 h2 h3 p blockquote q elements are skipped
;;   - false: ul ol dl table elements are skipped
;; A newline is emitted for each div/p/li/td/dd/dt/br tag symbol encountered;
;; comment nodes and attribute lists produce no output.
(define (writer tables-mode? page)
  ;; Tags whose whole subtree is ignored in the current mode.
  (define skipped-tags
    (if tables-mode?
        '(h1 h2 h3 p blockquote q)
        '(ul ol dl table)))
  (define (walk nodes)
    (for ([node nodes])
      (cond
        [(and (pair? node) (memq (car node) skipped-tags)) (void)]
        ;; The tag symbol itself is an element of the list being walked.
        [(memq node '(div p li td dd dt br)) (displayln "")]
        [(symbol? node) (void)]
        [(and (pair? node) (memq (car node) '(*COMMENT* @))) (void)]
        [(pair? node) (walk node)]
        [(string? node) (display node)])))
  (walk page))
 | 
			
		||||
 | 
			
		||||
(define wikiname "sto")
 | 
			
		||||
;; Renders `page` with `writer` (in the given mode), captures the printed text
;; as a string, and collapses every run of line breaks -- together with any
;; spaces or tabs immediately preceding them -- into a single "\n".
(define (write-and-post-process tables-mode? page)
  (let ([raw (with-output-to-string (λ () (writer tables-mode? page)))])
    ;; Disabled step kept for reference (number stripping):
    ;; (define text-no-numbers (regexp-replace* #px"(?:-|[+$£€¥] *)?[0-9,.]{2,}%?\\s*" text ""))
    (regexp-replace* #px"([ \t]*\r?\n+)+" raw "\n")))
 | 
			
		||||
 | 
			
		||||
(define wikiname "bloons")
 | 
			
		||||
(define tablename (format "page_~a" wikiname))
 | 
			
		||||
 | 
			
		||||
(define ((extract f)) ; f - filename
 | 
			
		||||
| 
						 | 
				
			
			@ -55,35 +88,115 @@
 | 
			
		|||
         (read-json in)]
 | 
			
		||||
        [else #f]))
 | 
			
		||||
    (define title (json-pointer-value "/parse/title" j))
 | 
			
		||||
    (define pageid (json-pointer-value "/parse/pageid" j))
 | 
			
		||||
    (define page-html (preprocess-html-wiki (json-pointer-value "/parse/text" j)))
 | 
			
		||||
    (define page (update-tree updater (html->xexp page-html)))
 | 
			
		||||
    (define text (with-output-to-string (λ () (writer page))))
 | 
			
		||||
    (define shrink-text (regexp-replace* #px"([ \t]*\r?\n+)+" text "\n"))
 | 
			
		||||
    (values title shrink-text)))
 | 
			
		||||
    (define body (write-and-post-process #f page))
 | 
			
		||||
    (define table (write-and-post-process #t page))
 | 
			
		||||
    (values title body table pageid)))
 | 
			
		||||
 | 
			
		||||
(println "extracting text...")
 | 
			
		||||
(define results
 | 
			
		||||
  (time
 | 
			
		||||
   (for/list ([f (directory-list (format "../storage/archive/~a" wikiname) #:build? #t)]
 | 
			
		||||
              #:when (member (path-get-extension f) '(#".json" #".gz")))
 | 
			
		||||
     (extract f))))
 | 
			
		||||
  (for/list ([f (directory-list (format "../storage/archive/~a" wikiname) #:build? #t)]
 | 
			
		||||
             #:when (member (path-get-extension f) '(#".json" #".gz")))
 | 
			
		||||
    (extract f)))
 | 
			
		||||
 | 
			
		||||
;; ***************************************************************************************************
 | 
			
		||||
;; TESTING WRITER
 | 
			
		||||
;; ***************************************************************************************************
 | 
			
		||||
#;(for/first ([fut results]
 | 
			
		||||
            [i (in-naturals 1)]
 | 
			
		||||
            #:when (i . >= . 4859))
 | 
			
		||||
  (define-values (title body table pageid) (fut))
 | 
			
		||||
  (println title)
 | 
			
		||||
  (println body)
 | 
			
		||||
  (println table))
 | 
			
		||||
 | 
			
		||||
(println "inserting...")
 | 
			
		||||
(query-exec slc "begin transaction")
 | 
			
		||||
#;(query-exec slc (format "create virtual table \"~a\" using fts5 (title, body, tokenize='porter unicode61')" wikiname))
 | 
			
		||||
(time
 | 
			
		||||
 (for ([fut results]
 | 
			
		||||
       [i (in-naturals 1)])
 | 
			
		||||
   (display "-")
 | 
			
		||||
   (when (and (> i 0) (= (modulo i 100) 0))
 | 
			
		||||
     (println i))
 | 
			
		||||
   (define-values (title shrink-text) (fut))
 | 
			
		||||
   (query-exec slc (format "insert into \"~a\" (title, body) values (?, ?)" tablename) title shrink-text)))
 | 
			
		||||
 | 
			
		||||
(println "running optimize...")
 | 
			
		||||
(query-exec slc (format "insert into \"~a\" (\"~a\") values ('optimize')" tablename tablename))
 | 
			
		||||
;; ***************************************************************************************************
 | 
			
		||||
;; SQLite FTS5
 | 
			
		||||
;; ***************************************************************************************************
 | 
			
		||||
#;(begin
 | 
			
		||||
    (query-exec slc "begin transaction")
 | 
			
		||||
    #;(query-exec slc (format "create virtual table \"~a\" using fts5 (title, body, tokenize='porter unicode61')" wikiname))
 | 
			
		||||
    (time
 | 
			
		||||
     (for ([fut results]
 | 
			
		||||
           [i (in-naturals 1)])
 | 
			
		||||
       (display "-")
 | 
			
		||||
       (when (and (> i 0) (= (modulo i 100) 0))
 | 
			
		||||
         (println i))
 | 
			
		||||
       (define-values (title shrink-text) (fut))
 | 
			
		||||
       (query-exec slc (format "insert into \"~a\" (title, body) values (?, ?)" tablename) title shrink-text)))
 | 
			
		||||
 | 
			
		||||
(println "committing...")
 | 
			
		||||
(query-exec slc "commit")
 | 
			
		||||
    (println "running optimize...")
 | 
			
		||||
    (query-exec slc (format "insert into \"~a\" (\"~a\") values ('optimize')" tablename tablename))
 | 
			
		||||
 | 
			
		||||
    (println "committing...")
 | 
			
		||||
    (query-exec slc "commit"))
 | 
			
		||||
 | 
			
		||||
;; ***************************************************************************************************
 | 
			
		||||
;; Solr
 | 
			
		||||
;; ***************************************************************************************************
 | 
			
		||||
;; Builds one document per extracted page and posts the whole batch to the
;; Solr core named after `wikiname`.
;;
;; Each document is an immutable hasheq with the keys id (page id as a
;; string), title, body, table and len (length of body in characters).
;; The batch is also written to "cache.rkt"; reading it back is currently
;; disabled (see the `#;` clause below).
;;
;; Raises an error when Solr answers the update request with a status
;; code >= 400, so that a rejected batch does not go unnoticed.
(begin
  (define data
    (cond
      #;[(file-exists? "cache.rkt")
       (println "reading in...")
       (with-input-from-file "cache.rkt" (λ () (read)))]
      [else
       (define data
         (for/list ([fut results]
                    [i (in-naturals 1)])
           ;; Progress output: a dash per page, the running count every 100.
           ;; i starts at 1, so no separate positivity check is needed.
           (display "-")
           (when (zero? (modulo i 100))
             (println i))
           (define-values (title body table pageid) (fut))
           (define len (string-length body))
           `#hasheq((id . ,(number->string pageid))
                    (title . ,title)
                    (body . ,body)
                    (table . ,table)
                    (len . ,len))))

       (println "writing out...")
       (with-output-to-file "cache.rkt" (λ () (write data)) #:exists 'truncate/replace)
       data]))

  (println "posting...")
  (define res
    (post (format "http://localhost:8983/solr/~a/update?commit=true" wikiname)
          #:json data))
  ;; Surface failed updates instead of silently discarding the response.
  (when (>= (response-status-code res) 400)
    (error 'solr "update failed: status code was ~a: ~a"
           (response-status-code res)
           (response-body res))))
 | 
			
		||||
 | 
			
		||||
;; ***************************************************************************************************
 | 
			
		||||
;; Meilisearch
 | 
			
		||||
;; ***************************************************************************************************
 | 
			
		||||
#;(begin
 | 
			
		||||
  (seq
 | 
			
		||||
   (put (format "http://localhost:7700/indexes/~a/settings/searchable-attributes" wikiname)
 | 
			
		||||
        #:json '("title" "body"))
 | 
			
		||||
   (put (format "http://localhost:7700/indexes/~a/settings/ranking-rules" wikiname)
 | 
			
		||||
        #:json '("words" "typo" #;"proximity" "attribute" "sort" "exactness" #;"len:desc"))
 | 
			
		||||
   (call-with-input-file "stop-words.json"
 | 
			
		||||
     (λ (in)
 | 
			
		||||
       (put (format "http://localhost:7700/indexes/~a/settings/stop-words" wikiname)
 | 
			
		||||
            #:headers '#hasheq((Content-Type . "application/json"))
 | 
			
		||||
            #:data in))))
 | 
			
		||||
  (define data
 | 
			
		||||
    (for/list ([fut results]
 | 
			
		||||
               [i (in-naturals 1)])
 | 
			
		||||
      (display "-")
 | 
			
		||||
      (when (and (> i 0) (= (modulo i 100) 0))
 | 
			
		||||
        (println i))
 | 
			
		||||
      (define-values (title body pageid) (fut))
 | 
			
		||||
      (define len (string-length body))
 | 
			
		||||
      `#hasheq((id . ,pageid)
 | 
			
		||||
               (title . ,title)
 | 
			
		||||
               (body . ,body)
 | 
			
		||||
               (len . ,len))))
 | 
			
		||||
  (define res
 | 
			
		||||
    (post (format "http://localhost:7700/indexes/~a/documents" wikiname)
 | 
			
		||||
          #:json data))
 | 
			
		||||
  (seq res)
 | 
			
		||||
  (println (response-json res)))
 | 
			
		||||
 | 
			
		||||
(disconnect slc)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										8
									
								
								archiver/solr-config-dir/lang/contractions_ca.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								archiver/solr-config-dir/lang/contractions_ca.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,8 @@
 | 
			
		|||
# Set of Catalan contractions for ElisionFilter
 | 
			
		||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
 | 
			
		||||
d
 | 
			
		||||
l
 | 
			
		||||
m
 | 
			
		||||
n
 | 
			
		||||
s
 | 
			
		||||
t
 | 
			
		||||
							
								
								
									
										15
									
								
								archiver/solr-config-dir/lang/contractions_fr.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								archiver/solr-config-dir/lang/contractions_fr.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,15 @@
 | 
			
		|||
# Set of French contractions for ElisionFilter
 | 
			
		||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
 | 
			
		||||
l
 | 
			
		||||
m
 | 
			
		||||
t
 | 
			
		||||
qu
 | 
			
		||||
n
 | 
			
		||||
s
 | 
			
		||||
j
 | 
			
		||||
d
 | 
			
		||||
c
 | 
			
		||||
jusqu
 | 
			
		||||
quoiqu
 | 
			
		||||
lorsqu
 | 
			
		||||
puisqu
 | 
			
		||||
							
								
								
									
										5
									
								
								archiver/solr-config-dir/lang/contractions_ga.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								archiver/solr-config-dir/lang/contractions_ga.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,5 @@
 | 
			
		|||
# Set of Irish contractions for ElisionFilter
 | 
			
		||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
 | 
			
		||||
d
 | 
			
		||||
m
 | 
			
		||||
b
 | 
			
		||||
							
								
								
									
										23
									
								
								archiver/solr-config-dir/lang/contractions_it.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								archiver/solr-config-dir/lang/contractions_it.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,23 @@
 | 
			
		|||
# Set of Italian contractions for ElisionFilter
 | 
			
		||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
 | 
			
		||||
c
 | 
			
		||||
l 
 | 
			
		||||
all 
 | 
			
		||||
dall 
 | 
			
		||||
dell 
 | 
			
		||||
nell 
 | 
			
		||||
sull 
 | 
			
		||||
coll 
 | 
			
		||||
pell 
 | 
			
		||||
gl 
 | 
			
		||||
agl 
 | 
			
		||||
dagl 
 | 
			
		||||
degl 
 | 
			
		||||
negl 
 | 
			
		||||
sugl 
 | 
			
		||||
un 
 | 
			
		||||
m 
 | 
			
		||||
t 
 | 
			
		||||
s 
 | 
			
		||||
v 
 | 
			
		||||
d
 | 
			
		||||
							
								
								
									
										5
									
								
								archiver/solr-config-dir/lang/hyphenations_ga.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								archiver/solr-config-dir/lang/hyphenations_ga.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,5 @@
 | 
			
		|||
# Set of Irish hyphenations for StopFilter
 | 
			
		||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
 | 
			
		||||
h
 | 
			
		||||
n
 | 
			
		||||
t
 | 
			
		||||
							
								
								
									
										6
									
								
								archiver/solr-config-dir/lang/stemdict_nl.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								archiver/solr-config-dir/lang/stemdict_nl.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,6 @@
 | 
			
		|||
# Set of overrides for the Dutch stemmer
 | 
			
		||||
# TODO: load this as a resource from the analyzer and sync it in build.xml
 | 
			
		||||
fiets	fiets
 | 
			
		||||
bromfiets	bromfiets
 | 
			
		||||
ei	eier
 | 
			
		||||
kind	kinder
 | 
			
		||||
							
								
								
									
										420
									
								
								archiver/solr-config-dir/lang/stoptags_ja.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										420
									
								
								archiver/solr-config-dir/lang/stoptags_ja.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,420 @@
 | 
			
		|||
#
 | 
			
		||||
# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
 | 
			
		||||
#
 | 
			
		||||
# Any token with a part-of-speech tag that exactly matches those defined in this
 | 
			
		||||
# file is removed from the token stream.
 | 
			
		||||
#
 | 
			
		||||
# Set your own stoptags by uncommenting the lines below.  Note that comments are
 | 
			
		||||
# not allowed on the same line as a stoptag.  See LUCENE-3745 for frequency lists,
 | 
			
		||||
# etc. that can be useful for building your own stoptag set.
 | 
			
		||||
#
 | 
			
		||||
# The entire possible tagset is provided below for convenience.
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  noun: unclassified nouns
 | 
			
		||||
#名詞
 | 
			
		||||
#
 | 
			
		||||
#  noun-common: Common nouns or nouns where the sub-classification is undefined
 | 
			
		||||
#名詞-一般
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper: Proper nouns where the sub-classification is undefined 
 | 
			
		||||
#名詞-固有名詞
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper-misc: miscellaneous proper nouns
 | 
			
		||||
#名詞-固有名詞-一般
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper-person: Personal names where the sub-classification is undefined
 | 
			
		||||
#名詞-固有名詞-人名
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper-person-misc: names that cannot be divided into surname and 
 | 
			
		||||
#  given name; foreign names; names where the surname or given name is unknown.
 | 
			
		||||
#  e.g. お市の方
 | 
			
		||||
#名詞-固有名詞-人名-一般
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper-person-surname: Mainly Japanese surnames.
 | 
			
		||||
#  e.g. 山田
 | 
			
		||||
#名詞-固有名詞-人名-姓
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper-person-given_name: Mainly Japanese given names.
 | 
			
		||||
#  e.g. 太郎
 | 
			
		||||
#名詞-固有名詞-人名-名
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper-organization: Names representing organizations.
 | 
			
		||||
#  e.g. 通産省, NHK
 | 
			
		||||
#名詞-固有名詞-組織
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper-place: Place names where the sub-classification is undefined
 | 
			
		||||
#名詞-固有名詞-地域
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper-place-misc: Place names excluding countries.
 | 
			
		||||
#  e.g. アジア, バルセロナ, 京都
 | 
			
		||||
#名詞-固有名詞-地域-一般
 | 
			
		||||
#
 | 
			
		||||
#  noun-proper-place-country: Country names. 
 | 
			
		||||
#  e.g. 日本, オーストラリア
 | 
			
		||||
#名詞-固有名詞-地域-国
 | 
			
		||||
#
 | 
			
		||||
#  noun-pronoun: Pronouns where the sub-classification is undefined
 | 
			
		||||
#名詞-代名詞
 | 
			
		||||
#
 | 
			
		||||
#  noun-pronoun-misc: miscellaneous pronouns: 
 | 
			
		||||
#  e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
 | 
			
		||||
#名詞-代名詞-一般
 | 
			
		||||
#
 | 
			
		||||
#  noun-pronoun-contraction: Spoken language contraction made by combining a 
 | 
			
		||||
#  pronoun and the particle 'wa'.
 | 
			
		||||
#  e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ 
 | 
			
		||||
#名詞-代名詞-縮約
 | 
			
		||||
#
 | 
			
		||||
#  noun-adverbial: Temporal nouns such as names of days or months that behave 
 | 
			
		||||
#  like adverbs. Nouns that represent amount or ratios and can be used adverbially,
 | 
			
		||||
#  e.g. 金曜, 一月, 午後, 少量
 | 
			
		||||
#名詞-副詞可能
 | 
			
		||||
#
 | 
			
		||||
#  noun-verbal: Nouns that take arguments with case and can appear followed by 
 | 
			
		||||
#  'suru' and related verbs (する, できる, なさる, くださる)
 | 
			
		||||
#  e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
 | 
			
		||||
#名詞-サ変接続
 | 
			
		||||
#
 | 
			
		||||
#  noun-adjective-base: The base form of adjectives, words that appear before な ("na")
 | 
			
		||||
#  e.g. 健康, 安易, 駄目, だめ
 | 
			
		||||
#名詞-形容動詞語幹
 | 
			
		||||
#
 | 
			
		||||
#  noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
 | 
			
		||||
#  e.g. 0, 1, 2, 何, 数, 幾
 | 
			
		||||
#名詞-数
 | 
			
		||||
#
 | 
			
		||||
#  noun-affix: noun affixes where the sub-classification is undefined
 | 
			
		||||
#名詞-非自立
 | 
			
		||||
#
 | 
			
		||||
#  noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that 
 | 
			
		||||
#  attach to the base form of inflectional words, words that cannot be classified 
 | 
			
		||||
#  into any of the other categories below. This category includes indefinite nouns.
 | 
			
		||||
#  e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, 
 | 
			
		||||
#       順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, 
 | 
			
		||||
#       拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
 | 
			
		||||
#       わり, 割り, 割, ん-口語/, もん-口語/
 | 
			
		||||
#名詞-非自立-一般
 | 
			
		||||
#
 | 
			
		||||
#  noun-affix-adverbial: noun affixes that can behave as adverbs.
 | 
			
		||||
#  e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, 
 | 
			
		||||
#       上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, 
 | 
			
		||||
#       最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, 
 | 
			
		||||
#       とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, 
 | 
			
		||||
#       儘, 侭, みぎり, 矢先
 | 
			
		||||
#名詞-非自立-副詞可能
 | 
			
		||||
#
 | 
			
		||||
#  noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars 
 | 
			
		||||
#  with the stem よう(だ) ("you(da)").
 | 
			
		||||
#  e.g.  よう, やう, 様 (よう)
 | 
			
		||||
#名詞-非自立-助動詞語幹
 | 
			
		||||
#  
 | 
			
		||||
#  noun-affix-adjective-base: noun affixes that can connect to the indeclinable
 | 
			
		||||
#  connection form な (aux "da").
 | 
			
		||||
#  e.g. みたい, ふう
 | 
			
		||||
#名詞-非自立-形容動詞語幹
 | 
			
		||||
#
 | 
			
		||||
#  noun-special: special nouns where the sub-classification is undefined.
 | 
			
		||||
#名詞-特殊
 | 
			
		||||
#
 | 
			
		||||
#  noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is 
 | 
			
		||||
#  treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base 
 | 
			
		||||
#  form of inflectional words.
 | 
			
		||||
#  e.g. そう
 | 
			
		||||
#名詞-特殊-助動詞語幹
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix: noun suffixes where the sub-classification is undefined.
 | 
			
		||||
#名詞-接尾
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect 
 | 
			
		||||
#  to ガル or タイ and can combine into compound nouns, words that cannot be classified into
 | 
			
		||||
#  any of the other categories below. In general, this category is more inclusive than 
 | 
			
		||||
#  接尾語 ("suffix") and is usually the last element in a compound noun.
 | 
			
		||||
#  e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
 | 
			
		||||
#       よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
 | 
			
		||||
#名詞-接尾-一般
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-person: Suffixes that form nouns and attach to person names more often
 | 
			
		||||
#  than other nouns.
 | 
			
		||||
#  e.g. 君, 様, 著
 | 
			
		||||
#名詞-接尾-人名
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-place: Suffixes that form nouns and attach to place names more often 
 | 
			
		||||
#  than other nouns.
 | 
			
		||||
#  e.g. 町, 市, 県
 | 
			
		||||
#名詞-接尾-地域
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that 
 | 
			
		||||
#  can appear before スル ("suru").
 | 
			
		||||
#  e.g. 化, 視, 分け, 入り, 落ち, 買い
 | 
			
		||||
#名詞-接尾-サ変接続
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, 
 | 
			
		||||
#  is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the 
 | 
			
		||||
#  conjunctive form of inflectional words.
 | 
			
		||||
#  e.g. そう
 | 
			
		||||
#名詞-接尾-助動詞語幹
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive 
 | 
			
		||||
#  form of inflectional words and appear before the copula だ ("da").
 | 
			
		||||
#  e.g. 的, げ, がち
 | 
			
		||||
#名詞-接尾-形容動詞語幹
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
 | 
			
		||||
#  e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
 | 
			
		||||
#名詞-接尾-副詞可能
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category 
 | 
			
		||||
#  is more inclusive than 助数詞 ("classifier") and includes common nouns that attach 
 | 
			
		||||
#  to numbers.
 | 
			
		||||
#  e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
 | 
			
		||||
#名詞-接尾-助数詞
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-special: Special suffixes that mainly attach to inflecting words.
 | 
			
		||||
#  e.g. (楽し) さ, (考え) 方
 | 
			
		||||
#名詞-接尾-特殊
 | 
			
		||||
#
 | 
			
		||||
#  noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words 
 | 
			
		||||
#  together.
 | 
			
		||||
#  e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
 | 
			
		||||
#名詞-接続詞的
 | 
			
		||||
#
 | 
			
		||||
#  noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are 
 | 
			
		||||
#  semantically verb-like.
 | 
			
		||||
#  e.g. ごらん, ご覧, 御覧, 頂戴
 | 
			
		||||
#名詞-動詞非自立的
 | 
			
		||||
#
 | 
			
		||||
#  noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, 
 | 
			
		||||
#  dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") 
 | 
			
		||||
#  is いわく ("iwaku").
 | 
			
		||||
#名詞-引用文字列
 | 
			
		||||
#
 | 
			
		||||
#  noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
 | 
			
		||||
#  behave like an adjective.
 | 
			
		||||
#  e.g. 申し訳, 仕方, とんでも, 違い
 | 
			
		||||
#名詞-ナイ形容詞語幹
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  prefix: unclassified prefixes
 | 
			
		||||
#接頭詞
 | 
			
		||||
#
 | 
			
		||||
#  prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) 
 | 
			
		||||
#  excluding numerical expressions.
 | 
			
		||||
#  e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
 | 
			
		||||
#接頭詞-名詞接続
 | 
			
		||||
#
 | 
			
		||||
#  prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
 | 
			
		||||
#  in conjunctive form followed by なる/なさる/くださる.
 | 
			
		||||
#  e.g. お (読みなさい), お (座り)
 | 
			
		||||
#接頭詞-動詞接続
 | 
			
		||||
#
 | 
			
		||||
#  prefix-adjectival: Prefixes that attach to adjectives.
 | 
			
		||||
#  e.g. お (寒いですねえ), バカ (でかい)
 | 
			
		||||
#接頭詞-形容詞接続
 | 
			
		||||
#
 | 
			
		||||
#  prefix-numerical: Prefixes that attach to numerical expressions.
 | 
			
		||||
#  e.g. 約, およそ, 毎時
 | 
			
		||||
#接頭詞-数接続
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  verb: unclassified verbs
 | 
			
		||||
#動詞
 | 
			
		||||
#
 | 
			
		||||
#  verb-main:
 | 
			
		||||
#動詞-自立
 | 
			
		||||
#
 | 
			
		||||
#  verb-auxiliary:
 | 
			
		||||
#動詞-非自立
 | 
			
		||||
#
 | 
			
		||||
#  verb-suffix:
 | 
			
		||||
#動詞-接尾
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  adjective: unclassified adjectives
 | 
			
		||||
#形容詞
 | 
			
		||||
#
 | 
			
		||||
#  adjective-main:
 | 
			
		||||
#形容詞-自立
 | 
			
		||||
#
 | 
			
		||||
#  adjective-auxiliary:
 | 
			
		||||
#形容詞-非自立
 | 
			
		||||
#
 | 
			
		||||
#  adjective-suffix:
 | 
			
		||||
#形容詞-接尾
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  adverb: unclassified adverbs
 | 
			
		||||
#副詞
 | 
			
		||||
#
 | 
			
		||||
#  adverb-misc: Words that can be segmented into one unit and where adnominal 
 | 
			
		||||
#  modification is not possible.
 | 
			
		||||
#  e.g. あいかわらず, 多分
 | 
			
		||||
#副詞-一般
 | 
			
		||||
#
 | 
			
		||||
#  adverb-particle_conjunction: Adverbs that can be followed by の, は, に, 
 | 
			
		||||
#  な, する, だ, etc.
 | 
			
		||||
#  e.g. こんなに, そんなに, あんなに, なにか, なんでも
 | 
			
		||||
#副詞-助詞類接続
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  adnominal: Words that only have noun-modifying forms.
 | 
			
		||||
#  e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, 
 | 
			
		||||
#       どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, 
 | 
			
		||||
#       「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
 | 
			
		||||
#連体詞
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  conjunction: Conjunctions that can occur independently.
 | 
			
		||||
#  e.g. が, けれども, そして, じゃあ, それどころか
 | 
			
		||||
接続詞
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  particle: unclassified particles.
 | 
			
		||||
助詞
 | 
			
		||||
#
 | 
			
		||||
#  particle-case: case particles where the subclassification is undefined.
 | 
			
		||||
助詞-格助詞
 | 
			
		||||
#
 | 
			
		||||
#  particle-case-misc: Case particles.
 | 
			
		||||
#  e.g. から, が, で, と, に, へ, より, を, の, にて
 | 
			
		||||
助詞-格助詞-一般
 | 
			
		||||
#
 | 
			
		||||
#  particle-case-quote: the "to" that appears after nouns, a person’s speech, 
 | 
			
		||||
#  quotation marks, expressions of decisions from a meeting, reasons, judgements,
 | 
			
		||||
#  conjectures, etc.
 | 
			
		||||
#  e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
 | 
			
		||||
助詞-格助詞-引用
 | 
			
		||||
#
 | 
			
		||||
#  particle-case-compound: Compounds of particles and verbs that mainly behave 
 | 
			
		||||
#  like case particles.
 | 
			
		||||
#  e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
 | 
			
		||||
#       にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, 
 | 
			
		||||
#       にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, 
 | 
			
		||||
#       に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, 
 | 
			
		||||
#       に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
 | 
			
		||||
#       にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, 
 | 
			
		||||
#       にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
 | 
			
		||||
#       って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
 | 
			
		||||
助詞-格助詞-連語
 | 
			
		||||
#
 | 
			
		||||
#  particle-conjunctive:
 | 
			
		||||
#  e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, 
 | 
			
		||||
#       ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, 
 | 
			
		||||
#       (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
 | 
			
		||||
助詞-接続助詞
 | 
			
		||||
#
 | 
			
		||||
#  particle-dependency:
 | 
			
		||||
#  e.g. こそ, さえ, しか, すら, は, も, ぞ
 | 
			
		||||
助詞-係助詞
 | 
			
		||||
#
 | 
			
		||||
#  particle-adverbial:
 | 
			
		||||
#  e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, 
 | 
			
		||||
#       (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
 | 
			
		||||
#       (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, 
 | 
			
		||||
#       (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
 | 
			
		||||
#       ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
 | 
			
		||||
助詞-副助詞
 | 
			
		||||
#
 | 
			
		||||
#  particle-interjective: particles with interjective grammatical roles.
 | 
			
		||||
#  e.g. (松島) や
 | 
			
		||||
助詞-間投助詞
 | 
			
		||||
#
 | 
			
		||||
#  particle-coordinate:
 | 
			
		||||
#  e.g. と, たり, だの, だり, とか, なり, や, やら
 | 
			
		||||
助詞-並立助詞
 | 
			
		||||
#
 | 
			
		||||
#  particle-final:
 | 
			
		||||
#  e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, 
 | 
			
		||||
#       ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
 | 
			
		||||
助詞-終助詞
 | 
			
		||||
#
 | 
			
		||||
#  particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is 
 | 
			
		||||
#  adverbial, conjunctive, or sentence final. For example:
 | 
			
		||||
#       (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
 | 
			
		||||
#       (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
 | 
			
		||||
#           「(祈りが届いたせい) か (, 試験に合格した.)」
 | 
			
		||||
#       (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
 | 
			
		||||
#  e.g. か
 | 
			
		||||
助詞-副助詞/並立助詞/終助詞
 | 
			
		||||
#
 | 
			
		||||
#  particle-adnominalizer: The "no" that attaches to nouns and modifies 
 | 
			
		||||
#  non-inflectional words.
 | 
			
		||||
助詞-連体化
 | 
			
		||||
#
 | 
			
		||||
#  particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs 
 | 
			
		||||
#  that are giongo, giseigo, or gitaigo.
 | 
			
		||||
#  e.g. に, と
 | 
			
		||||
助詞-副詞化
 | 
			
		||||
#
 | 
			
		||||
#  particle-special: A particle that does not fit into one of the above classifications. 
 | 
			
		||||
#  This includes particles that are used in Tanka, Haiku, and other poetry.
 | 
			
		||||
#  e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
 | 
			
		||||
助詞-特殊
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  auxiliary-verb:
 | 
			
		||||
助動詞
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  interjection: Greetings and other exclamations.
 | 
			
		||||
#  e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, 
 | 
			
		||||
#       いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
 | 
			
		||||
#感動詞
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  symbol: unclassified Symbols.
 | 
			
		||||
記号
 | 
			
		||||
#
 | 
			
		||||
#  symbol-misc: A general symbol not in one of the categories below.
 | 
			
		||||
#  e.g. [○◎@$〒→+]
 | 
			
		||||
記号-一般
 | 
			
		||||
#
 | 
			
		||||
#  symbol-comma: Commas
 | 
			
		||||
#  e.g. [,、]
 | 
			
		||||
記号-読点
 | 
			
		||||
#
 | 
			
		||||
#  symbol-period: Periods and full stops.
 | 
			
		||||
#  e.g. [..。]
 | 
			
		||||
記号-句点
 | 
			
		||||
#
 | 
			
		||||
#  symbol-space: Full-width whitespace.
 | 
			
		||||
記号-空白
 | 
			
		||||
#
 | 
			
		||||
#  symbol-open_bracket:
 | 
			
		||||
#  e.g. [({‘“『【]
 | 
			
		||||
記号-括弧開
 | 
			
		||||
#
 | 
			
		||||
#  symbol-close_bracket:
 | 
			
		||||
#  e.g. [)}’”』」】]
 | 
			
		||||
記号-括弧閉
 | 
			
		||||
#
 | 
			
		||||
#  symbol-alphabetic:
 | 
			
		||||
#記号-アルファベット
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  other: unclassified other
 | 
			
		||||
#その他
 | 
			
		||||
#
 | 
			
		||||
#  other-interjection: Words that are hard to classify as noun-suffixes or 
 | 
			
		||||
#  sentence-final particles.
 | 
			
		||||
#  e.g. (だ)ァ
 | 
			
		||||
その他-間投
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
 | 
			
		||||
#  e.g. あの, うんと, えと
 | 
			
		||||
フィラー
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  non-verbal: non-verbal sound.
 | 
			
		||||
非言語音
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  fragment:
 | 
			
		||||
#語断片
 | 
			
		||||
#
 | 
			
		||||
#####
 | 
			
		||||
#  unknown: unknown part of speech.
 | 
			
		||||
#未知語
 | 
			
		||||
#
 | 
			
		||||
##### End of file
 | 
			
		||||
							
								
								
									
										125
									
								
								archiver/solr-config-dir/lang/stopwords_ar.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								archiver/solr-config-dir/lang/stopwords_ar.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,125 @@
 | 
			
		|||
# This file was created by Jacques Savoy and is distributed under the BSD license.
 | 
			
		||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
 | 
			
		||||
# Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
# Cleaned on October 11, 2009 (not normalized, so use before normalization)
 | 
			
		||||
# This means that when modifying this list, you might need to add some 
 | 
			
		||||
# redundant entries, for example containing forms with both أ and ا
 | 
			
		||||
من
 | 
			
		||||
ومن
 | 
			
		||||
منها
 | 
			
		||||
منه
 | 
			
		||||
في
 | 
			
		||||
وفي
 | 
			
		||||
فيها
 | 
			
		||||
فيه
 | 
			
		||||
و
 | 
			
		||||
ف
 | 
			
		||||
ثم
 | 
			
		||||
او
 | 
			
		||||
أو
 | 
			
		||||
ب
 | 
			
		||||
بها
 | 
			
		||||
به
 | 
			
		||||
ا
 | 
			
		||||
أ
 | 
			
		||||
اى
 | 
			
		||||
اي
 | 
			
		||||
أي
 | 
			
		||||
أى
 | 
			
		||||
لا
 | 
			
		||||
ولا
 | 
			
		||||
الا
 | 
			
		||||
ألا
 | 
			
		||||
إلا
 | 
			
		||||
لكن
 | 
			
		||||
ما
 | 
			
		||||
وما
 | 
			
		||||
كما
 | 
			
		||||
فما
 | 
			
		||||
عن
 | 
			
		||||
مع
 | 
			
		||||
اذا
 | 
			
		||||
إذا
 | 
			
		||||
ان
 | 
			
		||||
أن
 | 
			
		||||
إن
 | 
			
		||||
انها
 | 
			
		||||
أنها
 | 
			
		||||
إنها
 | 
			
		||||
انه
 | 
			
		||||
أنه
 | 
			
		||||
إنه
 | 
			
		||||
بان
 | 
			
		||||
بأن
 | 
			
		||||
فان
 | 
			
		||||
فأن
 | 
			
		||||
وان
 | 
			
		||||
وأن
 | 
			
		||||
وإن
 | 
			
		||||
التى
 | 
			
		||||
التي
 | 
			
		||||
الذى
 | 
			
		||||
الذي
 | 
			
		||||
الذين
 | 
			
		||||
الى
 | 
			
		||||
الي
 | 
			
		||||
إلى
 | 
			
		||||
إلي
 | 
			
		||||
على
 | 
			
		||||
عليها
 | 
			
		||||
عليه
 | 
			
		||||
اما
 | 
			
		||||
أما
 | 
			
		||||
إما
 | 
			
		||||
ايضا
 | 
			
		||||
أيضا
 | 
			
		||||
كل
 | 
			
		||||
وكل
 | 
			
		||||
لم
 | 
			
		||||
ولم
 | 
			
		||||
لن
 | 
			
		||||
ولن
 | 
			
		||||
هى
 | 
			
		||||
هي
 | 
			
		||||
هو
 | 
			
		||||
وهى
 | 
			
		||||
وهي
 | 
			
		||||
وهو
 | 
			
		||||
فهى
 | 
			
		||||
فهي
 | 
			
		||||
فهو
 | 
			
		||||
انت
 | 
			
		||||
أنت
 | 
			
		||||
لك
 | 
			
		||||
لها
 | 
			
		||||
له
 | 
			
		||||
هذه
 | 
			
		||||
هذا
 | 
			
		||||
تلك
 | 
			
		||||
ذلك
 | 
			
		||||
هناك
 | 
			
		||||
كانت
 | 
			
		||||
كان
 | 
			
		||||
يكون
 | 
			
		||||
تكون
 | 
			
		||||
وكانت
 | 
			
		||||
وكان
 | 
			
		||||
غير
 | 
			
		||||
بعض
 | 
			
		||||
قد
 | 
			
		||||
نحو
 | 
			
		||||
بين
 | 
			
		||||
بينما
 | 
			
		||||
منذ
 | 
			
		||||
ضمن
 | 
			
		||||
حيث
 | 
			
		||||
الان
 | 
			
		||||
الآن
 | 
			
		||||
خلال
 | 
			
		||||
بعد
 | 
			
		||||
قبل
 | 
			
		||||
حتى
 | 
			
		||||
عند
 | 
			
		||||
عندما
 | 
			
		||||
لدى
 | 
			
		||||
جميع
 | 
			
		||||
							
								
								
									
										193
									
								
								archiver/solr-config-dir/lang/stopwords_bg.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										193
									
								
								archiver/solr-config-dir/lang/stopwords_bg.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,193 @@
 | 
			
		|||
# This file was created by Jacques Savoy and is distributed under the BSD license.
 | 
			
		||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
 | 
			
		||||
# Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
а
 | 
			
		||||
аз
 | 
			
		||||
ако
 | 
			
		||||
ала
 | 
			
		||||
бе
 | 
			
		||||
без
 | 
			
		||||
беше
 | 
			
		||||
би
 | 
			
		||||
бил
 | 
			
		||||
била
 | 
			
		||||
били
 | 
			
		||||
било
 | 
			
		||||
близо
 | 
			
		||||
бъдат
 | 
			
		||||
бъде
 | 
			
		||||
бяха
 | 
			
		||||
в
 | 
			
		||||
вас
 | 
			
		||||
ваш
 | 
			
		||||
ваша
 | 
			
		||||
вероятно
 | 
			
		||||
вече
 | 
			
		||||
взема
 | 
			
		||||
ви
 | 
			
		||||
вие
 | 
			
		||||
винаги
 | 
			
		||||
все
 | 
			
		||||
всеки
 | 
			
		||||
всички
 | 
			
		||||
всичко
 | 
			
		||||
всяка
 | 
			
		||||
във
 | 
			
		||||
въпреки
 | 
			
		||||
върху
 | 
			
		||||
г
 | 
			
		||||
ги
 | 
			
		||||
главно
 | 
			
		||||
го
 | 
			
		||||
д
 | 
			
		||||
да
 | 
			
		||||
дали
 | 
			
		||||
до
 | 
			
		||||
докато
 | 
			
		||||
докога
 | 
			
		||||
дори
 | 
			
		||||
досега
 | 
			
		||||
доста
 | 
			
		||||
е
 | 
			
		||||
едва
 | 
			
		||||
един
 | 
			
		||||
ето
 | 
			
		||||
за
 | 
			
		||||
зад
 | 
			
		||||
заедно
 | 
			
		||||
заради
 | 
			
		||||
засега
 | 
			
		||||
затова
 | 
			
		||||
защо
 | 
			
		||||
защото
 | 
			
		||||
и
 | 
			
		||||
из
 | 
			
		||||
или
 | 
			
		||||
им
 | 
			
		||||
има
 | 
			
		||||
имат
 | 
			
		||||
иска
 | 
			
		||||
й
 | 
			
		||||
каза
 | 
			
		||||
как
 | 
			
		||||
каква
 | 
			
		||||
какво
 | 
			
		||||
както
 | 
			
		||||
какъв
 | 
			
		||||
като
 | 
			
		||||
кога
 | 
			
		||||
когато
 | 
			
		||||
което
 | 
			
		||||
които
 | 
			
		||||
кой
 | 
			
		||||
който
 | 
			
		||||
колко
 | 
			
		||||
която
 | 
			
		||||
къде
 | 
			
		||||
където
 | 
			
		||||
към
 | 
			
		||||
ли
 | 
			
		||||
м
 | 
			
		||||
ме
 | 
			
		||||
между
 | 
			
		||||
мен
 | 
			
		||||
ми
 | 
			
		||||
мнозина
 | 
			
		||||
мога
 | 
			
		||||
могат
 | 
			
		||||
може
 | 
			
		||||
моля
 | 
			
		||||
момента
 | 
			
		||||
му
 | 
			
		||||
н
 | 
			
		||||
на
 | 
			
		||||
над
 | 
			
		||||
назад
 | 
			
		||||
най
 | 
			
		||||
направи
 | 
			
		||||
напред
 | 
			
		||||
например
 | 
			
		||||
нас
 | 
			
		||||
не
 | 
			
		||||
него
 | 
			
		||||
нея
 | 
			
		||||
ни
 | 
			
		||||
ние
 | 
			
		||||
никой
 | 
			
		||||
нито
 | 
			
		||||
но
 | 
			
		||||
някои
 | 
			
		||||
някой
 | 
			
		||||
няма
 | 
			
		||||
обаче
 | 
			
		||||
около
 | 
			
		||||
освен
 | 
			
		||||
особено
 | 
			
		||||
от
 | 
			
		||||
отгоре
 | 
			
		||||
отново
 | 
			
		||||
още
 | 
			
		||||
пак
 | 
			
		||||
по
 | 
			
		||||
повече
 | 
			
		||||
повечето
 | 
			
		||||
под
 | 
			
		||||
поне
 | 
			
		||||
поради
 | 
			
		||||
после
 | 
			
		||||
почти
 | 
			
		||||
прави
 | 
			
		||||
пред
 | 
			
		||||
преди
 | 
			
		||||
през
 | 
			
		||||
при
 | 
			
		||||
пък
 | 
			
		||||
първо
 | 
			
		||||
с
 | 
			
		||||
са
 | 
			
		||||
само
 | 
			
		||||
се
 | 
			
		||||
сега
 | 
			
		||||
си
 | 
			
		||||
скоро
 | 
			
		||||
след
 | 
			
		||||
сме
 | 
			
		||||
според
 | 
			
		||||
сред
 | 
			
		||||
срещу
 | 
			
		||||
сте
 | 
			
		||||
съм
 | 
			
		||||
със
 | 
			
		||||
също
 | 
			
		||||
т
 | 
			
		||||
тази
 | 
			
		||||
така
 | 
			
		||||
такива
 | 
			
		||||
такъв
 | 
			
		||||
там
 | 
			
		||||
твой
 | 
			
		||||
те
 | 
			
		||||
тези
 | 
			
		||||
ти
 | 
			
		||||
тн
 | 
			
		||||
то
 | 
			
		||||
това
 | 
			
		||||
тогава
 | 
			
		||||
този
 | 
			
		||||
той
 | 
			
		||||
толкова
 | 
			
		||||
точно
 | 
			
		||||
трябва
 | 
			
		||||
тук
 | 
			
		||||
тъй
 | 
			
		||||
тя
 | 
			
		||||
тях
 | 
			
		||||
у
 | 
			
		||||
харесва
 | 
			
		||||
ч
 | 
			
		||||
че
 | 
			
		||||
често
 | 
			
		||||
чрез
 | 
			
		||||
ще
 | 
			
		||||
щом
 | 
			
		||||
я
 | 
			
		||||
							
								
								
									
										220
									
								
								archiver/solr-config-dir/lang/stopwords_ca.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								archiver/solr-config-dir/lang/stopwords_ca.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,220 @@
 | 
			
		|||
# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
 | 
			
		||||
a
 | 
			
		||||
abans
 | 
			
		||||
ací
 | 
			
		||||
ah
 | 
			
		||||
així
 | 
			
		||||
això
 | 
			
		||||
al
 | 
			
		||||
als
 | 
			
		||||
aleshores
 | 
			
		||||
algun
 | 
			
		||||
alguna
 | 
			
		||||
algunes
 | 
			
		||||
alguns
 | 
			
		||||
alhora
 | 
			
		||||
allà
 | 
			
		||||
allí
 | 
			
		||||
allò
 | 
			
		||||
altra
 | 
			
		||||
altre
 | 
			
		||||
altres
 | 
			
		||||
amb
 | 
			
		||||
ambdós
 | 
			
		||||
ambdues
 | 
			
		||||
apa
 | 
			
		||||
aquell
 | 
			
		||||
aquella
 | 
			
		||||
aquelles
 | 
			
		||||
aquells
 | 
			
		||||
aquest
 | 
			
		||||
aquesta
 | 
			
		||||
aquestes
 | 
			
		||||
aquests
 | 
			
		||||
aquí
 | 
			
		||||
baix
 | 
			
		||||
cada
 | 
			
		||||
cadascú
 | 
			
		||||
cadascuna
 | 
			
		||||
cadascunes
 | 
			
		||||
cadascuns
 | 
			
		||||
com
 | 
			
		||||
contra
 | 
			
		||||
d'un
 | 
			
		||||
d'una
 | 
			
		||||
d'unes
 | 
			
		||||
d'uns
 | 
			
		||||
dalt
 | 
			
		||||
de
 | 
			
		||||
del
 | 
			
		||||
dels
 | 
			
		||||
des
 | 
			
		||||
després
 | 
			
		||||
dins
 | 
			
		||||
dintre
 | 
			
		||||
donat
 | 
			
		||||
doncs
 | 
			
		||||
durant
 | 
			
		||||
e
 | 
			
		||||
eh
 | 
			
		||||
el
 | 
			
		||||
els
 | 
			
		||||
em
 | 
			
		||||
en
 | 
			
		||||
encara
 | 
			
		||||
ens
 | 
			
		||||
entre
 | 
			
		||||
érem
 | 
			
		||||
eren
 | 
			
		||||
éreu
 | 
			
		||||
es
 | 
			
		||||
és
 | 
			
		||||
esta
 | 
			
		||||
està
 | 
			
		||||
estàvem
 | 
			
		||||
estaven
 | 
			
		||||
estàveu
 | 
			
		||||
esteu
 | 
			
		||||
et
 | 
			
		||||
etc
 | 
			
		||||
ets
 | 
			
		||||
fins
 | 
			
		||||
fora
 | 
			
		||||
gairebé
 | 
			
		||||
ha
 | 
			
		||||
han
 | 
			
		||||
has
 | 
			
		||||
havia
 | 
			
		||||
he
 | 
			
		||||
hem
 | 
			
		||||
heu
 | 
			
		||||
hi 
 | 
			
		||||
ho
 | 
			
		||||
i
 | 
			
		||||
igual
 | 
			
		||||
iguals
 | 
			
		||||
ja
 | 
			
		||||
l'hi
 | 
			
		||||
la
 | 
			
		||||
les
 | 
			
		||||
li
 | 
			
		||||
li'n
 | 
			
		||||
llavors
 | 
			
		||||
m'he
 | 
			
		||||
ma
 | 
			
		||||
mal
 | 
			
		||||
malgrat
 | 
			
		||||
mateix
 | 
			
		||||
mateixa
 | 
			
		||||
mateixes
 | 
			
		||||
mateixos
 | 
			
		||||
me
 | 
			
		||||
mentre
 | 
			
		||||
més
 | 
			
		||||
meu
 | 
			
		||||
meus
 | 
			
		||||
meva
 | 
			
		||||
meves
 | 
			
		||||
molt
 | 
			
		||||
molta
 | 
			
		||||
moltes
 | 
			
		||||
molts
 | 
			
		||||
mon
 | 
			
		||||
mons
 | 
			
		||||
n'he
 | 
			
		||||
n'hi
 | 
			
		||||
ne
 | 
			
		||||
ni
 | 
			
		||||
no
 | 
			
		||||
nogensmenys
 | 
			
		||||
només
 | 
			
		||||
nosaltres
 | 
			
		||||
nostra
 | 
			
		||||
nostre
 | 
			
		||||
nostres
 | 
			
		||||
o
 | 
			
		||||
oh
 | 
			
		||||
oi
 | 
			
		||||
on
 | 
			
		||||
pas
 | 
			
		||||
pel
 | 
			
		||||
pels
 | 
			
		||||
per
 | 
			
		||||
però
 | 
			
		||||
perquè
 | 
			
		||||
poc 
 | 
			
		||||
poca
 | 
			
		||||
pocs
 | 
			
		||||
poques
 | 
			
		||||
potser
 | 
			
		||||
propi
 | 
			
		||||
qual
 | 
			
		||||
quals
 | 
			
		||||
quan
 | 
			
		||||
quant 
 | 
			
		||||
que
 | 
			
		||||
què
 | 
			
		||||
quelcom
 | 
			
		||||
qui
 | 
			
		||||
quin
 | 
			
		||||
quina
 | 
			
		||||
quines
 | 
			
		||||
quins
 | 
			
		||||
s'ha
 | 
			
		||||
s'han
 | 
			
		||||
sa
 | 
			
		||||
semblant
 | 
			
		||||
semblants
 | 
			
		||||
ses
 | 
			
		||||
seu 
 | 
			
		||||
seus
 | 
			
		||||
seva
 | 
			
		||||
seva
 | 
			
		||||
seves
 | 
			
		||||
si
 | 
			
		||||
sobre
 | 
			
		||||
sobretot
 | 
			
		||||
sóc
 | 
			
		||||
solament
 | 
			
		||||
sols
 | 
			
		||||
son 
 | 
			
		||||
són
 | 
			
		||||
sons 
 | 
			
		||||
sota
 | 
			
		||||
sou
 | 
			
		||||
t'ha
 | 
			
		||||
t'han
 | 
			
		||||
t'he
 | 
			
		||||
ta
 | 
			
		||||
tal
 | 
			
		||||
també
 | 
			
		||||
tampoc
 | 
			
		||||
tan
 | 
			
		||||
tant
 | 
			
		||||
tanta
 | 
			
		||||
tantes
 | 
			
		||||
teu
 | 
			
		||||
teus
 | 
			
		||||
teva
 | 
			
		||||
teves
 | 
			
		||||
ton
 | 
			
		||||
tons
 | 
			
		||||
tot
 | 
			
		||||
tota
 | 
			
		||||
totes
 | 
			
		||||
tots
 | 
			
		||||
un
 | 
			
		||||
una
 | 
			
		||||
unes
 | 
			
		||||
uns
 | 
			
		||||
us
 | 
			
		||||
va
 | 
			
		||||
vaig
 | 
			
		||||
vam
 | 
			
		||||
van
 | 
			
		||||
vas
 | 
			
		||||
veu
 | 
			
		||||
vosaltres
 | 
			
		||||
vostra
 | 
			
		||||
vostre
 | 
			
		||||
vostres
 | 
			
		||||
							
								
								
									
										172
									
								
								archiver/solr-config-dir/lang/stopwords_cz.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								archiver/solr-config-dir/lang/stopwords_cz.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,172 @@
 | 
			
		|||
a
 | 
			
		||||
s
 | 
			
		||||
k
 | 
			
		||||
o
 | 
			
		||||
i
 | 
			
		||||
u
 | 
			
		||||
v
 | 
			
		||||
z
 | 
			
		||||
dnes
 | 
			
		||||
cz
 | 
			
		||||
tímto
 | 
			
		||||
budeš
 | 
			
		||||
budem
 | 
			
		||||
byli
 | 
			
		||||
jseš
 | 
			
		||||
můj
 | 
			
		||||
svým
 | 
			
		||||
ta
 | 
			
		||||
tomto
 | 
			
		||||
tohle
 | 
			
		||||
tuto
 | 
			
		||||
tyto
 | 
			
		||||
jej
 | 
			
		||||
zda
 | 
			
		||||
proč
 | 
			
		||||
máte
 | 
			
		||||
tato
 | 
			
		||||
kam
 | 
			
		||||
tohoto
 | 
			
		||||
kdo
 | 
			
		||||
kteří
 | 
			
		||||
mi
 | 
			
		||||
nám
 | 
			
		||||
tom
 | 
			
		||||
tomuto
 | 
			
		||||
mít
 | 
			
		||||
nic
 | 
			
		||||
proto
 | 
			
		||||
kterou
 | 
			
		||||
byla
 | 
			
		||||
toho
 | 
			
		||||
protože
 | 
			
		||||
asi
 | 
			
		||||
ho
 | 
			
		||||
naši
 | 
			
		||||
napište
 | 
			
		||||
re
 | 
			
		||||
což
 | 
			
		||||
tím
 | 
			
		||||
takže
 | 
			
		||||
svých
 | 
			
		||||
její
 | 
			
		||||
svými
 | 
			
		||||
jste
 | 
			
		||||
aj
 | 
			
		||||
tu
 | 
			
		||||
tedy
 | 
			
		||||
teto
 | 
			
		||||
bylo
 | 
			
		||||
kde
 | 
			
		||||
ke
 | 
			
		||||
pravé
 | 
			
		||||
ji
 | 
			
		||||
nad
 | 
			
		||||
nejsou
 | 
			
		||||
či
 | 
			
		||||
pod
 | 
			
		||||
téma
 | 
			
		||||
mezi
 | 
			
		||||
přes
 | 
			
		||||
ty
 | 
			
		||||
pak
 | 
			
		||||
vám
 | 
			
		||||
ani
 | 
			
		||||
když
 | 
			
		||||
však
 | 
			
		||||
neg
 | 
			
		||||
jsem
 | 
			
		||||
tento
 | 
			
		||||
článku
 | 
			
		||||
články
 | 
			
		||||
aby
 | 
			
		||||
jsme
 | 
			
		||||
před
 | 
			
		||||
pta
 | 
			
		||||
jejich
 | 
			
		||||
byl
 | 
			
		||||
ještě
 | 
			
		||||
až
 | 
			
		||||
bez
 | 
			
		||||
také
 | 
			
		||||
pouze
 | 
			
		||||
první
 | 
			
		||||
vaše
 | 
			
		||||
která
 | 
			
		||||
nás
 | 
			
		||||
nový
 | 
			
		||||
tipy
 | 
			
		||||
pokud
 | 
			
		||||
může
 | 
			
		||||
strana
 | 
			
		||||
jeho
 | 
			
		||||
své
 | 
			
		||||
jiné
 | 
			
		||||
zprávy
 | 
			
		||||
nové
 | 
			
		||||
není
 | 
			
		||||
vás
 | 
			
		||||
jen
 | 
			
		||||
podle
 | 
			
		||||
zde
 | 
			
		||||
už
 | 
			
		||||
být
 | 
			
		||||
více
 | 
			
		||||
bude
 | 
			
		||||
již
 | 
			
		||||
než
 | 
			
		||||
který
 | 
			
		||||
by
 | 
			
		||||
které
 | 
			
		||||
co
 | 
			
		||||
nebo
 | 
			
		||||
ten
 | 
			
		||||
tak
 | 
			
		||||
má
 | 
			
		||||
při
 | 
			
		||||
od
 | 
			
		||||
po
 | 
			
		||||
jsou
 | 
			
		||||
jak
 | 
			
		||||
další
 | 
			
		||||
ale
 | 
			
		||||
si
 | 
			
		||||
se
 | 
			
		||||
ve
 | 
			
		||||
to
 | 
			
		||||
jako
 | 
			
		||||
za
 | 
			
		||||
zpět
 | 
			
		||||
ze
 | 
			
		||||
do
 | 
			
		||||
pro
 | 
			
		||||
je
 | 
			
		||||
na
 | 
			
		||||
atd
 | 
			
		||||
atp
 | 
			
		||||
jakmile
 | 
			
		||||
přičemž
 | 
			
		||||
já
 | 
			
		||||
on
 | 
			
		||||
ona
 | 
			
		||||
ono
 | 
			
		||||
oni
 | 
			
		||||
ony
 | 
			
		||||
my
 | 
			
		||||
vy
 | 
			
		||||
jí
 | 
			
		||||
ji
 | 
			
		||||
mě
 | 
			
		||||
mne
 | 
			
		||||
jemu
 | 
			
		||||
tomu
 | 
			
		||||
těm
 | 
			
		||||
těmu
 | 
			
		||||
němu
 | 
			
		||||
němuž
 | 
			
		||||
jehož
 | 
			
		||||
jíž
 | 
			
		||||
jelikož
 | 
			
		||||
jež
 | 
			
		||||
jakož
 | 
			
		||||
načež
 | 
			
		||||
							
								
								
									
										110
									
								
								archiver/solr-config-dir/lang/stopwords_da.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								archiver/solr-config-dir/lang/stopwords_da.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,110 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | A Danish stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
 | This is a ranked list (commonest to rarest) of stopwords derived from
 | 
			
		||||
 | a large text sample.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
og           | and
 | 
			
		||||
i            | in
 | 
			
		||||
jeg          | I
 | 
			
		||||
det          | that (dem. pronoun)/it (pers. pronoun)
 | 
			
		||||
at           | that (in front of a sentence)/to (with infinitive)
 | 
			
		||||
en           | a/an
 | 
			
		||||
den          | it (pers. pronoun)/that (dem. pronoun)
 | 
			
		||||
til          | to/at/for/until/against/by/of/into, more
 | 
			
		||||
er           | present tense of "to be"
 | 
			
		||||
som          | who, as
 | 
			
		||||
på           | on/upon/in/on/at/to/after/of/with/for, on
 | 
			
		||||
de           | they
 | 
			
		||||
med          | with/by/in, along
 | 
			
		||||
han          | he
 | 
			
		||||
af           | of/by/from/off/for/in/with/on, off
 | 
			
		||||
for          | at/for/to/from/by/of/ago, in front/before, because
 | 
			
		||||
ikke         | not
 | 
			
		||||
der          | who/which, there/those
 | 
			
		||||
var          | past tense of "to be"
 | 
			
		||||
mig          | me/myself
 | 
			
		||||
sig          | oneself/himself/herself/itself/themselves
 | 
			
		||||
men          | but
 | 
			
		||||
et           | a/an/one, one (number), someone/somebody/one
 | 
			
		||||
har          | present tense of "to have"
 | 
			
		||||
om           | round/about/for/in/a, about/around/down, if
 | 
			
		||||
vi           | we
 | 
			
		||||
min          | my
 | 
			
		||||
havde        | past tense of "to have"
 | 
			
		||||
ham          | him
 | 
			
		||||
hun          | she
 | 
			
		||||
nu           | now
 | 
			
		||||
over         | over/above/across/by/beyond/past/on/about, over/past
 | 
			
		||||
da           | then, when/as/since
 | 
			
		||||
fra          | from/off/since, off, since
 | 
			
		||||
du           | you
 | 
			
		||||
ud           | out
 | 
			
		||||
sin          | his/her/its/one's
 | 
			
		||||
dem          | them
 | 
			
		||||
os           | us/ourselves
 | 
			
		||||
op           | up
 | 
			
		||||
man          | you/one
 | 
			
		||||
hans         | his
 | 
			
		||||
hvor         | where
 | 
			
		||||
eller        | or
 | 
			
		||||
hvad         | what
 | 
			
		||||
skal         | must/shall etc.
 | 
			
		||||
selv         | myself/yourself/herself/ourselves etc., even
 | 
			
		||||
her          | here
 | 
			
		||||
alle         | all/everyone/everybody etc.
 | 
			
		||||
vil          | will (verb)
 | 
			
		||||
blev         | past tense of "to stay/to remain/to get/to become"
 | 
			
		||||
kunne        | could
 | 
			
		||||
ind          | in
 | 
			
		||||
når          | when
 | 
			
		||||
være         | infinitive "to be"
 | 
			
		||||
dog          | however/yet/after all
 | 
			
		||||
noget        | something
 | 
			
		||||
ville        | would
 | 
			
		||||
jo           | you know/you see (adv), yes
 | 
			
		||||
deres        | their/theirs
 | 
			
		||||
efter        | after/behind/according to/for/by/from, later/afterwards
 | 
			
		||||
ned          | down
 | 
			
		||||
skulle       | should
 | 
			
		||||
denne        | this
 | 
			
		||||
end          | than
 | 
			
		||||
dette        | this
 | 
			
		||||
mit          | my/mine
 | 
			
		||||
også         | also
 | 
			
		||||
under        | under/beneath/below/during, below/underneath
 | 
			
		||||
have         | have
 | 
			
		||||
dig          | you
 | 
			
		||||
anden        | other
 | 
			
		||||
hende        | her
 | 
			
		||||
mine         | my
 | 
			
		||||
alt          | everything
 | 
			
		||||
meget        | much/very, plenty of
 | 
			
		||||
sit          | his, her, its, one's
 | 
			
		||||
sine         | his, her, its, one's
 | 
			
		||||
vor          | our
 | 
			
		||||
mod          | against
 | 
			
		||||
disse        | these
 | 
			
		||||
hvis         | if
 | 
			
		||||
din          | your/yours
 | 
			
		||||
nogle        | some
 | 
			
		||||
hos          | by/at
 | 
			
		||||
blive        | be/become
 | 
			
		||||
mange        | many
 | 
			
		||||
ad           | by/through
 | 
			
		||||
bliver       | present tense of "to be/to become"
 | 
			
		||||
hendes       | her/hers
 | 
			
		||||
været        | been (past participle of "to be")
 | 
			
		||||
thi          | for (conj)
 | 
			
		||||
jer          | you
 | 
			
		||||
sådan        | such, like this/like that
 | 
			
		||||
							
								
								
									
										294
									
								
								archiver/solr-config-dir/lang/stopwords_de.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										294
									
								
								archiver/solr-config-dir/lang/stopwords_de.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,294 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | A German stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
 | The number of forms in this list is reduced significantly by passing it
 | 
			
		||||
 | through the German stemmer.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
aber           |  but
 | 
			
		||||
 | 
			
		||||
alle           |  all
 | 
			
		||||
allem
 | 
			
		||||
allen
 | 
			
		||||
aller
 | 
			
		||||
alles
 | 
			
		||||
 | 
			
		||||
als            |  than, as
 | 
			
		||||
also           |  so
 | 
			
		||||
am             |  an + dem
 | 
			
		||||
an             |  at
 | 
			
		||||
 | 
			
		||||
ander          |  other
 | 
			
		||||
andere
 | 
			
		||||
anderem
 | 
			
		||||
anderen
 | 
			
		||||
anderer
 | 
			
		||||
anderes
 | 
			
		||||
anderm
 | 
			
		||||
andern
 | 
			
		||||
anderr
 | 
			
		||||
anders
 | 
			
		||||
 | 
			
		||||
auch           |  also
 | 
			
		||||
auf            |  on
 | 
			
		||||
aus            |  out of
 | 
			
		||||
bei            |  by
 | 
			
		||||
bin            |  am
 | 
			
		||||
bis            |  until
 | 
			
		||||
bist           |  art
 | 
			
		||||
da             |  there
 | 
			
		||||
damit          |  with it
 | 
			
		||||
dann           |  then
 | 
			
		||||
 | 
			
		||||
der            |  the
 | 
			
		||||
den
 | 
			
		||||
des
 | 
			
		||||
dem
 | 
			
		||||
die
 | 
			
		||||
das
 | 
			
		||||
 | 
			
		||||
daß            |  that
 | 
			
		||||
 | 
			
		||||
derselbe       |  the same
 | 
			
		||||
derselben
 | 
			
		||||
denselben
 | 
			
		||||
desselben
 | 
			
		||||
demselben
 | 
			
		||||
dieselbe
 | 
			
		||||
dieselben
 | 
			
		||||
dasselbe
 | 
			
		||||
 | 
			
		||||
dazu           |  to that
 | 
			
		||||
 | 
			
		||||
dein           |  thy
 | 
			
		||||
deine
 | 
			
		||||
deinem
 | 
			
		||||
deinen
 | 
			
		||||
deiner
 | 
			
		||||
deines
 | 
			
		||||
 | 
			
		||||
denn           |  because
 | 
			
		||||
 | 
			
		||||
derer          |  of those
 | 
			
		||||
dessen         |  of him
 | 
			
		||||
 | 
			
		||||
dich           |  thee
 | 
			
		||||
dir            |  to thee
 | 
			
		||||
du             |  thou
 | 
			
		||||
 | 
			
		||||
dies           |  this
 | 
			
		||||
diese
 | 
			
		||||
diesem
 | 
			
		||||
diesen
 | 
			
		||||
dieser
 | 
			
		||||
dieses
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
doch           |  (several meanings)
 | 
			
		||||
dort           |  (over) there
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
durch          |  through
 | 
			
		||||
 | 
			
		||||
ein            |  a
 | 
			
		||||
eine
 | 
			
		||||
einem
 | 
			
		||||
einen
 | 
			
		||||
einer
 | 
			
		||||
eines
 | 
			
		||||
 | 
			
		||||
einig          |  some
 | 
			
		||||
einige
 | 
			
		||||
einigem
 | 
			
		||||
einigen
 | 
			
		||||
einiger
 | 
			
		||||
einiges
 | 
			
		||||
 | 
			
		||||
einmal         |  once
 | 
			
		||||
 | 
			
		||||
er             |  he
 | 
			
		||||
ihn            |  him
 | 
			
		||||
ihm            |  to him
 | 
			
		||||
 | 
			
		||||
es             |  it
 | 
			
		||||
etwas          |  something
 | 
			
		||||
 | 
			
		||||
euer           |  your
 | 
			
		||||
eure
 | 
			
		||||
eurem
 | 
			
		||||
euren
 | 
			
		||||
eurer
 | 
			
		||||
eures
 | 
			
		||||
 | 
			
		||||
für            |  for
 | 
			
		||||
gegen          |  towards
 | 
			
		||||
gewesen        |  p.p. of sein
 | 
			
		||||
hab            |  have
 | 
			
		||||
habe           |  have
 | 
			
		||||
haben          |  have
 | 
			
		||||
hat            |  has
 | 
			
		||||
hatte          |  had
 | 
			
		||||
hatten         |  had
 | 
			
		||||
hier           |  here
 | 
			
		||||
hin            |  there
 | 
			
		||||
hinter         |  behind
 | 
			
		||||
 | 
			
		||||
ich            |  I
 | 
			
		||||
mich           |  me
 | 
			
		||||
mir            |  to me
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
ihr            |  you, to her
 | 
			
		||||
ihre
 | 
			
		||||
ihrem
 | 
			
		||||
ihren
 | 
			
		||||
ihrer
 | 
			
		||||
ihres
 | 
			
		||||
euch           |  to you
 | 
			
		||||
 | 
			
		||||
im             |  in + dem
 | 
			
		||||
in             |  in
 | 
			
		||||
indem          |  while
 | 
			
		||||
ins            |  in + das
 | 
			
		||||
ist            |  is
 | 
			
		||||
 | 
			
		||||
jede           |  each, every
 | 
			
		||||
jedem
 | 
			
		||||
jeden
 | 
			
		||||
jeder
 | 
			
		||||
jedes
 | 
			
		||||
 | 
			
		||||
jene           |  that
 | 
			
		||||
jenem
 | 
			
		||||
jenen
 | 
			
		||||
jener
 | 
			
		||||
jenes
 | 
			
		||||
 | 
			
		||||
jetzt          |  now
 | 
			
		||||
kann           |  can
 | 
			
		||||
 | 
			
		||||
kein           |  no
 | 
			
		||||
keine
 | 
			
		||||
keinem
 | 
			
		||||
keinen
 | 
			
		||||
keiner
 | 
			
		||||
keines
 | 
			
		||||
 | 
			
		||||
können         |  can
 | 
			
		||||
könnte         |  could
 | 
			
		||||
machen         |  do
 | 
			
		||||
man            |  one
 | 
			
		||||
 | 
			
		||||
manche         |  some, many a
 | 
			
		||||
manchem
 | 
			
		||||
manchen
 | 
			
		||||
mancher
 | 
			
		||||
manches
 | 
			
		||||
 | 
			
		||||
mein           |  my
 | 
			
		||||
meine
 | 
			
		||||
meinem
 | 
			
		||||
meinen
 | 
			
		||||
meiner
 | 
			
		||||
meines
 | 
			
		||||
 | 
			
		||||
mit            |  with
 | 
			
		||||
muss           |  must
 | 
			
		||||
musste         |  had to
 | 
			
		||||
nach           |  to(wards)
 | 
			
		||||
nicht          |  not
 | 
			
		||||
nichts         |  nothing
 | 
			
		||||
noch           |  still, yet
 | 
			
		||||
nun            |  now
 | 
			
		||||
nur            |  only
 | 
			
		||||
ob             |  whether
 | 
			
		||||
oder           |  or
 | 
			
		||||
ohne           |  without
 | 
			
		||||
sehr           |  very
 | 
			
		||||
 | 
			
		||||
sein           |  his
 | 
			
		||||
seine
 | 
			
		||||
seinem
 | 
			
		||||
seinen
 | 
			
		||||
seiner
 | 
			
		||||
seines
 | 
			
		||||
 | 
			
		||||
selbst         |  self
 | 
			
		||||
sich           |  herself
 | 
			
		||||
 | 
			
		||||
sie            |  they, she
 | 
			
		||||
ihnen          |  to them
 | 
			
		||||
 | 
			
		||||
sind           |  are
 | 
			
		||||
so             |  so
 | 
			
		||||
 | 
			
		||||
solche         |  such
 | 
			
		||||
solchem
 | 
			
		||||
solchen
 | 
			
		||||
solcher
 | 
			
		||||
solches
 | 
			
		||||
 | 
			
		||||
soll           |  shall
 | 
			
		||||
sollte         |  should
 | 
			
		||||
sondern        |  but
 | 
			
		||||
sonst          |  else
 | 
			
		||||
über           |  over
 | 
			
		||||
um             |  about, around
 | 
			
		||||
und            |  and
 | 
			
		||||
 | 
			
		||||
uns            |  us
 | 
			
		||||
unse
 | 
			
		||||
unsem
 | 
			
		||||
unsen
 | 
			
		||||
unser
 | 
			
		||||
unses
 | 
			
		||||
 | 
			
		||||
unter          |  under
 | 
			
		||||
viel           |  much
 | 
			
		||||
vom            |  von + dem
 | 
			
		||||
von            |  from
 | 
			
		||||
vor            |  before
 | 
			
		||||
während        |  while
 | 
			
		||||
war            |  was
 | 
			
		||||
waren          |  were
 | 
			
		||||
warst          |  wast
 | 
			
		||||
was            |  what
 | 
			
		||||
weg            |  away, off
 | 
			
		||||
weil           |  because
 | 
			
		||||
weiter         |  further
 | 
			
		||||
 | 
			
		||||
welche         |  which
 | 
			
		||||
welchem
 | 
			
		||||
welchen
 | 
			
		||||
welcher
 | 
			
		||||
welches
 | 
			
		||||
 | 
			
		||||
wenn           |  when
 | 
			
		||||
werde          |  will
 | 
			
		||||
werden         |  will
 | 
			
		||||
wie            |  how
 | 
			
		||||
wieder         |  again
 | 
			
		||||
will           |  want
 | 
			
		||||
wir            |  we
 | 
			
		||||
wird           |  will
 | 
			
		||||
wirst          |  willst
 | 
			
		||||
wo             |  where
 | 
			
		||||
wollen         |  want
 | 
			
		||||
wollte         |  wanted
 | 
			
		||||
würde          |  would
 | 
			
		||||
würden         |  would
 | 
			
		||||
zu             |  to
 | 
			
		||||
zum            |  zu + dem
 | 
			
		||||
zur            |  zu + der
 | 
			
		||||
zwar           |  indeed
 | 
			
		||||
zwischen       |  between
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										78
									
								
								archiver/solr-config-dir/lang/stopwords_el.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								archiver/solr-config-dir/lang/stopwords_el.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,78 @@
 | 
			
		|||
# Lucene Greek Stopwords list
 | 
			
		||||
# Note: by default this file is used after GreekLowerCaseFilter,
 | 
			
		||||
# so when modifying this file use 'σ' instead of 'ς' 
 | 
			
		||||
ο
 | 
			
		||||
η
 | 
			
		||||
το
 | 
			
		||||
οι
 | 
			
		||||
τα
 | 
			
		||||
του
 | 
			
		||||
τησ
 | 
			
		||||
των
 | 
			
		||||
τον
 | 
			
		||||
την
 | 
			
		||||
και 
 | 
			
		||||
κι
 | 
			
		||||
κ
 | 
			
		||||
ειμαι
 | 
			
		||||
εισαι
 | 
			
		||||
ειναι
 | 
			
		||||
ειμαστε
 | 
			
		||||
ειστε
 | 
			
		||||
στο
 | 
			
		||||
στον
 | 
			
		||||
στη
 | 
			
		||||
στην
 | 
			
		||||
μα
 | 
			
		||||
αλλα
 | 
			
		||||
απο
 | 
			
		||||
για
 | 
			
		||||
προσ
 | 
			
		||||
με
 | 
			
		||||
σε
 | 
			
		||||
ωσ
 | 
			
		||||
παρα
 | 
			
		||||
αντι
 | 
			
		||||
κατα
 | 
			
		||||
μετα
 | 
			
		||||
θα
 | 
			
		||||
να
 | 
			
		||||
δε
 | 
			
		||||
δεν
 | 
			
		||||
μη
 | 
			
		||||
μην
 | 
			
		||||
επι
 | 
			
		||||
ενω
 | 
			
		||||
εαν
 | 
			
		||||
αν
 | 
			
		||||
τοτε
 | 
			
		||||
που
 | 
			
		||||
πωσ
 | 
			
		||||
ποιοσ
 | 
			
		||||
ποια
 | 
			
		||||
ποιο
 | 
			
		||||
ποιοι
 | 
			
		||||
ποιεσ
 | 
			
		||||
ποιων
 | 
			
		||||
ποιουσ
 | 
			
		||||
αυτοσ
 | 
			
		||||
αυτη
 | 
			
		||||
αυτο
 | 
			
		||||
αυτοι
 | 
			
		||||
αυτων
 | 
			
		||||
αυτουσ
 | 
			
		||||
αυτεσ
 | 
			
		||||
αυτα
 | 
			
		||||
εκεινοσ
 | 
			
		||||
εκεινη
 | 
			
		||||
εκεινο
 | 
			
		||||
εκεινοι
 | 
			
		||||
εκεινεσ
 | 
			
		||||
εκεινα
 | 
			
		||||
εκεινων
 | 
			
		||||
εκεινουσ
 | 
			
		||||
οπωσ
 | 
			
		||||
ομωσ
 | 
			
		||||
ισωσ
 | 
			
		||||
οσο
 | 
			
		||||
οτι
 | 
			
		||||
							
								
								
									
										54
									
								
								archiver/solr-config-dir/lang/stopwords_en.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								archiver/solr-config-dir/lang/stopwords_en.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,54 @@
 | 
			
		|||
# Licensed to the Apache Software Foundation (ASF) under one or more
 | 
			
		||||
# contributor license agreements.  See the NOTICE file distributed with
 | 
			
		||||
# this work for additional information regarding copyright ownership.
 | 
			
		||||
# The ASF licenses this file to You under the Apache License, Version 2.0
 | 
			
		||||
# (the "License"); you may not use this file except in compliance with
 | 
			
		||||
# the License.  You may obtain a copy of the License at
 | 
			
		||||
#
 | 
			
		||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#
 | 
			
		||||
# Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
# See the License for the specific language governing permissions and
 | 
			
		||||
# limitations under the License.
 | 
			
		||||
 | 
			
		||||
# a couple of test stopwords to test that the words are really being
 | 
			
		||||
# configured from this file:
 | 
			
		||||
stopworda
 | 
			
		||||
stopwordb
 | 
			
		||||
 | 
			
		||||
# Standard english stop words taken from Lucene's StopAnalyzer
 | 
			
		||||
a
 | 
			
		||||
an
 | 
			
		||||
and
 | 
			
		||||
are
 | 
			
		||||
as
 | 
			
		||||
at
 | 
			
		||||
be
 | 
			
		||||
but
 | 
			
		||||
by
 | 
			
		||||
for
 | 
			
		||||
if
 | 
			
		||||
in
 | 
			
		||||
into
 | 
			
		||||
is
 | 
			
		||||
it
 | 
			
		||||
no
 | 
			
		||||
not
 | 
			
		||||
of
 | 
			
		||||
on
 | 
			
		||||
or
 | 
			
		||||
such
 | 
			
		||||
that
 | 
			
		||||
the
 | 
			
		||||
their
 | 
			
		||||
then
 | 
			
		||||
there
 | 
			
		||||
these
 | 
			
		||||
they
 | 
			
		||||
this
 | 
			
		||||
to
 | 
			
		||||
was
 | 
			
		||||
will
 | 
			
		||||
with
 | 
			
		||||
							
								
								
									
										356
									
								
								archiver/solr-config-dir/lang/stopwords_es.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										356
									
								
								archiver/solr-config-dir/lang/stopwords_es.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,356 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | A Spanish stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | The following is a ranked list (commonest to rarest) of stopwords
 | 
			
		||||
 | deriving from a large sample of text.
 | 
			
		||||
 | 
			
		||||
 | Extra words have been added at the end.
 | 
			
		||||
 | 
			
		||||
de             |  from, of
 | 
			
		||||
la             |  the, her
 | 
			
		||||
que            |  who, that
 | 
			
		||||
el             |  the
 | 
			
		||||
en             |  in
 | 
			
		||||
y              |  and
 | 
			
		||||
a              |  to
 | 
			
		||||
los            |  the, them
 | 
			
		||||
del            |  de + el
 | 
			
		||||
se             |  himself, from him etc
 | 
			
		||||
las            |  the, them
 | 
			
		||||
por            |  for, by, etc
 | 
			
		||||
un             |  a
 | 
			
		||||
para           |  for
 | 
			
		||||
con            |  with
 | 
			
		||||
no             |  no
 | 
			
		||||
una            |  a
 | 
			
		||||
su             |  his, her
 | 
			
		||||
al             |  a + el
 | 
			
		||||
  | es         from SER
 | 
			
		||||
lo             |  him
 | 
			
		||||
como           |  how
 | 
			
		||||
más            |  more
 | 
			
		||||
pero           |  but
 | 
			
		||||
sus            |  su plural
 | 
			
		||||
le             |  to him, her
 | 
			
		||||
ya             |  already
 | 
			
		||||
o              |  or
 | 
			
		||||
  | fue        from SER
 | 
			
		||||
este           |  this
 | 
			
		||||
  | ha         from HABER
 | 
			
		||||
sí             |  himself etc
 | 
			
		||||
porque         |  because
 | 
			
		||||
esta           |  this
 | 
			
		||||
  | son        from SER
 | 
			
		||||
entre          |  between
 | 
			
		||||
  | está     from ESTAR
 | 
			
		||||
cuando         |  when
 | 
			
		||||
muy            |  very
 | 
			
		||||
sin            |  without
 | 
			
		||||
sobre          |  on
 | 
			
		||||
  | ser        from SER
 | 
			
		||||
  | tiene      from TENER
 | 
			
		||||
también        |  also
 | 
			
		||||
me             |  me
 | 
			
		||||
hasta          |  until
 | 
			
		||||
hay            |  there is/are
 | 
			
		||||
donde          |  where
 | 
			
		||||
  | han        from HABER
 | 
			
		||||
quien          |  whom, that
 | 
			
		||||
  | están      from ESTAR
 | 
			
		||||
  | estado     from ESTAR
 | 
			
		||||
desde          |  from
 | 
			
		||||
todo           |  all
 | 
			
		||||
nos            |  us
 | 
			
		||||
durante        |  during
 | 
			
		||||
  | estados    from ESTAR
 | 
			
		||||
todos          |  all
 | 
			
		||||
uno            |  a
 | 
			
		||||
les            |  to them
 | 
			
		||||
ni             |  nor
 | 
			
		||||
contra         |  against
 | 
			
		||||
otros          |  other
 | 
			
		||||
  | fueron     from SER
 | 
			
		||||
ese            |  that
 | 
			
		||||
eso            |  that
 | 
			
		||||
  | había      from HABER
 | 
			
		||||
ante           |  before
 | 
			
		||||
ellos          |  they
 | 
			
		||||
e              |  and (variant of y)
 | 
			
		||||
esto           |  this
 | 
			
		||||
mí             |  me
 | 
			
		||||
antes          |  before
 | 
			
		||||
algunos        |  some
 | 
			
		||||
qué            |  what?
 | 
			
		||||
unos           |  a
 | 
			
		||||
yo             |  I
 | 
			
		||||
otro           |  other
 | 
			
		||||
otras          |  other
 | 
			
		||||
otra           |  other
 | 
			
		||||
él             |  he
 | 
			
		||||
tanto          |  so much, many
 | 
			
		||||
esa            |  that
 | 
			
		||||
estos          |  these
 | 
			
		||||
mucho          |  much, many
 | 
			
		||||
quienes        |  who
 | 
			
		||||
nada           |  nothing
 | 
			
		||||
muchos         |  many
 | 
			
		||||
cual           |  who
 | 
			
		||||
  | sea        from SER
 | 
			
		||||
poco           |  few
 | 
			
		||||
ella           |  she
 | 
			
		||||
estar          |  to be
 | 
			
		||||
  | haber      from HABER
 | 
			
		||||
estas          |  these
 | 
			
		||||
  | estaba     from ESTAR
 | 
			
		||||
  | estamos    from ESTAR
 | 
			
		||||
algunas        |  some
 | 
			
		||||
algo           |  something
 | 
			
		||||
nosotros       |  we
 | 
			
		||||
 | 
			
		||||
      | other forms
 | 
			
		||||
 | 
			
		||||
mi             |  me
 | 
			
		||||
mis            |  mi plural
 | 
			
		||||
tú             |  thou
 | 
			
		||||
te             |  thee
 | 
			
		||||
ti             |  thee
 | 
			
		||||
tu             |  thy
 | 
			
		||||
tus            |  tu plural
 | 
			
		||||
ellas          |  they
 | 
			
		||||
nosotras       |  we
 | 
			
		||||
vosotros       |  you
 | 
			
		||||
vosotras       |  you
 | 
			
		||||
os             |  you
 | 
			
		||||
mío            |  mine
 | 
			
		||||
mía            |
 | 
			
		||||
míos           |
 | 
			
		||||
mías           |
 | 
			
		||||
tuyo           |  thine
 | 
			
		||||
tuya           |
 | 
			
		||||
tuyos          |
 | 
			
		||||
tuyas          |
 | 
			
		||||
suyo           |  his, hers, theirs
 | 
			
		||||
suya           |
 | 
			
		||||
suyos          |
 | 
			
		||||
suyas          |
 | 
			
		||||
nuestro        |  ours
 | 
			
		||||
nuestra        |
 | 
			
		||||
nuestros       |
 | 
			
		||||
nuestras       |
 | 
			
		||||
vuestro        |  yours
 | 
			
		||||
vuestra        |
 | 
			
		||||
vuestros       |
 | 
			
		||||
vuestras       |
 | 
			
		||||
esos           |  those
 | 
			
		||||
esas           |  those
 | 
			
		||||
 | 
			
		||||
               | forms of estar, to be (not including the infinitive):
 | 
			
		||||
estoy
 | 
			
		||||
estás
 | 
			
		||||
está
 | 
			
		||||
estamos
 | 
			
		||||
estáis
 | 
			
		||||
están
 | 
			
		||||
esté
 | 
			
		||||
estés
 | 
			
		||||
estemos
 | 
			
		||||
estéis
 | 
			
		||||
estén
 | 
			
		||||
estaré
 | 
			
		||||
estarás
 | 
			
		||||
estará
 | 
			
		||||
estaremos
 | 
			
		||||
estaréis
 | 
			
		||||
estarán
 | 
			
		||||
estaría
 | 
			
		||||
estarías
 | 
			
		||||
estaríamos
 | 
			
		||||
estaríais
 | 
			
		||||
estarían
 | 
			
		||||
estaba
 | 
			
		||||
estabas
 | 
			
		||||
estábamos
 | 
			
		||||
estabais
 | 
			
		||||
estaban
 | 
			
		||||
estuve
 | 
			
		||||
estuviste
 | 
			
		||||
estuvo
 | 
			
		||||
estuvimos
 | 
			
		||||
estuvisteis
 | 
			
		||||
estuvieron
 | 
			
		||||
estuviera
 | 
			
		||||
estuvieras
 | 
			
		||||
estuviéramos
 | 
			
		||||
estuvierais
 | 
			
		||||
estuvieran
 | 
			
		||||
estuviese
 | 
			
		||||
estuvieses
 | 
			
		||||
estuviésemos
 | 
			
		||||
estuvieseis
 | 
			
		||||
estuviesen
 | 
			
		||||
estando
 | 
			
		||||
estado
 | 
			
		||||
estada
 | 
			
		||||
estados
 | 
			
		||||
estadas
 | 
			
		||||
estad
 | 
			
		||||
 | 
			
		||||
               | forms of haber, to have (not including the infinitive):
 | 
			
		||||
he
 | 
			
		||||
has
 | 
			
		||||
ha
 | 
			
		||||
hemos
 | 
			
		||||
habéis
 | 
			
		||||
han
 | 
			
		||||
haya
 | 
			
		||||
hayas
 | 
			
		||||
hayamos
 | 
			
		||||
hayáis
 | 
			
		||||
hayan
 | 
			
		||||
habré
 | 
			
		||||
habrás
 | 
			
		||||
habrá
 | 
			
		||||
habremos
 | 
			
		||||
habréis
 | 
			
		||||
habrán
 | 
			
		||||
habría
 | 
			
		||||
habrías
 | 
			
		||||
habríamos
 | 
			
		||||
habríais
 | 
			
		||||
habrían
 | 
			
		||||
había
 | 
			
		||||
habías
 | 
			
		||||
habíamos
 | 
			
		||||
habíais
 | 
			
		||||
habían
 | 
			
		||||
hube
 | 
			
		||||
hubiste
 | 
			
		||||
hubo
 | 
			
		||||
hubimos
 | 
			
		||||
hubisteis
 | 
			
		||||
hubieron
 | 
			
		||||
hubiera
 | 
			
		||||
hubieras
 | 
			
		||||
hubiéramos
 | 
			
		||||
hubierais
 | 
			
		||||
hubieran
 | 
			
		||||
hubiese
 | 
			
		||||
hubieses
 | 
			
		||||
hubiésemos
 | 
			
		||||
hubieseis
 | 
			
		||||
hubiesen
 | 
			
		||||
habiendo
 | 
			
		||||
habido
 | 
			
		||||
habida
 | 
			
		||||
habidos
 | 
			
		||||
habidas
 | 
			
		||||
 | 
			
		||||
               | forms of ser, to be (not including the infinitive):
 | 
			
		||||
soy
 | 
			
		||||
eres
 | 
			
		||||
es
 | 
			
		||||
somos
 | 
			
		||||
sois
 | 
			
		||||
son
 | 
			
		||||
sea
 | 
			
		||||
seas
 | 
			
		||||
seamos
 | 
			
		||||
seáis
 | 
			
		||||
sean
 | 
			
		||||
seré
 | 
			
		||||
serás
 | 
			
		||||
será
 | 
			
		||||
seremos
 | 
			
		||||
seréis
 | 
			
		||||
serán
 | 
			
		||||
sería
 | 
			
		||||
serías
 | 
			
		||||
seríamos
 | 
			
		||||
seríais
 | 
			
		||||
serían
 | 
			
		||||
era
 | 
			
		||||
eras
 | 
			
		||||
éramos
 | 
			
		||||
erais
 | 
			
		||||
eran
 | 
			
		||||
fui
 | 
			
		||||
fuiste
 | 
			
		||||
fue
 | 
			
		||||
fuimos
 | 
			
		||||
fuisteis
 | 
			
		||||
fueron
 | 
			
		||||
fuera
 | 
			
		||||
fueras
 | 
			
		||||
fuéramos
 | 
			
		||||
fuerais
 | 
			
		||||
fueran
 | 
			
		||||
fuese
 | 
			
		||||
fueses
 | 
			
		||||
fuésemos
 | 
			
		||||
fueseis
 | 
			
		||||
fuesen
 | 
			
		||||
siendo
 | 
			
		||||
sido
 | 
			
		||||
  |  sed also means 'thirst'
 | 
			
		||||
 | 
			
		||||
               | forms of tener, to have (not including the infinitive):
 | 
			
		||||
tengo
 | 
			
		||||
tienes
 | 
			
		||||
tiene
 | 
			
		||||
tenemos
 | 
			
		||||
tenéis
 | 
			
		||||
tienen
 | 
			
		||||
tenga
 | 
			
		||||
tengas
 | 
			
		||||
tengamos
 | 
			
		||||
tengáis
 | 
			
		||||
tengan
 | 
			
		||||
tendré
 | 
			
		||||
tendrás
 | 
			
		||||
tendrá
 | 
			
		||||
tendremos
 | 
			
		||||
tendréis
 | 
			
		||||
tendrán
 | 
			
		||||
tendría
 | 
			
		||||
tendrías
 | 
			
		||||
tendríamos
 | 
			
		||||
tendríais
 | 
			
		||||
tendrían
 | 
			
		||||
tenía
 | 
			
		||||
tenías
 | 
			
		||||
teníamos
 | 
			
		||||
teníais
 | 
			
		||||
tenían
 | 
			
		||||
tuve
 | 
			
		||||
tuviste
 | 
			
		||||
tuvo
 | 
			
		||||
tuvimos
 | 
			
		||||
tuvisteis
 | 
			
		||||
tuvieron
 | 
			
		||||
tuviera
 | 
			
		||||
tuvieras
 | 
			
		||||
tuviéramos
 | 
			
		||||
tuvierais
 | 
			
		||||
tuvieran
 | 
			
		||||
tuviese
 | 
			
		||||
tuvieses
 | 
			
		||||
tuviésemos
 | 
			
		||||
tuvieseis
 | 
			
		||||
tuviesen
 | 
			
		||||
teniendo
 | 
			
		||||
tenido
 | 
			
		||||
tenida
 | 
			
		||||
tenidos
 | 
			
		||||
tenidas
 | 
			
		||||
tened
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1603
									
								
								archiver/solr-config-dir/lang/stopwords_et.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										1603
									
								
								archiver/solr-config-dir/lang/stopwords_et.txt
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										99
									
								
								archiver/solr-config-dir/lang/stopwords_eu.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								archiver/solr-config-dir/lang/stopwords_eu.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,99 @@
 | 
			
		|||
# example set of basque stopwords
 | 
			
		||||
al
 | 
			
		||||
anitz
 | 
			
		||||
arabera
 | 
			
		||||
asko
 | 
			
		||||
baina
 | 
			
		||||
bat
 | 
			
		||||
batean
 | 
			
		||||
batek
 | 
			
		||||
bati
 | 
			
		||||
batzuei
 | 
			
		||||
batzuek
 | 
			
		||||
batzuetan
 | 
			
		||||
batzuk
 | 
			
		||||
bera
 | 
			
		||||
beraiek
 | 
			
		||||
berau
 | 
			
		||||
berauek
 | 
			
		||||
bere
 | 
			
		||||
berori
 | 
			
		||||
beroriek
 | 
			
		||||
beste
 | 
			
		||||
bezala
 | 
			
		||||
da
 | 
			
		||||
dago
 | 
			
		||||
dira
 | 
			
		||||
ditu
 | 
			
		||||
du
 | 
			
		||||
dute
 | 
			
		||||
edo
 | 
			
		||||
egin
 | 
			
		||||
ere
 | 
			
		||||
eta
 | 
			
		||||
eurak
 | 
			
		||||
ez
 | 
			
		||||
gainera
 | 
			
		||||
gu
 | 
			
		||||
gutxi
 | 
			
		||||
guzti
 | 
			
		||||
haiei
 | 
			
		||||
haiek
 | 
			
		||||
haietan
 | 
			
		||||
hainbeste
 | 
			
		||||
hala
 | 
			
		||||
han
 | 
			
		||||
handik
 | 
			
		||||
hango
 | 
			
		||||
hara
 | 
			
		||||
hari
 | 
			
		||||
hark
 | 
			
		||||
hartan
 | 
			
		||||
hau
 | 
			
		||||
hauei
 | 
			
		||||
hauek
 | 
			
		||||
hauetan
 | 
			
		||||
hemen
 | 
			
		||||
hemendik
 | 
			
		||||
hemengo
 | 
			
		||||
hi
 | 
			
		||||
hona
 | 
			
		||||
honek
 | 
			
		||||
honela
 | 
			
		||||
honetan
 | 
			
		||||
honi
 | 
			
		||||
hor
 | 
			
		||||
hori
 | 
			
		||||
horiei
 | 
			
		||||
horiek
 | 
			
		||||
horietan
 | 
			
		||||
horko
 | 
			
		||||
horra
 | 
			
		||||
horrek
 | 
			
		||||
horrela
 | 
			
		||||
horretan
 | 
			
		||||
horri
 | 
			
		||||
hortik
 | 
			
		||||
hura
 | 
			
		||||
izan
 | 
			
		||||
ni
 | 
			
		||||
noiz
 | 
			
		||||
nola
 | 
			
		||||
non
 | 
			
		||||
nondik
 | 
			
		||||
nongo
 | 
			
		||||
nor
 | 
			
		||||
nora
 | 
			
		||||
ze
 | 
			
		||||
zein
 | 
			
		||||
zen
 | 
			
		||||
zenbait
 | 
			
		||||
zenbat
 | 
			
		||||
zer
 | 
			
		||||
zergatik
 | 
			
		||||
ziren
 | 
			
		||||
zituen
 | 
			
		||||
zu
 | 
			
		||||
zuek
 | 
			
		||||
zuen
 | 
			
		||||
zuten
 | 
			
		||||
							
								
								
									
										313
									
								
								archiver/solr-config-dir/lang/stopwords_fa.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										313
									
								
								archiver/solr-config-dir/lang/stopwords_fa.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,313 @@
 | 
			
		|||
# This file was created by Jacques Savoy and is distributed under the BSD license.
 | 
			
		||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
 | 
			
		||||
# Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
# Note: by default this file is used after normalization, so when adding entries
 | 
			
		||||
# to this file, use the arabic 'ي' instead of 'ی'
 | 
			
		||||
انان
 | 
			
		||||
نداشته
 | 
			
		||||
سراسر
 | 
			
		||||
خياه
 | 
			
		||||
ايشان
 | 
			
		||||
وي
 | 
			
		||||
تاكنون
 | 
			
		||||
بيشتري
 | 
			
		||||
دوم
 | 
			
		||||
پس
 | 
			
		||||
ناشي
 | 
			
		||||
وگو
 | 
			
		||||
يا
 | 
			
		||||
داشتند
 | 
			
		||||
سپس
 | 
			
		||||
هنگام
 | 
			
		||||
هرگز
 | 
			
		||||
پنج
 | 
			
		||||
نشان
 | 
			
		||||
امسال
 | 
			
		||||
ديگر
 | 
			
		||||
گروهي
 | 
			
		||||
شدند
 | 
			
		||||
چطور
 | 
			
		||||
ده
 | 
			
		||||
و
 | 
			
		||||
دو
 | 
			
		||||
نخستين
 | 
			
		||||
ولي
 | 
			
		||||
چرا
 | 
			
		||||
چه
 | 
			
		||||
وسط
 | 
			
		||||
ه
 | 
			
		||||
كدام
 | 
			
		||||
قابل
 | 
			
		||||
يك
 | 
			
		||||
رفت
 | 
			
		||||
هفت
 | 
			
		||||
همچنين
 | 
			
		||||
در
 | 
			
		||||
هزار
 | 
			
		||||
بله
 | 
			
		||||
بلي
 | 
			
		||||
شايد
 | 
			
		||||
اما
 | 
			
		||||
شناسي
 | 
			
		||||
گرفته
 | 
			
		||||
دهد
 | 
			
		||||
داشته
 | 
			
		||||
دانست
 | 
			
		||||
داشتن
 | 
			
		||||
خواهيم
 | 
			
		||||
ميليارد
 | 
			
		||||
وقتيكه
 | 
			
		||||
امد
 | 
			
		||||
خواهد
 | 
			
		||||
جز
 | 
			
		||||
اورده
 | 
			
		||||
شده
 | 
			
		||||
بلكه
 | 
			
		||||
خدمات
 | 
			
		||||
شدن
 | 
			
		||||
برخي
 | 
			
		||||
نبود
 | 
			
		||||
بسياري
 | 
			
		||||
جلوگيري
 | 
			
		||||
حق
 | 
			
		||||
كردند
 | 
			
		||||
نوعي
 | 
			
		||||
بعري
 | 
			
		||||
نكرده
 | 
			
		||||
نظير
 | 
			
		||||
نبايد
 | 
			
		||||
بوده
 | 
			
		||||
بودن
 | 
			
		||||
داد
 | 
			
		||||
اورد
 | 
			
		||||
هست
 | 
			
		||||
جايي
 | 
			
		||||
شود
 | 
			
		||||
دنبال
 | 
			
		||||
داده
 | 
			
		||||
بايد
 | 
			
		||||
سابق
 | 
			
		||||
هيچ
 | 
			
		||||
همان
 | 
			
		||||
انجا
 | 
			
		||||
كمتر
 | 
			
		||||
كجاست
 | 
			
		||||
گردد
 | 
			
		||||
كسي
 | 
			
		||||
تر
 | 
			
		||||
مردم
 | 
			
		||||
تان
 | 
			
		||||
دادن
 | 
			
		||||
بودند
 | 
			
		||||
سري
 | 
			
		||||
جدا
 | 
			
		||||
ندارند
 | 
			
		||||
مگر
 | 
			
		||||
يكديگر
 | 
			
		||||
دارد
 | 
			
		||||
دهند
 | 
			
		||||
بنابراين
 | 
			
		||||
هنگامي
 | 
			
		||||
سمت
 | 
			
		||||
جا
 | 
			
		||||
انچه
 | 
			
		||||
خود
 | 
			
		||||
دادند
 | 
			
		||||
زياد
 | 
			
		||||
دارند
 | 
			
		||||
اثر
 | 
			
		||||
بدون
 | 
			
		||||
بهترين
 | 
			
		||||
بيشتر
 | 
			
		||||
البته
 | 
			
		||||
به
 | 
			
		||||
براساس
 | 
			
		||||
بيرون
 | 
			
		||||
كرد
 | 
			
		||||
بعضي
 | 
			
		||||
گرفت
 | 
			
		||||
توي
 | 
			
		||||
اي
 | 
			
		||||
ميليون
 | 
			
		||||
او
 | 
			
		||||
جريان
 | 
			
		||||
تول
 | 
			
		||||
بر
 | 
			
		||||
مانند
 | 
			
		||||
برابر
 | 
			
		||||
باشيم
 | 
			
		||||
مدتي
 | 
			
		||||
گويند
 | 
			
		||||
اكنون
 | 
			
		||||
تا
 | 
			
		||||
تنها
 | 
			
		||||
جديد
 | 
			
		||||
چند
 | 
			
		||||
بي
 | 
			
		||||
نشده
 | 
			
		||||
كردن
 | 
			
		||||
كردم
 | 
			
		||||
گويد
 | 
			
		||||
كرده
 | 
			
		||||
كنيم
 | 
			
		||||
نمي
 | 
			
		||||
نزد
 | 
			
		||||
روي
 | 
			
		||||
قصد
 | 
			
		||||
فقط
 | 
			
		||||
بالاي
 | 
			
		||||
ديگران
 | 
			
		||||
اين
 | 
			
		||||
ديروز
 | 
			
		||||
توسط
 | 
			
		||||
سوم
 | 
			
		||||
ايم
 | 
			
		||||
دانند
 | 
			
		||||
سوي
 | 
			
		||||
استفاده
 | 
			
		||||
شما
 | 
			
		||||
كنار
 | 
			
		||||
داريم
 | 
			
		||||
ساخته
 | 
			
		||||
طور
 | 
			
		||||
امده
 | 
			
		||||
رفته
 | 
			
		||||
نخست
 | 
			
		||||
بيست
 | 
			
		||||
نزديك
 | 
			
		||||
طي
 | 
			
		||||
كنيد
 | 
			
		||||
از
 | 
			
		||||
انها
 | 
			
		||||
تمامي
 | 
			
		||||
داشت
 | 
			
		||||
يكي
 | 
			
		||||
طريق
 | 
			
		||||
اش
 | 
			
		||||
چيست
 | 
			
		||||
روب
 | 
			
		||||
نمايد
 | 
			
		||||
گفت
 | 
			
		||||
چندين
 | 
			
		||||
چيزي
 | 
			
		||||
تواند
 | 
			
		||||
ام
 | 
			
		||||
ايا
 | 
			
		||||
با
 | 
			
		||||
ان
 | 
			
		||||
ايد
 | 
			
		||||
ترين
 | 
			
		||||
اينكه
 | 
			
		||||
ديگري
 | 
			
		||||
راه
 | 
			
		||||
هايي
 | 
			
		||||
بروز
 | 
			
		||||
همچنان
 | 
			
		||||
پاعين
 | 
			
		||||
كس
 | 
			
		||||
حدود
 | 
			
		||||
مختلف
 | 
			
		||||
مقابل
 | 
			
		||||
چيز
 | 
			
		||||
گيرد
 | 
			
		||||
ندارد
 | 
			
		||||
ضد
 | 
			
		||||
همچون
 | 
			
		||||
سازي
 | 
			
		||||
شان
 | 
			
		||||
مورد
 | 
			
		||||
باره
 | 
			
		||||
مرسي
 | 
			
		||||
خويش
 | 
			
		||||
برخوردار
 | 
			
		||||
چون
 | 
			
		||||
خارج
 | 
			
		||||
شش
 | 
			
		||||
هنوز
 | 
			
		||||
تحت
 | 
			
		||||
ضمن
 | 
			
		||||
هستيم
 | 
			
		||||
گفته
 | 
			
		||||
فكر
 | 
			
		||||
بسيار
 | 
			
		||||
پيش
 | 
			
		||||
براي
 | 
			
		||||
روزهاي
 | 
			
		||||
انكه
 | 
			
		||||
نخواهد
 | 
			
		||||
بالا
 | 
			
		||||
كل
 | 
			
		||||
وقتي
 | 
			
		||||
كي
 | 
			
		||||
چنين
 | 
			
		||||
كه
 | 
			
		||||
گيري
 | 
			
		||||
نيست
 | 
			
		||||
است
 | 
			
		||||
كجا
 | 
			
		||||
كند
 | 
			
		||||
نيز
 | 
			
		||||
يابد
 | 
			
		||||
بندي
 | 
			
		||||
حتي
 | 
			
		||||
توانند
 | 
			
		||||
عقب
 | 
			
		||||
خواست
 | 
			
		||||
كنند
 | 
			
		||||
بين
 | 
			
		||||
تمام
 | 
			
		||||
همه
 | 
			
		||||
ما
 | 
			
		||||
باشند
 | 
			
		||||
مثل
 | 
			
		||||
شد
 | 
			
		||||
اري
 | 
			
		||||
باشد
 | 
			
		||||
اره
 | 
			
		||||
طبق
 | 
			
		||||
بعد
 | 
			
		||||
اگر
 | 
			
		||||
صورت
 | 
			
		||||
غير
 | 
			
		||||
جاي
 | 
			
		||||
بيش
 | 
			
		||||
ريزي
 | 
			
		||||
اند
 | 
			
		||||
زيرا
 | 
			
		||||
چگونه
 | 
			
		||||
بار
 | 
			
		||||
لطفا
 | 
			
		||||
مي
 | 
			
		||||
درباره
 | 
			
		||||
من
 | 
			
		||||
ديده
 | 
			
		||||
همين
 | 
			
		||||
گذاري
 | 
			
		||||
برداري
 | 
			
		||||
علت
 | 
			
		||||
گذاشته
 | 
			
		||||
هم
 | 
			
		||||
فوق
 | 
			
		||||
نه
 | 
			
		||||
ها
 | 
			
		||||
شوند
 | 
			
		||||
اباد
 | 
			
		||||
همواره
 | 
			
		||||
هر
 | 
			
		||||
اول
 | 
			
		||||
خواهند
 | 
			
		||||
چهار
 | 
			
		||||
نام
 | 
			
		||||
امروز
 | 
			
		||||
مان
 | 
			
		||||
هاي
 | 
			
		||||
قبل
 | 
			
		||||
كنم
 | 
			
		||||
سعي
 | 
			
		||||
تازه
 | 
			
		||||
را
 | 
			
		||||
هستند
 | 
			
		||||
زير
 | 
			
		||||
جلوي
 | 
			
		||||
عنوان
 | 
			
		||||
بود
 | 
			
		||||
							
								
								
									
										97
									
								
								archiver/solr-config-dir/lang/stopwords_fi.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								archiver/solr-config-dir/lang/stopwords_fi.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,97 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 
 | 
			
		||||
| forms of BE
 | 
			
		||||
 | 
			
		||||
olla
 | 
			
		||||
olen
 | 
			
		||||
olet
 | 
			
		||||
on
 | 
			
		||||
olemme
 | 
			
		||||
olette
 | 
			
		||||
ovat
 | 
			
		||||
ole        | negative form
 | 
			
		||||
 | 
			
		||||
oli
 | 
			
		||||
olisi
 | 
			
		||||
olisit
 | 
			
		||||
olisin
 | 
			
		||||
olisimme
 | 
			
		||||
olisitte
 | 
			
		||||
olisivat
 | 
			
		||||
olit
 | 
			
		||||
olin
 | 
			
		||||
olimme
 | 
			
		||||
olitte
 | 
			
		||||
olivat
 | 
			
		||||
ollut
 | 
			
		||||
olleet
 | 
			
		||||
 | 
			
		||||
en         | negation
 | 
			
		||||
et
 | 
			
		||||
ei
 | 
			
		||||
emme
 | 
			
		||||
ette
 | 
			
		||||
eivät
 | 
			
		||||
 | 
			
		||||
|Nom   Gen    Acc    Part   Iness   Elat    Illat  Adess   Ablat   Allat   Ess    Trans
 | 
			
		||||
minä   minun  minut  minua  minussa minusta minuun minulla minulta minulle               | I
 | 
			
		||||
sinä   sinun  sinut  sinua  sinussa sinusta sinuun sinulla sinulta sinulle               | you
 | 
			
		||||
hän    hänen  hänet  häntä  hänessä hänestä häneen hänellä häneltä hänelle               | he she
 | 
			
		||||
me     meidän meidät meitä  meissä  meistä  meihin meillä  meiltä  meille                | we
 | 
			
		||||
te     teidän teidät teitä  teissä  teistä  teihin teillä  teiltä  teille                | you
 | 
			
		||||
he     heidän heidät heitä  heissä  heistä  heihin heillä  heiltä  heille                | they
 | 
			
		||||
 | 
			
		||||
tämä   tämän         tätä   tässä   tästä   tähän  tallä   tältä   tälle   tänä   täksi  | this
 | 
			
		||||
tuo    tuon          tuotä  tuossa  tuosta  tuohon tuolla  tuolta  tuolle  tuona  tuoksi | that
 | 
			
		||||
se     sen           sitä   siinä   siitä   siihen sillä   siltä   sille   sinä   siksi  | it
 | 
			
		||||
nämä   näiden        näitä  näissä  näistä  näihin näillä  näiltä  näille  näinä  näiksi | these
 | 
			
		||||
nuo    noiden        noita  noissa  noista  noihin noilla  noilta  noille  noina  noiksi | those
 | 
			
		||||
ne     niiden        niitä  niissä  niistä  niihin niillä  niiltä  niille  niinä  niiksi | they
 | 
			
		||||
 | 
			
		||||
kuka   kenen kenet   ketä   kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
 | 
			
		||||
ketkä  keiden ketkä  keitä  keissä  keistä  keihin keillä  keiltä  keille  keinä  keiksi | (pl)
 | 
			
		||||
mikä   minkä minkä   mitä   missä   mistä   mihin  millä   miltä   mille   minä   miksi  | which what
 | 
			
		||||
mitkä                                                                                    | (pl)
 | 
			
		||||
 | 
			
		||||
joka   jonka         jota   jossa   josta   johon  jolla   jolta   jolle   jona   joksi  | who which
 | 
			
		||||
jotka  joiden        joita  joissa  joista  joihin joilla  joilta  joille  joina  joiksi | (pl)
 | 
			
		||||
 | 
			
		||||
| conjunctions
 | 
			
		||||
 | 
			
		||||
että   | that
 | 
			
		||||
ja     | and
 | 
			
		||||
jos    | if
 | 
			
		||||
koska  | because
 | 
			
		||||
kuin   | than
 | 
			
		||||
mutta  | but
 | 
			
		||||
niin   | so
 | 
			
		||||
sekä   | and
 | 
			
		||||
sillä  | for
 | 
			
		||||
tai    | or
 | 
			
		||||
vaan   | but
 | 
			
		||||
vai    | or
 | 
			
		||||
vaikka | although
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| prepositions
 | 
			
		||||
 | 
			
		||||
kanssa  | with
 | 
			
		||||
mukaan  | according to
 | 
			
		||||
noin    | about
 | 
			
		||||
poikki  | across
 | 
			
		||||
yli     | over, across
 | 
			
		||||
 | 
			
		||||
| other
 | 
			
		||||
 | 
			
		||||
kun    | when
 | 
			
		||||
niin   | so
 | 
			
		||||
nyt    | now
 | 
			
		||||
itse   | self
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										186
									
								
								archiver/solr-config-dir/lang/stopwords_fr.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										186
									
								
								archiver/solr-config-dir/lang/stopwords_fr.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,186 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | A French stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
au             |  a + le
 | 
			
		||||
aux            |  a + les
 | 
			
		||||
avec           |  with
 | 
			
		||||
ce             |  this
 | 
			
		||||
ces            |  these
 | 
			
		||||
dans           |  in
 | 
			
		||||
de             |  of
 | 
			
		||||
des            |  de + les
 | 
			
		||||
du             |  de + le
 | 
			
		||||
elle           |  she
 | 
			
		||||
en             |  `of them' etc
 | 
			
		||||
et             |  and
 | 
			
		||||
eux            |  them
 | 
			
		||||
il             |  he
 | 
			
		||||
je             |  I
 | 
			
		||||
la             |  the
 | 
			
		||||
le             |  the
 | 
			
		||||
leur           |  their
 | 
			
		||||
lui            |  him
 | 
			
		||||
ma             |  my (fem)
 | 
			
		||||
mais           |  but
 | 
			
		||||
me             |  me
 | 
			
		||||
même           |  same; as in moi-même (myself) etc
 | 
			
		||||
mes            |  my (pl)
 | 
			
		||||
moi            |  me
 | 
			
		||||
mon            |  my (masc)
 | 
			
		||||
ne             |  not
 | 
			
		||||
nos            |  our (pl)
 | 
			
		||||
notre          |  our
 | 
			
		||||
nous           |  we
 | 
			
		||||
on             |  one
 | 
			
		||||
ou             |  or
 | 
			
		||||
par            |  by
 | 
			
		||||
pas            |  not
 | 
			
		||||
pour           |  for
 | 
			
		||||
qu             |  que before vowel
 | 
			
		||||
que            |  that
 | 
			
		||||
qui            |  who
 | 
			
		||||
sa             |  his, her (fem)
 | 
			
		||||
se             |  oneself
 | 
			
		||||
ses            |  his (pl)
 | 
			
		||||
son            |  his, her (masc)
 | 
			
		||||
sur            |  on
 | 
			
		||||
ta             |  thy (fem)
 | 
			
		||||
te             |  thee
 | 
			
		||||
tes            |  thy (pl)
 | 
			
		||||
toi            |  thee
 | 
			
		||||
ton            |  thy (masc)
 | 
			
		||||
tu             |  thou
 | 
			
		||||
un             |  a
 | 
			
		||||
une            |  a
 | 
			
		||||
vos            |  your (pl)
 | 
			
		||||
votre          |  your
 | 
			
		||||
vous           |  you
 | 
			
		||||
 | 
			
		||||
               |  single letter forms
 | 
			
		||||
 | 
			
		||||
c              |  c'
 | 
			
		||||
d              |  d'
 | 
			
		||||
j              |  j'
 | 
			
		||||
l              |  l'
 | 
			
		||||
à              |  to, at
 | 
			
		||||
m              |  m'
 | 
			
		||||
n              |  n'
 | 
			
		||||
s              |  s'
 | 
			
		||||
t              |  t'
 | 
			
		||||
y              |  there
 | 
			
		||||
 | 
			
		||||
               | forms of être (not including the infinitive):
 | 
			
		||||
été
 | 
			
		||||
étée
 | 
			
		||||
étées
 | 
			
		||||
étés
 | 
			
		||||
étant
 | 
			
		||||
suis
 | 
			
		||||
es
 | 
			
		||||
est
 | 
			
		||||
sommes
 | 
			
		||||
êtes
 | 
			
		||||
sont
 | 
			
		||||
serai
 | 
			
		||||
seras
 | 
			
		||||
sera
 | 
			
		||||
serons
 | 
			
		||||
serez
 | 
			
		||||
seront
 | 
			
		||||
serais
 | 
			
		||||
serait
 | 
			
		||||
serions
 | 
			
		||||
seriez
 | 
			
		||||
seraient
 | 
			
		||||
étais
 | 
			
		||||
était
 | 
			
		||||
étions
 | 
			
		||||
étiez
 | 
			
		||||
étaient
 | 
			
		||||
fus
 | 
			
		||||
fut
 | 
			
		||||
fûmes
 | 
			
		||||
fûtes
 | 
			
		||||
furent
 | 
			
		||||
sois
 | 
			
		||||
soit
 | 
			
		||||
soyons
 | 
			
		||||
soyez
 | 
			
		||||
soient
 | 
			
		||||
fusse
 | 
			
		||||
fusses
 | 
			
		||||
fût
 | 
			
		||||
fussions
 | 
			
		||||
fussiez
 | 
			
		||||
fussent
 | 
			
		||||
 | 
			
		||||
               | forms of avoir (not including the infinitive):
 | 
			
		||||
ayant
 | 
			
		||||
eu
 | 
			
		||||
eue
 | 
			
		||||
eues
 | 
			
		||||
eus
 | 
			
		||||
ai
 | 
			
		||||
as
 | 
			
		||||
avons
 | 
			
		||||
avez
 | 
			
		||||
ont
 | 
			
		||||
aurai
 | 
			
		||||
auras
 | 
			
		||||
aura
 | 
			
		||||
aurons
 | 
			
		||||
aurez
 | 
			
		||||
auront
 | 
			
		||||
aurais
 | 
			
		||||
aurait
 | 
			
		||||
aurions
 | 
			
		||||
auriez
 | 
			
		||||
auraient
 | 
			
		||||
avais
 | 
			
		||||
avait
 | 
			
		||||
avions
 | 
			
		||||
aviez
 | 
			
		||||
avaient
 | 
			
		||||
eut
 | 
			
		||||
eûmes
 | 
			
		||||
eûtes
 | 
			
		||||
eurent
 | 
			
		||||
aie
 | 
			
		||||
aies
 | 
			
		||||
ait
 | 
			
		||||
ayons
 | 
			
		||||
ayez
 | 
			
		||||
aient
 | 
			
		||||
eusse
 | 
			
		||||
eusses
 | 
			
		||||
eût
 | 
			
		||||
eussions
 | 
			
		||||
eussiez
 | 
			
		||||
eussent
 | 
			
		||||
 | 
			
		||||
               | Later additions (from Jean-Christophe Deschamps)
 | 
			
		||||
ceci           |  this
 | 
			
		||||
cela           |  that
 | 
			
		||||
celà           |  that
 | 
			
		||||
cet            |  this
 | 
			
		||||
cette          |  this
 | 
			
		||||
ici            |  here
 | 
			
		||||
ils            |  they
 | 
			
		||||
les            |  the (pl)
 | 
			
		||||
leurs          |  their (pl)
 | 
			
		||||
quel           |  which
 | 
			
		||||
quels          |  which
 | 
			
		||||
quelle         |  which
 | 
			
		||||
quelles        |  which
 | 
			
		||||
sans           |  without
 | 
			
		||||
soi            |  oneself
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										110
									
								
								archiver/solr-config-dir/lang/stopwords_ga.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								archiver/solr-config-dir/lang/stopwords_ga.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,110 @@
 | 
			
		|||
 | 
			
		||||
a
 | 
			
		||||
ach
 | 
			
		||||
ag
 | 
			
		||||
agus
 | 
			
		||||
an
 | 
			
		||||
aon
 | 
			
		||||
ar
 | 
			
		||||
arna
 | 
			
		||||
as
 | 
			
		||||
b'
 | 
			
		||||
ba
 | 
			
		||||
beirt
 | 
			
		||||
bhúr
 | 
			
		||||
caoga
 | 
			
		||||
ceathair
 | 
			
		||||
ceathrar
 | 
			
		||||
chomh
 | 
			
		||||
chtó
 | 
			
		||||
chuig
 | 
			
		||||
chun
 | 
			
		||||
cois
 | 
			
		||||
céad
 | 
			
		||||
cúig
 | 
			
		||||
cúigear
 | 
			
		||||
d'
 | 
			
		||||
daichead
 | 
			
		||||
dar
 | 
			
		||||
de
 | 
			
		||||
deich
 | 
			
		||||
deichniúr
 | 
			
		||||
den
 | 
			
		||||
dhá
 | 
			
		||||
do
 | 
			
		||||
don
 | 
			
		||||
dtí
 | 
			
		||||
dá
 | 
			
		||||
dár
 | 
			
		||||
dó
 | 
			
		||||
faoi
 | 
			
		||||
faoin
 | 
			
		||||
faoina
 | 
			
		||||
faoinár
 | 
			
		||||
fara
 | 
			
		||||
fiche
 | 
			
		||||
gach
 | 
			
		||||
gan
 | 
			
		||||
go
 | 
			
		||||
gur
 | 
			
		||||
haon
 | 
			
		||||
hocht
 | 
			
		||||
i
 | 
			
		||||
iad
 | 
			
		||||
idir
 | 
			
		||||
in
 | 
			
		||||
ina
 | 
			
		||||
ins
 | 
			
		||||
inár
 | 
			
		||||
is
 | 
			
		||||
le
 | 
			
		||||
leis
 | 
			
		||||
lena
 | 
			
		||||
lenár
 | 
			
		||||
m'
 | 
			
		||||
mar
 | 
			
		||||
mo
 | 
			
		||||
mé
 | 
			
		||||
na
 | 
			
		||||
nach
 | 
			
		||||
naoi
 | 
			
		||||
naonúr
 | 
			
		||||
ná
 | 
			
		||||
ní
 | 
			
		||||
níor
 | 
			
		||||
nó
 | 
			
		||||
nócha
 | 
			
		||||
ocht
 | 
			
		||||
ochtar
 | 
			
		||||
os
 | 
			
		||||
roimh
 | 
			
		||||
sa
 | 
			
		||||
seacht
 | 
			
		||||
seachtar
 | 
			
		||||
seachtó
 | 
			
		||||
seasca
 | 
			
		||||
seisear
 | 
			
		||||
siad
 | 
			
		||||
sibh
 | 
			
		||||
sinn
 | 
			
		||||
sna
 | 
			
		||||
sé
 | 
			
		||||
sí
 | 
			
		||||
tar
 | 
			
		||||
thar
 | 
			
		||||
thú
 | 
			
		||||
triúr
 | 
			
		||||
trí
 | 
			
		||||
trína
 | 
			
		||||
trínár
 | 
			
		||||
tríocha
 | 
			
		||||
tú
 | 
			
		||||
um
 | 
			
		||||
ár
 | 
			
		||||
é
 | 
			
		||||
éis
 | 
			
		||||
í
 | 
			
		||||
ó
 | 
			
		||||
ón
 | 
			
		||||
óna
 | 
			
		||||
ónár
 | 
			
		||||
							
								
								
									
										161
									
								
								archiver/solr-config-dir/lang/stopwords_gl.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										161
									
								
								archiver/solr-config-dir/lang/stopwords_gl.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,161 @@
 | 
			
		|||
# Galician stopwords
 | 
			
		||||
a
 | 
			
		||||
aínda
 | 
			
		||||
alí
 | 
			
		||||
aquel
 | 
			
		||||
aquela
 | 
			
		||||
aquelas
 | 
			
		||||
aqueles
 | 
			
		||||
aquilo
 | 
			
		||||
aquí
 | 
			
		||||
ao
 | 
			
		||||
aos
 | 
			
		||||
as
 | 
			
		||||
así
 | 
			
		||||
á
 | 
			
		||||
ben
 | 
			
		||||
cando
 | 
			
		||||
che
 | 
			
		||||
co
 | 
			
		||||
coa
 | 
			
		||||
comigo
 | 
			
		||||
con
 | 
			
		||||
connosco
 | 
			
		||||
contigo
 | 
			
		||||
convosco
 | 
			
		||||
coas
 | 
			
		||||
cos
 | 
			
		||||
cun
 | 
			
		||||
cuns
 | 
			
		||||
cunha
 | 
			
		||||
cunhas
 | 
			
		||||
da
 | 
			
		||||
dalgunha
 | 
			
		||||
dalgunhas
 | 
			
		||||
dalgún
 | 
			
		||||
dalgúns
 | 
			
		||||
das
 | 
			
		||||
de
 | 
			
		||||
del
 | 
			
		||||
dela
 | 
			
		||||
delas
 | 
			
		||||
deles
 | 
			
		||||
desde
 | 
			
		||||
deste
 | 
			
		||||
do
 | 
			
		||||
dos
 | 
			
		||||
dun
 | 
			
		||||
duns
 | 
			
		||||
dunha
 | 
			
		||||
dunhas
 | 
			
		||||
e
 | 
			
		||||
el
 | 
			
		||||
ela
 | 
			
		||||
elas
 | 
			
		||||
eles
 | 
			
		||||
en
 | 
			
		||||
era
 | 
			
		||||
eran
 | 
			
		||||
esa
 | 
			
		||||
esas
 | 
			
		||||
ese
 | 
			
		||||
eses
 | 
			
		||||
esta
 | 
			
		||||
estar
 | 
			
		||||
estaba
 | 
			
		||||
está
 | 
			
		||||
están
 | 
			
		||||
este
 | 
			
		||||
estes
 | 
			
		||||
estiven
 | 
			
		||||
estou
 | 
			
		||||
eu
 | 
			
		||||
é
 | 
			
		||||
facer
 | 
			
		||||
foi
 | 
			
		||||
foron
 | 
			
		||||
fun
 | 
			
		||||
había
 | 
			
		||||
hai
 | 
			
		||||
iso
 | 
			
		||||
isto
 | 
			
		||||
la
 | 
			
		||||
las
 | 
			
		||||
lle
 | 
			
		||||
lles
 | 
			
		||||
lo
 | 
			
		||||
los
 | 
			
		||||
mais
 | 
			
		||||
me
 | 
			
		||||
meu
 | 
			
		||||
meus
 | 
			
		||||
min
 | 
			
		||||
miña
 | 
			
		||||
miñas
 | 
			
		||||
moi
 | 
			
		||||
na
 | 
			
		||||
nas
 | 
			
		||||
neste
 | 
			
		||||
nin
 | 
			
		||||
no
 | 
			
		||||
non
 | 
			
		||||
nos
 | 
			
		||||
nosa
 | 
			
		||||
nosas
 | 
			
		||||
noso
 | 
			
		||||
nosos
 | 
			
		||||
nós
 | 
			
		||||
nun
 | 
			
		||||
nunha
 | 
			
		||||
nuns
 | 
			
		||||
nunhas
 | 
			
		||||
o
 | 
			
		||||
os
 | 
			
		||||
ou
 | 
			
		||||
ó
 | 
			
		||||
ós
 | 
			
		||||
para
 | 
			
		||||
pero
 | 
			
		||||
pode
 | 
			
		||||
pois
 | 
			
		||||
pola
 | 
			
		||||
polas
 | 
			
		||||
polo
 | 
			
		||||
polos
 | 
			
		||||
por
 | 
			
		||||
que
 | 
			
		||||
se
 | 
			
		||||
senón
 | 
			
		||||
ser
 | 
			
		||||
seu
 | 
			
		||||
seus
 | 
			
		||||
sexa
 | 
			
		||||
sido
 | 
			
		||||
sobre
 | 
			
		||||
súa
 | 
			
		||||
súas
 | 
			
		||||
tamén
 | 
			
		||||
tan
 | 
			
		||||
te
 | 
			
		||||
ten
 | 
			
		||||
teñen
 | 
			
		||||
teño
 | 
			
		||||
ter
 | 
			
		||||
teu
 | 
			
		||||
teus
 | 
			
		||||
ti
 | 
			
		||||
tido
 | 
			
		||||
tiña
 | 
			
		||||
tiven
 | 
			
		||||
túa
 | 
			
		||||
túas
 | 
			
		||||
un
 | 
			
		||||
unha
 | 
			
		||||
unhas
 | 
			
		||||
uns
 | 
			
		||||
vos
 | 
			
		||||
vosa
 | 
			
		||||
vosas
 | 
			
		||||
voso
 | 
			
		||||
vosos
 | 
			
		||||
vós
 | 
			
		||||
							
								
								
									
										235
									
								
								archiver/solr-config-dir/lang/stopwords_hi.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										235
									
								
								archiver/solr-config-dir/lang/stopwords_hi.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,235 @@
 | 
			
		|||
# Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
 | 
			
		||||
# This file was created by Jacques Savoy and is distributed under the BSD license.
 | 
			
		||||
# Note: by default this file also contains forms normalized by HindiNormalizer 
 | 
			
		||||
# for spelling variation (see section below), such that it can be used whether or 
 | 
			
		||||
# not you enable that feature. When adding additional entries to this list,
 | 
			
		||||
# please add the normalized form as well. 
 | 
			
		||||
अंदर
 | 
			
		||||
अत
 | 
			
		||||
अपना
 | 
			
		||||
अपनी
 | 
			
		||||
अपने
 | 
			
		||||
अभी
 | 
			
		||||
आदि
 | 
			
		||||
आप
 | 
			
		||||
इत्यादि
 | 
			
		||||
इन 
 | 
			
		||||
इनका
 | 
			
		||||
इन्हीं
 | 
			
		||||
इन्हें
 | 
			
		||||
इन्हों
 | 
			
		||||
इस
 | 
			
		||||
इसका
 | 
			
		||||
इसकी
 | 
			
		||||
इसके
 | 
			
		||||
इसमें
 | 
			
		||||
इसी
 | 
			
		||||
इसे
 | 
			
		||||
उन
 | 
			
		||||
उनका
 | 
			
		||||
उनकी
 | 
			
		||||
उनके
 | 
			
		||||
उनको
 | 
			
		||||
उन्हीं
 | 
			
		||||
उन्हें
 | 
			
		||||
उन्हों
 | 
			
		||||
उस
 | 
			
		||||
उसके
 | 
			
		||||
उसी
 | 
			
		||||
उसे
 | 
			
		||||
एक
 | 
			
		||||
एवं
 | 
			
		||||
एस
 | 
			
		||||
ऐसे
 | 
			
		||||
और
 | 
			
		||||
कई
 | 
			
		||||
कर
 | 
			
		||||
करता
 | 
			
		||||
करते
 | 
			
		||||
करना
 | 
			
		||||
करने
 | 
			
		||||
करें
 | 
			
		||||
कहते
 | 
			
		||||
कहा
 | 
			
		||||
का
 | 
			
		||||
काफ़ी
 | 
			
		||||
कि
 | 
			
		||||
कितना
 | 
			
		||||
किन्हें
 | 
			
		||||
किन्हों
 | 
			
		||||
किया
 | 
			
		||||
किर
 | 
			
		||||
किस
 | 
			
		||||
किसी
 | 
			
		||||
किसे
 | 
			
		||||
की
 | 
			
		||||
कुछ
 | 
			
		||||
कुल
 | 
			
		||||
के
 | 
			
		||||
को
 | 
			
		||||
कोई
 | 
			
		||||
कौन
 | 
			
		||||
कौनसा
 | 
			
		||||
गया
 | 
			
		||||
घर
 | 
			
		||||
जब
 | 
			
		||||
जहाँ
 | 
			
		||||
जा
 | 
			
		||||
जितना
 | 
			
		||||
जिन
 | 
			
		||||
जिन्हें
 | 
			
		||||
जिन्हों
 | 
			
		||||
जिस
 | 
			
		||||
जिसे
 | 
			
		||||
जीधर
 | 
			
		||||
जैसा
 | 
			
		||||
जैसे
 | 
			
		||||
जो
 | 
			
		||||
तक
 | 
			
		||||
तब
 | 
			
		||||
तरह
 | 
			
		||||
तिन
 | 
			
		||||
तिन्हें
 | 
			
		||||
तिन्हों
 | 
			
		||||
तिस
 | 
			
		||||
तिसे
 | 
			
		||||
तो
 | 
			
		||||
था
 | 
			
		||||
थी
 | 
			
		||||
थे
 | 
			
		||||
दबारा
 | 
			
		||||
दिया
 | 
			
		||||
दुसरा
 | 
			
		||||
दूसरे
 | 
			
		||||
दो
 | 
			
		||||
द्वारा
 | 
			
		||||
न
 | 
			
		||||
नहीं
 | 
			
		||||
ना
 | 
			
		||||
निहायत
 | 
			
		||||
नीचे
 | 
			
		||||
ने
 | 
			
		||||
पर
 | 
			
		||||
पर  
 | 
			
		||||
पहले
 | 
			
		||||
पूरा
 | 
			
		||||
पे
 | 
			
		||||
फिर
 | 
			
		||||
बनी
 | 
			
		||||
बही
 | 
			
		||||
बहुत
 | 
			
		||||
बाद
 | 
			
		||||
बाला
 | 
			
		||||
बिलकुल
 | 
			
		||||
भी
 | 
			
		||||
भीतर
 | 
			
		||||
मगर
 | 
			
		||||
मानो
 | 
			
		||||
मे
 | 
			
		||||
में
 | 
			
		||||
यदि
 | 
			
		||||
यह
 | 
			
		||||
यहाँ
 | 
			
		||||
यही
 | 
			
		||||
या
 | 
			
		||||
यिह 
 | 
			
		||||
ये
 | 
			
		||||
रखें
 | 
			
		||||
रहा
 | 
			
		||||
रहे
 | 
			
		||||
ऱ्वासा
 | 
			
		||||
लिए
 | 
			
		||||
लिये
 | 
			
		||||
लेकिन
 | 
			
		||||
व
 | 
			
		||||
वर्ग
 | 
			
		||||
वह
 | 
			
		||||
वह 
 | 
			
		||||
वहाँ
 | 
			
		||||
वहीं
 | 
			
		||||
वाले
 | 
			
		||||
वुह 
 | 
			
		||||
वे
 | 
			
		||||
वग़ैरह
 | 
			
		||||
संग
 | 
			
		||||
सकता
 | 
			
		||||
सकते
 | 
			
		||||
सबसे
 | 
			
		||||
सभी
 | 
			
		||||
साथ
 | 
			
		||||
साबुत
 | 
			
		||||
साभ
 | 
			
		||||
सारा
 | 
			
		||||
से
 | 
			
		||||
सो
 | 
			
		||||
ही
 | 
			
		||||
हुआ
 | 
			
		||||
हुई
 | 
			
		||||
हुए
 | 
			
		||||
है
 | 
			
		||||
हैं
 | 
			
		||||
हो
 | 
			
		||||
होता
 | 
			
		||||
होती
 | 
			
		||||
होते
 | 
			
		||||
होना
 | 
			
		||||
होने
 | 
			
		||||
# additional normalized forms of the above
 | 
			
		||||
अपनि
 | 
			
		||||
जेसे
 | 
			
		||||
होति
 | 
			
		||||
सभि
 | 
			
		||||
तिंहों
 | 
			
		||||
इंहों
 | 
			
		||||
दवारा
 | 
			
		||||
इसि
 | 
			
		||||
किंहें
 | 
			
		||||
थि
 | 
			
		||||
उंहों
 | 
			
		||||
ओर
 | 
			
		||||
जिंहें
 | 
			
		||||
वहिं
 | 
			
		||||
अभि
 | 
			
		||||
बनि
 | 
			
		||||
हि
 | 
			
		||||
उंहिं
 | 
			
		||||
उंहें
 | 
			
		||||
हें
 | 
			
		||||
वगेरह
 | 
			
		||||
एसे
 | 
			
		||||
रवासा
 | 
			
		||||
कोन
 | 
			
		||||
निचे
 | 
			
		||||
काफि
 | 
			
		||||
उसि
 | 
			
		||||
पुरा
 | 
			
		||||
भितर
 | 
			
		||||
हे
 | 
			
		||||
बहि
 | 
			
		||||
वहां
 | 
			
		||||
कोइ
 | 
			
		||||
यहां
 | 
			
		||||
जिंहों
 | 
			
		||||
तिंहें
 | 
			
		||||
किसि
 | 
			
		||||
कइ
 | 
			
		||||
यहि
 | 
			
		||||
इंहिं
 | 
			
		||||
जिधर
 | 
			
		||||
इंहें
 | 
			
		||||
अदि
 | 
			
		||||
इतयादि
 | 
			
		||||
हुइ
 | 
			
		||||
कोनसा
 | 
			
		||||
इसकि
 | 
			
		||||
दुसरे
 | 
			
		||||
जहां
 | 
			
		||||
अप
 | 
			
		||||
किंहों
 | 
			
		||||
उनकि
 | 
			
		||||
भि
 | 
			
		||||
वरग
 | 
			
		||||
हुअ
 | 
			
		||||
जेसा
 | 
			
		||||
नहिं
 | 
			
		||||
							
								
								
									
										211
									
								
								archiver/solr-config-dir/lang/stopwords_hu.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										211
									
								
								archiver/solr-config-dir/lang/stopwords_hu.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,211 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 
 | 
			
		||||
| Hungarian stop word list
 | 
			
		||||
| prepared by Anna Tordai
 | 
			
		||||
 | 
			
		||||
a
 | 
			
		||||
ahogy
 | 
			
		||||
ahol
 | 
			
		||||
aki
 | 
			
		||||
akik
 | 
			
		||||
akkor
 | 
			
		||||
alatt
 | 
			
		||||
által
 | 
			
		||||
általában
 | 
			
		||||
amely
 | 
			
		||||
amelyek
 | 
			
		||||
amelyekben
 | 
			
		||||
amelyeket
 | 
			
		||||
amelyet
 | 
			
		||||
amelynek
 | 
			
		||||
ami
 | 
			
		||||
amit
 | 
			
		||||
amolyan
 | 
			
		||||
amíg
 | 
			
		||||
amikor
 | 
			
		||||
át
 | 
			
		||||
abban
 | 
			
		||||
ahhoz
 | 
			
		||||
annak
 | 
			
		||||
arra
 | 
			
		||||
arról
 | 
			
		||||
az
 | 
			
		||||
azok
 | 
			
		||||
azon
 | 
			
		||||
azt
 | 
			
		||||
azzal
 | 
			
		||||
azért
 | 
			
		||||
aztán
 | 
			
		||||
azután
 | 
			
		||||
azonban
 | 
			
		||||
bár
 | 
			
		||||
be
 | 
			
		||||
belül
 | 
			
		||||
benne
 | 
			
		||||
cikk
 | 
			
		||||
cikkek
 | 
			
		||||
cikkeket
 | 
			
		||||
csak
 | 
			
		||||
de
 | 
			
		||||
e
 | 
			
		||||
eddig
 | 
			
		||||
egész
 | 
			
		||||
egy
 | 
			
		||||
egyes
 | 
			
		||||
egyetlen
 | 
			
		||||
egyéb
 | 
			
		||||
egyik
 | 
			
		||||
egyre
 | 
			
		||||
ekkor
 | 
			
		||||
el
 | 
			
		||||
elég
 | 
			
		||||
ellen
 | 
			
		||||
elő
 | 
			
		||||
először
 | 
			
		||||
előtt
 | 
			
		||||
első
 | 
			
		||||
én
 | 
			
		||||
éppen
 | 
			
		||||
ebben
 | 
			
		||||
ehhez
 | 
			
		||||
emilyen
 | 
			
		||||
ennek
 | 
			
		||||
erre
 | 
			
		||||
ez
 | 
			
		||||
ezt
 | 
			
		||||
ezek
 | 
			
		||||
ezen
 | 
			
		||||
ezzel
 | 
			
		||||
ezért
 | 
			
		||||
és
 | 
			
		||||
fel
 | 
			
		||||
felé
 | 
			
		||||
hanem
 | 
			
		||||
hiszen
 | 
			
		||||
hogy
 | 
			
		||||
hogyan
 | 
			
		||||
igen
 | 
			
		||||
így
 | 
			
		||||
illetve
 | 
			
		||||
ill.
 | 
			
		||||
ill
 | 
			
		||||
ilyen
 | 
			
		||||
ilyenkor
 | 
			
		||||
ison
 | 
			
		||||
ismét
 | 
			
		||||
itt
 | 
			
		||||
jó
 | 
			
		||||
jól
 | 
			
		||||
jobban
 | 
			
		||||
kell
 | 
			
		||||
kellett
 | 
			
		||||
keresztül
 | 
			
		||||
keressünk
 | 
			
		||||
ki
 | 
			
		||||
kívül
 | 
			
		||||
között
 | 
			
		||||
közül
 | 
			
		||||
legalább
 | 
			
		||||
lehet
 | 
			
		||||
lehetett
 | 
			
		||||
legyen
 | 
			
		||||
lenne
 | 
			
		||||
lenni
 | 
			
		||||
lesz
 | 
			
		||||
lett
 | 
			
		||||
maga
 | 
			
		||||
magát
 | 
			
		||||
majd
 | 
			
		||||
majd
 | 
			
		||||
már
 | 
			
		||||
más
 | 
			
		||||
másik
 | 
			
		||||
meg
 | 
			
		||||
még
 | 
			
		||||
mellett
 | 
			
		||||
mert
 | 
			
		||||
mely
 | 
			
		||||
melyek
 | 
			
		||||
mi
 | 
			
		||||
mit
 | 
			
		||||
míg
 | 
			
		||||
miért
 | 
			
		||||
milyen
 | 
			
		||||
mikor
 | 
			
		||||
minden
 | 
			
		||||
mindent
 | 
			
		||||
mindenki
 | 
			
		||||
mindig
 | 
			
		||||
mint
 | 
			
		||||
mintha
 | 
			
		||||
mivel
 | 
			
		||||
most
 | 
			
		||||
nagy
 | 
			
		||||
nagyobb
 | 
			
		||||
nagyon
 | 
			
		||||
ne
 | 
			
		||||
néha
 | 
			
		||||
nekem
 | 
			
		||||
neki
 | 
			
		||||
nem
 | 
			
		||||
néhány
 | 
			
		||||
nélkül
 | 
			
		||||
nincs
 | 
			
		||||
olyan
 | 
			
		||||
ott
 | 
			
		||||
össze
 | 
			
		||||
ő
 | 
			
		||||
ők
 | 
			
		||||
őket
 | 
			
		||||
pedig
 | 
			
		||||
persze
 | 
			
		||||
rá
 | 
			
		||||
s
 | 
			
		||||
saját
 | 
			
		||||
sem
 | 
			
		||||
semmi
 | 
			
		||||
sok
 | 
			
		||||
sokat
 | 
			
		||||
sokkal
 | 
			
		||||
számára
 | 
			
		||||
szemben
 | 
			
		||||
szerint
 | 
			
		||||
szinte
 | 
			
		||||
talán
 | 
			
		||||
tehát
 | 
			
		||||
teljes
 | 
			
		||||
tovább
 | 
			
		||||
továbbá
 | 
			
		||||
több
 | 
			
		||||
úgy
 | 
			
		||||
ugyanis
 | 
			
		||||
új
 | 
			
		||||
újabb
 | 
			
		||||
újra
 | 
			
		||||
után
 | 
			
		||||
utána
 | 
			
		||||
utolsó
 | 
			
		||||
vagy
 | 
			
		||||
vagyis
 | 
			
		||||
valaki
 | 
			
		||||
valami
 | 
			
		||||
valamint
 | 
			
		||||
való
 | 
			
		||||
vagyok
 | 
			
		||||
van
 | 
			
		||||
vannak
 | 
			
		||||
volt
 | 
			
		||||
voltam
 | 
			
		||||
voltak
 | 
			
		||||
voltunk
 | 
			
		||||
vissza
 | 
			
		||||
vele
 | 
			
		||||
viszont
 | 
			
		||||
volna
 | 
			
		||||
							
								
								
									
										46
									
								
								archiver/solr-config-dir/lang/stopwords_hy.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								archiver/solr-config-dir/lang/stopwords_hy.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,46 @@
 | 
			
		|||
# example set of Armenian stopwords.
 | 
			
		||||
այդ
 | 
			
		||||
այլ
 | 
			
		||||
այն
 | 
			
		||||
այս
 | 
			
		||||
դու
 | 
			
		||||
դուք
 | 
			
		||||
եմ
 | 
			
		||||
են
 | 
			
		||||
ենք
 | 
			
		||||
ես
 | 
			
		||||
եք
 | 
			
		||||
է
 | 
			
		||||
էի
 | 
			
		||||
էին
 | 
			
		||||
էինք
 | 
			
		||||
էիր
 | 
			
		||||
էիք
 | 
			
		||||
էր
 | 
			
		||||
ըստ
 | 
			
		||||
թ
 | 
			
		||||
ի
 | 
			
		||||
ին
 | 
			
		||||
իսկ
 | 
			
		||||
իր
 | 
			
		||||
կամ
 | 
			
		||||
համար
 | 
			
		||||
հետ
 | 
			
		||||
հետո
 | 
			
		||||
մենք
 | 
			
		||||
մեջ
 | 
			
		||||
մի
 | 
			
		||||
ն
 | 
			
		||||
նա
 | 
			
		||||
նաև
 | 
			
		||||
նրա
 | 
			
		||||
նրանք
 | 
			
		||||
որ
 | 
			
		||||
որը
 | 
			
		||||
որոնք
 | 
			
		||||
որպես
 | 
			
		||||
ու
 | 
			
		||||
ում
 | 
			
		||||
պիտի
 | 
			
		||||
վրա
 | 
			
		||||
և
 | 
			
		||||
							
								
								
									
										359
									
								
								archiver/solr-config-dir/lang/stopwords_id.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										359
									
								
								archiver/solr-config-dir/lang/stopwords_id.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,359 @@
 | 
			
		|||
# from appendix D of: A Study of Stemming Effects on Information
 | 
			
		||||
# Retrieval in Bahasa Indonesia
 | 
			
		||||
ada
 | 
			
		||||
adanya
 | 
			
		||||
adalah
 | 
			
		||||
adapun
 | 
			
		||||
agak
 | 
			
		||||
agaknya
 | 
			
		||||
agar
 | 
			
		||||
akan
 | 
			
		||||
akankah
 | 
			
		||||
akhirnya
 | 
			
		||||
aku
 | 
			
		||||
akulah
 | 
			
		||||
amat
 | 
			
		||||
amatlah
 | 
			
		||||
anda
 | 
			
		||||
andalah
 | 
			
		||||
antar
 | 
			
		||||
diantaranya
 | 
			
		||||
antara
 | 
			
		||||
antaranya
 | 
			
		||||
diantara
 | 
			
		||||
apa
 | 
			
		||||
apaan
 | 
			
		||||
mengapa
 | 
			
		||||
apabila
 | 
			
		||||
apakah
 | 
			
		||||
apalagi
 | 
			
		||||
apatah
 | 
			
		||||
atau
 | 
			
		||||
ataukah
 | 
			
		||||
ataupun
 | 
			
		||||
bagai
 | 
			
		||||
bagaikan
 | 
			
		||||
sebagai
 | 
			
		||||
sebagainya
 | 
			
		||||
bagaimana
 | 
			
		||||
bagaimanapun
 | 
			
		||||
sebagaimana
 | 
			
		||||
bagaimanakah
 | 
			
		||||
bagi
 | 
			
		||||
bahkan
 | 
			
		||||
bahwa
 | 
			
		||||
bahwasanya
 | 
			
		||||
sebaliknya
 | 
			
		||||
banyak
 | 
			
		||||
sebanyak
 | 
			
		||||
beberapa
 | 
			
		||||
seberapa
 | 
			
		||||
begini
 | 
			
		||||
beginian
 | 
			
		||||
beginikah
 | 
			
		||||
beginilah
 | 
			
		||||
sebegini
 | 
			
		||||
begitu
 | 
			
		||||
begitukah
 | 
			
		||||
begitulah
 | 
			
		||||
begitupun
 | 
			
		||||
sebegitu
 | 
			
		||||
belum
 | 
			
		||||
belumlah
 | 
			
		||||
sebelum
 | 
			
		||||
sebelumnya
 | 
			
		||||
sebenarnya
 | 
			
		||||
berapa
 | 
			
		||||
berapakah
 | 
			
		||||
berapalah
 | 
			
		||||
berapapun
 | 
			
		||||
betulkah
 | 
			
		||||
sebetulnya
 | 
			
		||||
biasa
 | 
			
		||||
biasanya
 | 
			
		||||
bila
 | 
			
		||||
bilakah
 | 
			
		||||
bisa
 | 
			
		||||
bisakah
 | 
			
		||||
sebisanya
 | 
			
		||||
boleh
 | 
			
		||||
bolehkah
 | 
			
		||||
bolehlah
 | 
			
		||||
buat
 | 
			
		||||
bukan
 | 
			
		||||
bukankah
 | 
			
		||||
bukanlah
 | 
			
		||||
bukannya
 | 
			
		||||
cuma
 | 
			
		||||
percuma
 | 
			
		||||
dahulu
 | 
			
		||||
dalam
 | 
			
		||||
dan
 | 
			
		||||
dapat
 | 
			
		||||
dari
 | 
			
		||||
daripada
 | 
			
		||||
dekat
 | 
			
		||||
demi
 | 
			
		||||
demikian
 | 
			
		||||
demikianlah
 | 
			
		||||
sedemikian
 | 
			
		||||
dengan
 | 
			
		||||
depan
 | 
			
		||||
di
 | 
			
		||||
dia
 | 
			
		||||
dialah
 | 
			
		||||
dini
 | 
			
		||||
diri
 | 
			
		||||
dirinya
 | 
			
		||||
terdiri
 | 
			
		||||
dong
 | 
			
		||||
dulu
 | 
			
		||||
enggak
 | 
			
		||||
enggaknya
 | 
			
		||||
entah
 | 
			
		||||
entahlah
 | 
			
		||||
terhadap
 | 
			
		||||
terhadapnya
 | 
			
		||||
hal
 | 
			
		||||
hampir
 | 
			
		||||
hanya
 | 
			
		||||
hanyalah
 | 
			
		||||
harus
 | 
			
		||||
haruslah
 | 
			
		||||
harusnya
 | 
			
		||||
seharusnya
 | 
			
		||||
hendak
 | 
			
		||||
hendaklah
 | 
			
		||||
hendaknya
 | 
			
		||||
hingga
 | 
			
		||||
sehingga
 | 
			
		||||
ia
 | 
			
		||||
ialah
 | 
			
		||||
ibarat
 | 
			
		||||
ingin
 | 
			
		||||
inginkah
 | 
			
		||||
inginkan
 | 
			
		||||
ini
 | 
			
		||||
inikah
 | 
			
		||||
inilah
 | 
			
		||||
itu
 | 
			
		||||
itukah
 | 
			
		||||
itulah
 | 
			
		||||
jangan
 | 
			
		||||
jangankan
 | 
			
		||||
janganlah
 | 
			
		||||
jika
 | 
			
		||||
jikalau
 | 
			
		||||
juga
 | 
			
		||||
justru
 | 
			
		||||
kala
 | 
			
		||||
kalau
 | 
			
		||||
kalaulah
 | 
			
		||||
kalaupun
 | 
			
		||||
kalian
 | 
			
		||||
kami
 | 
			
		||||
kamilah
 | 
			
		||||
kamu
 | 
			
		||||
kamulah
 | 
			
		||||
kan
 | 
			
		||||
kapan
 | 
			
		||||
kapankah
 | 
			
		||||
kapanpun
 | 
			
		||||
dikarenakan
 | 
			
		||||
karena
 | 
			
		||||
karenanya
 | 
			
		||||
ke
 | 
			
		||||
kecil
 | 
			
		||||
kemudian
 | 
			
		||||
kenapa
 | 
			
		||||
kepada
 | 
			
		||||
kepadanya
 | 
			
		||||
ketika
 | 
			
		||||
seketika
 | 
			
		||||
khususnya
 | 
			
		||||
kini
 | 
			
		||||
kinilah
 | 
			
		||||
kiranya
 | 
			
		||||
sekiranya
 | 
			
		||||
kita
 | 
			
		||||
kitalah
 | 
			
		||||
kok
 | 
			
		||||
lagi
 | 
			
		||||
lagian
 | 
			
		||||
selagi
 | 
			
		||||
lah
 | 
			
		||||
lain
 | 
			
		||||
lainnya
 | 
			
		||||
melainkan
 | 
			
		||||
selaku
 | 
			
		||||
lalu
 | 
			
		||||
melalui
 | 
			
		||||
terlalu
 | 
			
		||||
lama
 | 
			
		||||
lamanya
 | 
			
		||||
selama
 | 
			
		||||
selama
 | 
			
		||||
selamanya
 | 
			
		||||
lebih
 | 
			
		||||
terlebih
 | 
			
		||||
bermacam
 | 
			
		||||
macam
 | 
			
		||||
semacam
 | 
			
		||||
maka
 | 
			
		||||
makanya
 | 
			
		||||
makin
 | 
			
		||||
malah
 | 
			
		||||
malahan
 | 
			
		||||
mampu
 | 
			
		||||
mampukah
 | 
			
		||||
mana
 | 
			
		||||
manakala
 | 
			
		||||
manalagi
 | 
			
		||||
masih
 | 
			
		||||
masihkah
 | 
			
		||||
semasih
 | 
			
		||||
masing
 | 
			
		||||
mau
 | 
			
		||||
maupun
 | 
			
		||||
semaunya
 | 
			
		||||
memang
 | 
			
		||||
mereka
 | 
			
		||||
merekalah
 | 
			
		||||
meski
 | 
			
		||||
meskipun
 | 
			
		||||
semula
 | 
			
		||||
mungkin
 | 
			
		||||
mungkinkah
 | 
			
		||||
nah
 | 
			
		||||
namun
 | 
			
		||||
nanti
 | 
			
		||||
nantinya
 | 
			
		||||
nyaris
 | 
			
		||||
oleh
 | 
			
		||||
olehnya
 | 
			
		||||
seorang
 | 
			
		||||
seseorang
 | 
			
		||||
pada
 | 
			
		||||
padanya
 | 
			
		||||
padahal
 | 
			
		||||
paling
 | 
			
		||||
sepanjang
 | 
			
		||||
pantas
 | 
			
		||||
sepantasnya
 | 
			
		||||
sepantasnyalah
 | 
			
		||||
para
 | 
			
		||||
pasti
 | 
			
		||||
pastilah
 | 
			
		||||
per
 | 
			
		||||
pernah
 | 
			
		||||
pula
 | 
			
		||||
pun
 | 
			
		||||
merupakan
 | 
			
		||||
rupanya
 | 
			
		||||
serupa
 | 
			
		||||
saat
 | 
			
		||||
saatnya
 | 
			
		||||
sesaat
 | 
			
		||||
saja
 | 
			
		||||
sajalah
 | 
			
		||||
saling
 | 
			
		||||
bersama
 | 
			
		||||
sama
 | 
			
		||||
sesama
 | 
			
		||||
sambil
 | 
			
		||||
sampai
 | 
			
		||||
sana
 | 
			
		||||
sangat
 | 
			
		||||
sangatlah
 | 
			
		||||
saya
 | 
			
		||||
sayalah
 | 
			
		||||
se
 | 
			
		||||
sebab
 | 
			
		||||
sebabnya
 | 
			
		||||
sebuah
 | 
			
		||||
tersebut
 | 
			
		||||
tersebutlah
 | 
			
		||||
sedang
 | 
			
		||||
sedangkan
 | 
			
		||||
sedikit
 | 
			
		||||
sedikitnya
 | 
			
		||||
segala
 | 
			
		||||
segalanya
 | 
			
		||||
segera
 | 
			
		||||
sesegera
 | 
			
		||||
sejak
 | 
			
		||||
sejenak
 | 
			
		||||
sekali
 | 
			
		||||
sekalian
 | 
			
		||||
sekalipun
 | 
			
		||||
sesekali
 | 
			
		||||
sekaligus
 | 
			
		||||
sekarang
 | 
			
		||||
sekarang
 | 
			
		||||
sekitar
 | 
			
		||||
sekitarnya
 | 
			
		||||
sela
 | 
			
		||||
selain
 | 
			
		||||
selalu
 | 
			
		||||
seluruh
 | 
			
		||||
seluruhnya
 | 
			
		||||
semakin
 | 
			
		||||
sementara
 | 
			
		||||
sempat
 | 
			
		||||
semua
 | 
			
		||||
semuanya
 | 
			
		||||
sendiri
 | 
			
		||||
sendirinya
 | 
			
		||||
seolah
 | 
			
		||||
seperti
 | 
			
		||||
sepertinya
 | 
			
		||||
sering
 | 
			
		||||
seringnya
 | 
			
		||||
serta
 | 
			
		||||
siapa
 | 
			
		||||
siapakah
 | 
			
		||||
siapapun
 | 
			
		||||
disini
 | 
			
		||||
disinilah
 | 
			
		||||
sini
 | 
			
		||||
sinilah
 | 
			
		||||
sesuatu
 | 
			
		||||
sesuatunya
 | 
			
		||||
suatu
 | 
			
		||||
sesudah
 | 
			
		||||
sesudahnya
 | 
			
		||||
sudah
 | 
			
		||||
sudahkah
 | 
			
		||||
sudahlah
 | 
			
		||||
supaya
 | 
			
		||||
tadi
 | 
			
		||||
tadinya
 | 
			
		||||
tak
 | 
			
		||||
tanpa
 | 
			
		||||
setelah
 | 
			
		||||
telah
 | 
			
		||||
tentang
 | 
			
		||||
tentu
 | 
			
		||||
tentulah
 | 
			
		||||
tentunya
 | 
			
		||||
tertentu
 | 
			
		||||
seterusnya
 | 
			
		||||
tapi
 | 
			
		||||
tetapi
 | 
			
		||||
setiap
 | 
			
		||||
tiap
 | 
			
		||||
setidaknya
 | 
			
		||||
tidak
 | 
			
		||||
tidakkah
 | 
			
		||||
tidaklah
 | 
			
		||||
toh
 | 
			
		||||
waduh
 | 
			
		||||
wah
 | 
			
		||||
wahai
 | 
			
		||||
sewaktu
 | 
			
		||||
walau
 | 
			
		||||
walaupun
 | 
			
		||||
wong
 | 
			
		||||
yaitu
 | 
			
		||||
yakni
 | 
			
		||||
yang
 | 
			
		||||
							
								
								
									
										303
									
								
								archiver/solr-config-dir/lang/stopwords_it.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										303
									
								
								archiver/solr-config-dir/lang/stopwords_it.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,303 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | An Italian stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
ad             |  a (to) before vowel
 | 
			
		||||
al             |  a + il
 | 
			
		||||
allo           |  a + lo
 | 
			
		||||
ai             |  a + i
 | 
			
		||||
agli           |  a + gli
 | 
			
		||||
all            |  a + l'
 | 
			
		||||
agl            |  a + gl'
 | 
			
		||||
alla           |  a + la
 | 
			
		||||
alle           |  a + le
 | 
			
		||||
con            |  with
 | 
			
		||||
col            |  con + il
 | 
			
		||||
coi            |  con + i (forms collo, cogli etc are now very rare)
 | 
			
		||||
da             |  from
 | 
			
		||||
dal            |  da + il
 | 
			
		||||
dallo          |  da + lo
 | 
			
		||||
dai            |  da + i
 | 
			
		||||
dagli          |  da + gli
 | 
			
		||||
dall           |  da + l'
 | 
			
		||||
dagl           |  da + gl'
 | 
			
		||||
dalla          |  da + la
 | 
			
		||||
dalle          |  da + le
 | 
			
		||||
di             |  of
 | 
			
		||||
del            |  di + il
 | 
			
		||||
dello          |  di + lo
 | 
			
		||||
dei            |  di + i
 | 
			
		||||
degli          |  di + gli
 | 
			
		||||
dell           |  di + l'
 | 
			
		||||
degl           |  di + gl'
 | 
			
		||||
della          |  di + la
 | 
			
		||||
delle          |  di + le
 | 
			
		||||
in             |  in
 | 
			
		||||
nel            |  in + il
 | 
			
		||||
nello          |  in + lo
 | 
			
		||||
nei            |  in + i
 | 
			
		||||
negli          |  in + gli
 | 
			
		||||
nell           |  in + l'
 | 
			
		||||
negl           |  in + gl'
 | 
			
		||||
nella          |  in + la
 | 
			
		||||
nelle          |  in + le
 | 
			
		||||
su             |  on
 | 
			
		||||
sul            |  su + il
 | 
			
		||||
sullo          |  su + lo
 | 
			
		||||
sui            |  su + i
 | 
			
		||||
sugli          |  su + gli
 | 
			
		||||
sull           |  su + l'
 | 
			
		||||
sugl           |  su + gl'
 | 
			
		||||
sulla          |  su + la
 | 
			
		||||
sulle          |  su + le
 | 
			
		||||
per            |  through, by
 | 
			
		||||
tra            |  among
 | 
			
		||||
contro         |  against
 | 
			
		||||
io             |  I
 | 
			
		||||
tu             |  thou
 | 
			
		||||
lui            |  he
 | 
			
		||||
lei            |  she
 | 
			
		||||
noi            |  we
 | 
			
		||||
voi            |  you
 | 
			
		||||
loro           |  they
 | 
			
		||||
mio            |  my
 | 
			
		||||
mia            |
 | 
			
		||||
miei           |
 | 
			
		||||
mie            |
 | 
			
		||||
tuo            |
 | 
			
		||||
tua            |
 | 
			
		||||
tuoi           |  thy
 | 
			
		||||
tue            |
 | 
			
		||||
suo            |
 | 
			
		||||
sua            |
 | 
			
		||||
suoi           |  his, her
 | 
			
		||||
sue            |
 | 
			
		||||
nostro         |  our
 | 
			
		||||
nostra         |
 | 
			
		||||
nostri         |
 | 
			
		||||
nostre         |
 | 
			
		||||
vostro         |  your
 | 
			
		||||
vostra         |
 | 
			
		||||
vostri         |
 | 
			
		||||
vostre         |
 | 
			
		||||
mi             |  me
 | 
			
		||||
ti             |  thee
 | 
			
		||||
ci             |  us, there
 | 
			
		||||
vi             |  you, there
 | 
			
		||||
lo             |  him, the
 | 
			
		||||
la             |  her, the
 | 
			
		||||
li             |  them
 | 
			
		||||
le             |  them, the
 | 
			
		||||
gli            |  to him, the
 | 
			
		||||
ne             |  from there etc
 | 
			
		||||
il             |  the
 | 
			
		||||
un             |  a
 | 
			
		||||
uno            |  a
 | 
			
		||||
una            |  a
 | 
			
		||||
ma             |  but
 | 
			
		||||
ed             |  and
 | 
			
		||||
se             |  if
 | 
			
		||||
perché         |  why, because
 | 
			
		||||
anche          |  also
 | 
			
		||||
come           |  how
 | 
			
		||||
dov            |  where (as dov')
 | 
			
		||||
dove           |  where
 | 
			
		||||
che            |  who, that
 | 
			
		||||
chi            |  who
 | 
			
		||||
cui            |  whom
 | 
			
		||||
non            |  not
 | 
			
		||||
più            |  more
 | 
			
		||||
quale          |  who, that
 | 
			
		||||
quanto         |  how much
 | 
			
		||||
quanti         |
 | 
			
		||||
quanta         |
 | 
			
		||||
quante         |
 | 
			
		||||
quello         |  that
 | 
			
		||||
quelli         |
 | 
			
		||||
quella         |
 | 
			
		||||
quelle         |
 | 
			
		||||
questo         |  this
 | 
			
		||||
questi         |
 | 
			
		||||
questa         |
 | 
			
		||||
queste         |
 | 
			
		||||
si             |  yes
 | 
			
		||||
tutto          |  all
 | 
			
		||||
tutti          |  all
 | 
			
		||||
 | 
			
		||||
               |  single letter forms:
 | 
			
		||||
 | 
			
		||||
a              |  at
 | 
			
		||||
c              |  as c' for ce or ci
 | 
			
		||||
e              |  and
 | 
			
		||||
i              |  the
 | 
			
		||||
l              |  as l'
 | 
			
		||||
o              |  or
 | 
			
		||||
 | 
			
		||||
               | forms of avere, to have (not including the infinitive):
 | 
			
		||||
 | 
			
		||||
ho
 | 
			
		||||
hai
 | 
			
		||||
ha
 | 
			
		||||
abbiamo
 | 
			
		||||
avete
 | 
			
		||||
hanno
 | 
			
		||||
abbia
 | 
			
		||||
abbiate
 | 
			
		||||
abbiano
 | 
			
		||||
avrò
 | 
			
		||||
avrai
 | 
			
		||||
avrà
 | 
			
		||||
avremo
 | 
			
		||||
avrete
 | 
			
		||||
avranno
 | 
			
		||||
avrei
 | 
			
		||||
avresti
 | 
			
		||||
avrebbe
 | 
			
		||||
avremmo
 | 
			
		||||
avreste
 | 
			
		||||
avrebbero
 | 
			
		||||
avevo
 | 
			
		||||
avevi
 | 
			
		||||
aveva
 | 
			
		||||
avevamo
 | 
			
		||||
avevate
 | 
			
		||||
avevano
 | 
			
		||||
ebbi
 | 
			
		||||
avesti
 | 
			
		||||
ebbe
 | 
			
		||||
avemmo
 | 
			
		||||
aveste
 | 
			
		||||
ebbero
 | 
			
		||||
avessi
 | 
			
		||||
avesse
 | 
			
		||||
avessimo
 | 
			
		||||
avessero
 | 
			
		||||
avendo
 | 
			
		||||
avuto
 | 
			
		||||
avuta
 | 
			
		||||
avuti
 | 
			
		||||
avute
 | 
			
		||||
 | 
			
		||||
               | forms of essere, to be (not including the infinitive):
 | 
			
		||||
sono
 | 
			
		||||
sei
 | 
			
		||||
è
 | 
			
		||||
siamo
 | 
			
		||||
siete
 | 
			
		||||
sia
 | 
			
		||||
siate
 | 
			
		||||
siano
 | 
			
		||||
sarò
 | 
			
		||||
sarai
 | 
			
		||||
sarà
 | 
			
		||||
saremo
 | 
			
		||||
sarete
 | 
			
		||||
saranno
 | 
			
		||||
sarei
 | 
			
		||||
saresti
 | 
			
		||||
sarebbe
 | 
			
		||||
saremmo
 | 
			
		||||
sareste
 | 
			
		||||
sarebbero
 | 
			
		||||
ero
 | 
			
		||||
eri
 | 
			
		||||
era
 | 
			
		||||
eravamo
 | 
			
		||||
eravate
 | 
			
		||||
erano
 | 
			
		||||
fui
 | 
			
		||||
fosti
 | 
			
		||||
fu
 | 
			
		||||
fummo
 | 
			
		||||
foste
 | 
			
		||||
furono
 | 
			
		||||
fossi
 | 
			
		||||
fosse
 | 
			
		||||
fossimo
 | 
			
		||||
fossero
 | 
			
		||||
essendo
 | 
			
		||||
 | 
			
		||||
               | forms of fare, to do (not including the infinitive, fa, fat-):
 | 
			
		||||
faccio
 | 
			
		||||
fai
 | 
			
		||||
facciamo
 | 
			
		||||
fanno
 | 
			
		||||
faccia
 | 
			
		||||
facciate
 | 
			
		||||
facciano
 | 
			
		||||
farò
 | 
			
		||||
farai
 | 
			
		||||
farà
 | 
			
		||||
faremo
 | 
			
		||||
farete
 | 
			
		||||
faranno
 | 
			
		||||
farei
 | 
			
		||||
faresti
 | 
			
		||||
farebbe
 | 
			
		||||
faremmo
 | 
			
		||||
fareste
 | 
			
		||||
farebbero
 | 
			
		||||
facevo
 | 
			
		||||
facevi
 | 
			
		||||
faceva
 | 
			
		||||
facevamo
 | 
			
		||||
facevate
 | 
			
		||||
facevano
 | 
			
		||||
feci
 | 
			
		||||
facesti
 | 
			
		||||
fece
 | 
			
		||||
facemmo
 | 
			
		||||
faceste
 | 
			
		||||
fecero
 | 
			
		||||
facessi
 | 
			
		||||
facesse
 | 
			
		||||
facessimo
 | 
			
		||||
facessero
 | 
			
		||||
facendo
 | 
			
		||||
 | 
			
		||||
               | forms of stare, to be (not including the infinitive):
 | 
			
		||||
sto
 | 
			
		||||
stai
 | 
			
		||||
sta
 | 
			
		||||
stiamo
 | 
			
		||||
stanno
 | 
			
		||||
stia
 | 
			
		||||
stiate
 | 
			
		||||
stiano
 | 
			
		||||
starò
 | 
			
		||||
starai
 | 
			
		||||
starà
 | 
			
		||||
staremo
 | 
			
		||||
starete
 | 
			
		||||
staranno
 | 
			
		||||
starei
 | 
			
		||||
staresti
 | 
			
		||||
starebbe
 | 
			
		||||
staremmo
 | 
			
		||||
stareste
 | 
			
		||||
starebbero
 | 
			
		||||
stavo
 | 
			
		||||
stavi
 | 
			
		||||
stava
 | 
			
		||||
stavamo
 | 
			
		||||
stavate
 | 
			
		||||
stavano
 | 
			
		||||
stetti
 | 
			
		||||
stesti
 | 
			
		||||
stette
 | 
			
		||||
stemmo
 | 
			
		||||
steste
 | 
			
		||||
stettero
 | 
			
		||||
stessi
 | 
			
		||||
stesse
 | 
			
		||||
stessimo
 | 
			
		||||
stessero
 | 
			
		||||
stando
 | 
			
		||||
							
								
								
									
										127
									
								
								archiver/solr-config-dir/lang/stopwords_ja.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								archiver/solr-config-dir/lang/stopwords_ja.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,127 @@
 | 
			
		|||
#
 | 
			
		||||
# This file defines a stopword set for Japanese.
 | 
			
		||||
#
 | 
			
		||||
# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
 | 
			
		||||
# Punctuation characters and frequent kanji have mostly been left out.  See LUCENE-3745
 | 
			
		||||
# for frequency lists, etc. that can be useful for making your own set (if desired)
 | 
			
		||||
#
 | 
			
		||||
# Note that there is an overlap between these stopwords and the terms stopped when used
 | 
			
		||||
# in combination with the JapanesePartOfSpeechStopFilter.  When editing this file, note
 | 
			
		||||
# that comments are not allowed on the same line as stopwords.
 | 
			
		||||
#
 | 
			
		||||
# Also note that stopping is done in a case-insensitive manner.  Change your StopFilter
 | 
			
		||||
# configuration if you need case-sensitive stopping.  Lastly, note that stopping is done
 | 
			
		||||
# using the same character width as the entries in this file.  Since this StopFilter is
 | 
			
		||||
# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
 | 
			
		||||
# entries to be in half-width and your kana entries to be in full-width.
 | 
			
		||||
#
 | 
			
		||||
の
 | 
			
		||||
に
 | 
			
		||||
は
 | 
			
		||||
を
 | 
			
		||||
た
 | 
			
		||||
が
 | 
			
		||||
で
 | 
			
		||||
て
 | 
			
		||||
と
 | 
			
		||||
し
 | 
			
		||||
れ
 | 
			
		||||
さ
 | 
			
		||||
ある
 | 
			
		||||
いる
 | 
			
		||||
も
 | 
			
		||||
する
 | 
			
		||||
から
 | 
			
		||||
な
 | 
			
		||||
こと
 | 
			
		||||
として
 | 
			
		||||
い
 | 
			
		||||
や
 | 
			
		||||
れる
 | 
			
		||||
など
 | 
			
		||||
なっ
 | 
			
		||||
ない
 | 
			
		||||
この
 | 
			
		||||
ため
 | 
			
		||||
その
 | 
			
		||||
あっ
 | 
			
		||||
よう
 | 
			
		||||
また
 | 
			
		||||
もの
 | 
			
		||||
という
 | 
			
		||||
あり
 | 
			
		||||
まで
 | 
			
		||||
られ
 | 
			
		||||
なる
 | 
			
		||||
へ
 | 
			
		||||
か
 | 
			
		||||
だ
 | 
			
		||||
これ
 | 
			
		||||
によって
 | 
			
		||||
により
 | 
			
		||||
おり
 | 
			
		||||
より
 | 
			
		||||
による
 | 
			
		||||
ず
 | 
			
		||||
なり
 | 
			
		||||
られる
 | 
			
		||||
において
 | 
			
		||||
ば
 | 
			
		||||
なかっ
 | 
			
		||||
なく
 | 
			
		||||
しかし
 | 
			
		||||
について
 | 
			
		||||
せ
 | 
			
		||||
だっ
 | 
			
		||||
その後
 | 
			
		||||
できる
 | 
			
		||||
それ
 | 
			
		||||
う
 | 
			
		||||
ので
 | 
			
		||||
なお
 | 
			
		||||
のみ
 | 
			
		||||
でき
 | 
			
		||||
き
 | 
			
		||||
つ
 | 
			
		||||
における
 | 
			
		||||
および
 | 
			
		||||
いう
 | 
			
		||||
さらに
 | 
			
		||||
でも
 | 
			
		||||
ら
 | 
			
		||||
たり
 | 
			
		||||
その他
 | 
			
		||||
に関する
 | 
			
		||||
たち
 | 
			
		||||
ます
 | 
			
		||||
ん
 | 
			
		||||
なら
 | 
			
		||||
に対して
 | 
			
		||||
特に
 | 
			
		||||
せる
 | 
			
		||||
及び
 | 
			
		||||
これら
 | 
			
		||||
とき
 | 
			
		||||
では
 | 
			
		||||
にて
 | 
			
		||||
ほか
 | 
			
		||||
ながら
 | 
			
		||||
うち
 | 
			
		||||
そして
 | 
			
		||||
とともに
 | 
			
		||||
ただし
 | 
			
		||||
かつて
 | 
			
		||||
それぞれ
 | 
			
		||||
または
 | 
			
		||||
お
 | 
			
		||||
ほど
 | 
			
		||||
ものの
 | 
			
		||||
に対する
 | 
			
		||||
ほとんど
 | 
			
		||||
と共に
 | 
			
		||||
といった
 | 
			
		||||
です
 | 
			
		||||
とも
 | 
			
		||||
ところ
 | 
			
		||||
ここ
 | 
			
		||||
##### End of file
 | 
			
		||||
							
								
								
									
										172
									
								
								archiver/solr-config-dir/lang/stopwords_lv.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								archiver/solr-config-dir/lang/stopwords_lv.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,172 @@
 | 
			
		|||
# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
 | 
			
		||||
# the original list of over 800 forms was refined: 
 | 
			
		||||
#   pronouns, adverbs, interjections were removed
 | 
			
		||||
# 
 | 
			
		||||
# prepositions
 | 
			
		||||
aiz
 | 
			
		||||
ap
 | 
			
		||||
ar
 | 
			
		||||
apakš
 | 
			
		||||
ārpus
 | 
			
		||||
augšpus
 | 
			
		||||
bez
 | 
			
		||||
caur
 | 
			
		||||
dēļ
 | 
			
		||||
gar
 | 
			
		||||
iekš
 | 
			
		||||
iz
 | 
			
		||||
kopš
 | 
			
		||||
labad
 | 
			
		||||
lejpus
 | 
			
		||||
līdz
 | 
			
		||||
no
 | 
			
		||||
otrpus
 | 
			
		||||
pa
 | 
			
		||||
par
 | 
			
		||||
pār
 | 
			
		||||
pēc
 | 
			
		||||
pie
 | 
			
		||||
pirms
 | 
			
		||||
pret
 | 
			
		||||
priekš
 | 
			
		||||
starp
 | 
			
		||||
šaipus
 | 
			
		||||
uz
 | 
			
		||||
viņpus
 | 
			
		||||
virs
 | 
			
		||||
virspus
 | 
			
		||||
zem
 | 
			
		||||
apakšpus
 | 
			
		||||
# Conjunctions
 | 
			
		||||
un
 | 
			
		||||
bet
 | 
			
		||||
jo
 | 
			
		||||
ja
 | 
			
		||||
ka
 | 
			
		||||
lai
 | 
			
		||||
tomēr
 | 
			
		||||
tikko
 | 
			
		||||
turpretī
 | 
			
		||||
arī
 | 
			
		||||
kaut
 | 
			
		||||
gan
 | 
			
		||||
tādēļ
 | 
			
		||||
tā
 | 
			
		||||
ne
 | 
			
		||||
tikvien
 | 
			
		||||
vien
 | 
			
		||||
kā
 | 
			
		||||
ir
 | 
			
		||||
te
 | 
			
		||||
vai
 | 
			
		||||
kamēr
 | 
			
		||||
# Particles
 | 
			
		||||
ar
 | 
			
		||||
diezin
 | 
			
		||||
droši
 | 
			
		||||
diemžēl
 | 
			
		||||
nebūt
 | 
			
		||||
ik
 | 
			
		||||
it
 | 
			
		||||
taču
 | 
			
		||||
nu
 | 
			
		||||
pat
 | 
			
		||||
tiklab
 | 
			
		||||
iekšpus
 | 
			
		||||
nedz
 | 
			
		||||
tik
 | 
			
		||||
nevis
 | 
			
		||||
turpretim
 | 
			
		||||
jeb
 | 
			
		||||
iekam
 | 
			
		||||
iekām
 | 
			
		||||
iekāms
 | 
			
		||||
kolīdz
 | 
			
		||||
līdzko
 | 
			
		||||
tiklīdz
 | 
			
		||||
jebšu
 | 
			
		||||
tālab
 | 
			
		||||
tāpēc
 | 
			
		||||
nekā
 | 
			
		||||
itin
 | 
			
		||||
jā
 | 
			
		||||
jau
 | 
			
		||||
jel
 | 
			
		||||
nē
 | 
			
		||||
nezin
 | 
			
		||||
tad
 | 
			
		||||
tikai
 | 
			
		||||
vis
 | 
			
		||||
tak
 | 
			
		||||
iekams
 | 
			
		||||
vien
 | 
			
		||||
# modal verbs
 | 
			
		||||
būt  
 | 
			
		||||
biju 
 | 
			
		||||
biji
 | 
			
		||||
bija
 | 
			
		||||
bijām
 | 
			
		||||
bijāt
 | 
			
		||||
esmu
 | 
			
		||||
esi
 | 
			
		||||
esam
 | 
			
		||||
esat 
 | 
			
		||||
būšu     
 | 
			
		||||
būsi
 | 
			
		||||
būs
 | 
			
		||||
būsim
 | 
			
		||||
būsiet
 | 
			
		||||
tikt
 | 
			
		||||
tiku
 | 
			
		||||
tiki
 | 
			
		||||
tika
 | 
			
		||||
tikām
 | 
			
		||||
tikāt
 | 
			
		||||
tieku
 | 
			
		||||
tiec
 | 
			
		||||
tiek
 | 
			
		||||
tiekam
 | 
			
		||||
tiekat
 | 
			
		||||
tikšu
 | 
			
		||||
tiks
 | 
			
		||||
tiksim
 | 
			
		||||
tiksiet
 | 
			
		||||
tapt
 | 
			
		||||
tapi
 | 
			
		||||
tapāt
 | 
			
		||||
topat
 | 
			
		||||
tapšu
 | 
			
		||||
tapsi
 | 
			
		||||
taps
 | 
			
		||||
tapsim
 | 
			
		||||
tapsiet
 | 
			
		||||
kļūt
 | 
			
		||||
kļuvu
 | 
			
		||||
kļuvi
 | 
			
		||||
kļuva
 | 
			
		||||
kļuvām
 | 
			
		||||
kļuvāt
 | 
			
		||||
kļūstu
 | 
			
		||||
kļūsti
 | 
			
		||||
kļūst
 | 
			
		||||
kļūstam
 | 
			
		||||
kļūstat
 | 
			
		||||
kļūšu
 | 
			
		||||
kļūsi
 | 
			
		||||
kļūs
 | 
			
		||||
kļūsim
 | 
			
		||||
kļūsiet
 | 
			
		||||
# verbs
 | 
			
		||||
varēt
 | 
			
		||||
varēju
 | 
			
		||||
varējām
 | 
			
		||||
varēšu
 | 
			
		||||
varēsim
 | 
			
		||||
var
 | 
			
		||||
varēji
 | 
			
		||||
varējāt
 | 
			
		||||
varēsi
 | 
			
		||||
varēsiet
 | 
			
		||||
varat
 | 
			
		||||
varēja
 | 
			
		||||
varēs
 | 
			
		||||
							
								
								
									
										119
									
								
								archiver/solr-config-dir/lang/stopwords_nl.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								archiver/solr-config-dir/lang/stopwords_nl.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,119 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | A Dutch stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
 | This is a ranked list (commonest to rarest) of stopwords derived from
 | 
			
		||||
 | a large sample of Dutch text.
 | 
			
		||||
 | 
			
		||||
 | Dutch stop words frequently exhibit homonym clashes. These are indicated
 | 
			
		||||
 | clearly below.
 | 
			
		||||
 | 
			
		||||
de             |  the
 | 
			
		||||
en             |  and
 | 
			
		||||
van            |  of, from
 | 
			
		||||
ik             |  I, the ego
 | 
			
		||||
te             |  (1) chez, at etc, (2) to, (3) too
 | 
			
		||||
dat            |  that, which
 | 
			
		||||
die            |  that, those, who, which
 | 
			
		||||
in             |  in, inside
 | 
			
		||||
een            |  a, an, one
 | 
			
		||||
hij            |  he
 | 
			
		||||
het            |  the, it
 | 
			
		||||
niet           |  not, nothing, naught
 | 
			
		||||
zijn           |  (1) to be, being, (2) his, one's, its
 | 
			
		||||
is             |  is
 | 
			
		||||
was            |  (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
 | 
			
		||||
op             |  on, upon, at, in, up, used up
 | 
			
		||||
aan            |  on, upon, to (as dative)
 | 
			
		||||
met            |  with, by
 | 
			
		||||
als            |  like, such as, when
 | 
			
		||||
voor           |  (1) before, in front of, (2) furrow
 | 
			
		||||
had            |  had, past tense all persons sing. of 'hebben' (have)
 | 
			
		||||
er             |  there
 | 
			
		||||
maar           |  but, only
 | 
			
		||||
om             |  round, about, for etc
 | 
			
		||||
hem            |  him
 | 
			
		||||
dan            |  then
 | 
			
		||||
zou            |  should/would, past tense all persons sing. of 'zullen'
 | 
			
		||||
of             |  or, whether, if
 | 
			
		||||
wat            |  what, something, anything
 | 
			
		||||
mijn           |  possessive and noun 'mine'
 | 
			
		||||
men            |  people, 'one'
 | 
			
		||||
dit            |  this
 | 
			
		||||
zo             |  so, thus, in this way
 | 
			
		||||
door           |  through, by
 | 
			
		||||
over           |  over, across
 | 
			
		||||
ze             |  she, her, they, them
 | 
			
		||||
zich           |  oneself
 | 
			
		||||
bij            |  (1) a bee, (2) by, near, at
 | 
			
		||||
ook            |  also, too
 | 
			
		||||
tot            |  till, until
 | 
			
		||||
je             |  you
 | 
			
		||||
mij            |  me
 | 
			
		||||
uit            |  out of, from
 | 
			
		||||
der            |  Old Dutch form of 'van der' still found in surnames
 | 
			
		||||
daar           |  (1) there, (2) because
 | 
			
		||||
haar           |  (1) her, their, them, (2) hair
 | 
			
		||||
naar           |  (1) unpleasant, unwell etc, (2) towards, (3) as
 | 
			
		||||
heb            |  present first person sing. of 'to have'
 | 
			
		||||
hoe            |  how, why
 | 
			
		||||
heeft          |  present third person sing. of 'to have'
 | 
			
		||||
hebben         |  'to have' and various parts thereof
 | 
			
		||||
deze           |  this
 | 
			
		||||
u              |  you
 | 
			
		||||
want           |  (1) for, (2) mitten, (3) rigging
 | 
			
		||||
nog            |  yet, still
 | 
			
		||||
zal            |  'shall', first and third person sing. of verb 'zullen' (will)
 | 
			
		||||
me             |  me
 | 
			
		||||
zij            |  she, they
 | 
			
		||||
nu             |  now
 | 
			
		||||
ge             |  'thou', still used in Belgium and south Netherlands
 | 
			
		||||
geen           |  none
 | 
			
		||||
omdat          |  because
 | 
			
		||||
iets           |  something, somewhat
 | 
			
		||||
worden         |  to become, grow, get
 | 
			
		||||
toch           |  yet, still
 | 
			
		||||
al             |  all, every, each
 | 
			
		||||
waren          |  (1) 'were', (2) to wander, (3) wares
 | 
			
		||||
veel           |  much, many
 | 
			
		||||
meer           |  (1) more, (2) lake
 | 
			
		||||
doen           |  to do, to make
 | 
			
		||||
toen           |  then, when
 | 
			
		||||
moet           |  noun 'spot/mote' and present form of 'to must'
 | 
			
		||||
ben            |  (1) am, (2) 'are' in interrogative second person singular of 'to be'
 | 
			
		||||
zonder         |  without
 | 
			
		||||
kan            |  noun 'can' and present form of 'to be able'
 | 
			
		||||
hun            |  their, them
 | 
			
		||||
dus            |  so, consequently
 | 
			
		||||
alles          |  all, everything, anything
 | 
			
		||||
onder          |  under, beneath
 | 
			
		||||
ja             |  yes, of course
 | 
			
		||||
eens           |  once, one day
 | 
			
		||||
hier           |  here
 | 
			
		||||
wie            |  who
 | 
			
		||||
werd           |  imperfect third person sing. of 'become'
 | 
			
		||||
altijd         |  always
 | 
			
		||||
doch           |  yet, but etc
 | 
			
		||||
wordt          |  present third person sing. of 'become'
 | 
			
		||||
wezen          |  (1) to be, (2) 'been' as in 'been fishing', (3) orphans
 | 
			
		||||
kunnen         |  to be able
 | 
			
		||||
ons            |  us/our
 | 
			
		||||
zelf           |  self
 | 
			
		||||
tegen          |  against, towards, at
 | 
			
		||||
na             |  after, near
 | 
			
		||||
reeds          |  already
 | 
			
		||||
wil            |  (1) present tense of 'want', (2) 'will', noun, (3) fender
 | 
			
		||||
kon            |  could; past tense of 'to be able'
 | 
			
		||||
niets          |  nothing
 | 
			
		||||
uw             |  your
 | 
			
		||||
iemand         |  somebody
 | 
			
		||||
geweest        |  been; past participle of 'be'
 | 
			
		||||
andere         |  other
 | 
			
		||||
							
								
								
									
										194
									
								
								archiver/solr-config-dir/lang/stopwords_no.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										194
									
								
								archiver/solr-config-dir/lang/stopwords_no.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,194 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | A Norwegian stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
 | This stop word list is for the dominant bokmål dialect. Words unique
 | 
			
		||||
 | to nynorsk are marked *.
 | 
			
		||||
 | 
			
		||||
 | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
 | 
			
		||||
 | 
			
		||||
og             | and
 | 
			
		||||
i              | in
 | 
			
		||||
jeg            | I
 | 
			
		||||
det            | it/this/that
 | 
			
		||||
at             | to (w. inf.)
 | 
			
		||||
en             | a/an
 | 
			
		||||
et             | a/an
 | 
			
		||||
den            | it/this/that
 | 
			
		||||
til            | to
 | 
			
		||||
er             | is/am/are
 | 
			
		||||
som            | who/that
 | 
			
		||||
på             | on
 | 
			
		||||
de             | they / you(formal)
 | 
			
		||||
med            | with
 | 
			
		||||
han            | he
 | 
			
		||||
av             | of
 | 
			
		||||
ikke           | not
 | 
			
		||||
ikkje          | not *
 | 
			
		||||
der            | there
 | 
			
		||||
så             | so
 | 
			
		||||
var            | was/were
 | 
			
		||||
meg            | me
 | 
			
		||||
seg            | you
 | 
			
		||||
men            | but
 | 
			
		||||
ett            | one
 | 
			
		||||
har            | have
 | 
			
		||||
om             | about
 | 
			
		||||
vi             | we
 | 
			
		||||
min            | my
 | 
			
		||||
mitt           | my
 | 
			
		||||
ha             | have
 | 
			
		||||
hadde          | had
 | 
			
		||||
hun            | she
 | 
			
		||||
nå             | now
 | 
			
		||||
over           | over
 | 
			
		||||
da             | when/as
 | 
			
		||||
ved            | by/know
 | 
			
		||||
fra            | from
 | 
			
		||||
du             | you
 | 
			
		||||
ut             | out
 | 
			
		||||
sin            | your
 | 
			
		||||
dem            | them
 | 
			
		||||
oss            | us
 | 
			
		||||
opp            | up
 | 
			
		||||
man            | you/one
 | 
			
		||||
kan            | can
 | 
			
		||||
hans           | his
 | 
			
		||||
hvor           | where
 | 
			
		||||
eller          | or
 | 
			
		||||
hva            | what
 | 
			
		||||
skal           | shall/must
 | 
			
		||||
selv           | self (reflexive)
 | 
			
		||||
sjøl           | self (reflexive)
 | 
			
		||||
her            | here
 | 
			
		||||
alle           | all
 | 
			
		||||
vil            | will
 | 
			
		||||
bli            | become
 | 
			
		||||
ble            | became
 | 
			
		||||
blei           | became *
 | 
			
		||||
blitt          | have become
 | 
			
		||||
kunne          | could
 | 
			
		||||
inn            | in
 | 
			
		||||
når            | when
 | 
			
		||||
være           | be
 | 
			
		||||
kom            | come
 | 
			
		||||
noen           | some
 | 
			
		||||
noe            | some
 | 
			
		||||
ville          | would
 | 
			
		||||
dere           | you
 | 
			
		||||
som            | who/which/that
 | 
			
		||||
deres          | their/theirs
 | 
			
		||||
kun            | only/just
 | 
			
		||||
ja             | yes
 | 
			
		||||
etter          | after
 | 
			
		||||
ned            | down
 | 
			
		||||
skulle         | should
 | 
			
		||||
denne          | this
 | 
			
		||||
for            | for/because
 | 
			
		||||
deg            | you
 | 
			
		||||
si             | hers/his
 | 
			
		||||
sine           | hers/his
 | 
			
		||||
sitt           | hers/his
 | 
			
		||||
mot            | against
 | 
			
		||||
å              | to
 | 
			
		||||
meget          | much
 | 
			
		||||
hvorfor        | why
 | 
			
		||||
dette          | this
 | 
			
		||||
disse          | these/those
 | 
			
		||||
uten           | without
 | 
			
		||||
hvordan        | how
 | 
			
		||||
ingen          | none
 | 
			
		||||
din            | your
 | 
			
		||||
ditt           | your
 | 
			
		||||
blir           | become
 | 
			
		||||
samme          | same
 | 
			
		||||
hvilken        | which
 | 
			
		||||
hvilke         | which (plural)
 | 
			
		||||
sånn           | such a
 | 
			
		||||
inni           | inside/within
 | 
			
		||||
mellom         | between
 | 
			
		||||
vår            | our
 | 
			
		||||
hver           | each
 | 
			
		||||
hvem           | who
 | 
			
		||||
vors           | us/ours
 | 
			
		||||
hvis           | whose
 | 
			
		||||
både           | both
 | 
			
		||||
bare           | only/just
 | 
			
		||||
enn            | than
 | 
			
		||||
fordi          | as/because
 | 
			
		||||
før            | before
 | 
			
		||||
mange          | many
 | 
			
		||||
også           | also
 | 
			
		||||
slik           | just
 | 
			
		||||
vært           | been
 | 
			
		||||
være           | to be
 | 
			
		||||
båe            | both *
 | 
			
		||||
begge          | both
 | 
			
		||||
siden          | since
 | 
			
		||||
dykk           | your *
 | 
			
		||||
dykkar         | yours *
 | 
			
		||||
dei            | they *
 | 
			
		||||
deira          | them *
 | 
			
		||||
deires         | theirs *
 | 
			
		||||
deim           | them *
 | 
			
		||||
di             | your (fem.) *
 | 
			
		||||
då             | as/when *
 | 
			
		||||
eg             | I *
 | 
			
		||||
ein            | a/an *
 | 
			
		||||
eit            | a/an *
 | 
			
		||||
eitt           | a/an *
 | 
			
		||||
elles          | or *
 | 
			
		||||
honom          | he *
 | 
			
		||||
hjå            | at *
 | 
			
		||||
ho             | she *
 | 
			
		||||
hoe            | she *
 | 
			
		||||
henne          | her
 | 
			
		||||
hennar         | her/hers
 | 
			
		||||
hennes         | hers
 | 
			
		||||
hoss           | how *
 | 
			
		||||
hossen         | how *
 | 
			
		||||
ikkje          | not *
 | 
			
		||||
ingi           | noone *
 | 
			
		||||
inkje          | noone *
 | 
			
		||||
korleis        | how *
 | 
			
		||||
korso          | how *
 | 
			
		||||
kva            | what/which *
 | 
			
		||||
kvar           | where *
 | 
			
		||||
kvarhelst      | where *
 | 
			
		||||
kven           | who/whom *
 | 
			
		||||
kvi            | why *
 | 
			
		||||
kvifor         | why *
 | 
			
		||||
me             | we *
 | 
			
		||||
medan          | while *
 | 
			
		||||
mi             | my *
 | 
			
		||||
mine           | my *
 | 
			
		||||
mykje          | much *
 | 
			
		||||
no             | now *
 | 
			
		||||
nokon          | some (masc./neut.) *
 | 
			
		||||
noka           | some (fem.) *
 | 
			
		||||
nokor          | some *
 | 
			
		||||
noko           | some *
 | 
			
		||||
nokre          | some *
 | 
			
		||||
si             | his/hers *
 | 
			
		||||
sia            | since *
 | 
			
		||||
sidan          | since *
 | 
			
		||||
so             | so *
 | 
			
		||||
somt           | some *
 | 
			
		||||
somme          | some *
 | 
			
		||||
um             | about *
 | 
			
		||||
upp            | up *
 | 
			
		||||
vere           | be *
 | 
			
		||||
vore           | was *
 | 
			
		||||
verte          | become *
 | 
			
		||||
vort           | become *
 | 
			
		||||
varte          | became *
 | 
			
		||||
vart           | became *
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										253
									
								
								archiver/solr-config-dir/lang/stopwords_pt.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										253
									
								
								archiver/solr-config-dir/lang/stopwords_pt.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,253 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | A Portuguese stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | The following is a ranked list (commonest to rarest) of stopwords
 | 
			
		||||
 | deriving from a large sample of text.
 | 
			
		||||
 | 
			
		||||
 | Extra words have been added at the end.
 | 
			
		||||
 | 
			
		||||
de             |  of, from
 | 
			
		||||
a              |  the; to, at; her
 | 
			
		||||
o              |  the; him
 | 
			
		||||
que            |  who, that
 | 
			
		||||
e              |  and
 | 
			
		||||
do             |  de + o
 | 
			
		||||
da             |  de + a
 | 
			
		||||
em             |  in
 | 
			
		||||
um             |  a
 | 
			
		||||
para           |  for
 | 
			
		||||
  | é          from SER
 | 
			
		||||
com            |  with
 | 
			
		||||
não            |  not, no
 | 
			
		||||
uma            |  a
 | 
			
		||||
os             |  the; them
 | 
			
		||||
no             |  em + o
 | 
			
		||||
se             |  himself etc
 | 
			
		||||
na             |  em + a
 | 
			
		||||
por            |  for
 | 
			
		||||
mais           |  more
 | 
			
		||||
as             |  the; them
 | 
			
		||||
dos            |  de + os
 | 
			
		||||
como           |  as, like
 | 
			
		||||
mas            |  but
 | 
			
		||||
  | foi        from SER
 | 
			
		||||
ao             |  a + o
 | 
			
		||||
ele            |  he
 | 
			
		||||
das            |  de + as
 | 
			
		||||
  | tem        from TER
 | 
			
		||||
à              |  a + a
 | 
			
		||||
seu            |  his
 | 
			
		||||
sua            |  her
 | 
			
		||||
ou             |  or
 | 
			
		||||
  | ser        from SER
 | 
			
		||||
quando         |  when
 | 
			
		||||
muito          |  much
 | 
			
		||||
  | há         from HAV
 | 
			
		||||
nos            |  em + os; us
 | 
			
		||||
já             |  already, now
 | 
			
		||||
  | está       from EST
 | 
			
		||||
eu             |  I
 | 
			
		||||
também         |  also
 | 
			
		||||
só             |  only, just
 | 
			
		||||
pelo           |  per + o
 | 
			
		||||
pela           |  per + a
 | 
			
		||||
até            |  up to
 | 
			
		||||
isso           |  that
 | 
			
		||||
ela            |  she
 | 
			
		||||
entre          |  between
 | 
			
		||||
  | era        from SER
 | 
			
		||||
depois         |  after
 | 
			
		||||
sem            |  without
 | 
			
		||||
mesmo          |  same
 | 
			
		||||
aos            |  a + os
 | 
			
		||||
  | ter        from TER
 | 
			
		||||
seus           |  his
 | 
			
		||||
quem           |  whom
 | 
			
		||||
nas            |  em + as
 | 
			
		||||
me             |  me
 | 
			
		||||
esse           |  that
 | 
			
		||||
eles           |  they
 | 
			
		||||
  | estão      from EST
 | 
			
		||||
você           |  you
 | 
			
		||||
  | tinha      from TER
 | 
			
		||||
  | foram      from SER
 | 
			
		||||
essa           |  that
 | 
			
		||||
num            |  em + um
 | 
			
		||||
nem            |  nor
 | 
			
		||||
suas           |  her
 | 
			
		||||
meu            |  my
 | 
			
		||||
às             |  a + as
 | 
			
		||||
minha          |  my
 | 
			
		||||
  | têm        from TER
 | 
			
		||||
numa           |  em + uma
 | 
			
		||||
pelos          |  per + os
 | 
			
		||||
elas           |  they
 | 
			
		||||
  | havia      from HAV
 | 
			
		||||
  | seja       from SER
 | 
			
		||||
qual           |  which
 | 
			
		||||
  | será       from SER
 | 
			
		||||
nós            |  we
 | 
			
		||||
  | tenho      from TER
 | 
			
		||||
lhe            |  to him, her
 | 
			
		||||
deles          |  of them
 | 
			
		||||
essas          |  those
 | 
			
		||||
esses          |  those
 | 
			
		||||
pelas          |  per + as
 | 
			
		||||
este           |  this
 | 
			
		||||
  | fosse      from SER
 | 
			
		||||
dele           |  of him
 | 
			
		||||
 | 
			
		||||
 | other words. There are many contractions such as naquele = em+aquele,
 | 
			
		||||
 | mo = me+o, but they are rare.
 | 
			
		||||
 | Indefinite article plural forms are also rare.
 | 
			
		||||
 | 
			
		||||
tu             |  thou
 | 
			
		||||
te             |  thee
 | 
			
		||||
vocês          |  you (plural)
 | 
			
		||||
vos            |  you
 | 
			
		||||
lhes           |  to them
 | 
			
		||||
meus           |  my
 | 
			
		||||
minhas
 | 
			
		||||
teu            |  thy
 | 
			
		||||
tua
 | 
			
		||||
teus
 | 
			
		||||
tuas
 | 
			
		||||
nosso          | our
 | 
			
		||||
nossa
 | 
			
		||||
nossos
 | 
			
		||||
nossas
 | 
			
		||||
 | 
			
		||||
dela           |  of her
 | 
			
		||||
delas          |  of them
 | 
			
		||||
 | 
			
		||||
esta           |  this
 | 
			
		||||
estes          |  these
 | 
			
		||||
estas          |  these
 | 
			
		||||
aquele         |  that
 | 
			
		||||
aquela         |  that
 | 
			
		||||
aqueles        |  those
 | 
			
		||||
aquelas        |  those
 | 
			
		||||
isto           |  this
 | 
			
		||||
aquilo         |  that
 | 
			
		||||
 | 
			
		||||
               | forms of estar, to be (not including the infinitive):
 | 
			
		||||
estou
 | 
			
		||||
está
 | 
			
		||||
estamos
 | 
			
		||||
estão
 | 
			
		||||
estive
 | 
			
		||||
esteve
 | 
			
		||||
estivemos
 | 
			
		||||
estiveram
 | 
			
		||||
estava
 | 
			
		||||
estávamos
 | 
			
		||||
estavam
 | 
			
		||||
estivera
 | 
			
		||||
estivéramos
 | 
			
		||||
esteja
 | 
			
		||||
estejamos
 | 
			
		||||
estejam
 | 
			
		||||
estivesse
 | 
			
		||||
estivéssemos
 | 
			
		||||
estivessem
 | 
			
		||||
estiver
 | 
			
		||||
estivermos
 | 
			
		||||
estiverem
 | 
			
		||||
 | 
			
		||||
               | forms of haver, to have (not including the infinitive):
 | 
			
		||||
hei
 | 
			
		||||
há
 | 
			
		||||
havemos
 | 
			
		||||
hão
 | 
			
		||||
houve
 | 
			
		||||
houvemos
 | 
			
		||||
houveram
 | 
			
		||||
houvera
 | 
			
		||||
houvéramos
 | 
			
		||||
haja
 | 
			
		||||
hajamos
 | 
			
		||||
hajam
 | 
			
		||||
houvesse
 | 
			
		||||
houvéssemos
 | 
			
		||||
houvessem
 | 
			
		||||
houver
 | 
			
		||||
houvermos
 | 
			
		||||
houverem
 | 
			
		||||
houverei
 | 
			
		||||
houverá
 | 
			
		||||
houveremos
 | 
			
		||||
houverão
 | 
			
		||||
houveria
 | 
			
		||||
houveríamos
 | 
			
		||||
houveriam
 | 
			
		||||
 | 
			
		||||
               | forms of ser, to be (not including the infinitive):
 | 
			
		||||
sou
 | 
			
		||||
somos
 | 
			
		||||
são
 | 
			
		||||
era
 | 
			
		||||
éramos
 | 
			
		||||
eram
 | 
			
		||||
fui
 | 
			
		||||
foi
 | 
			
		||||
fomos
 | 
			
		||||
foram
 | 
			
		||||
fora
 | 
			
		||||
fôramos
 | 
			
		||||
seja
 | 
			
		||||
sejamos
 | 
			
		||||
sejam
 | 
			
		||||
fosse
 | 
			
		||||
fôssemos
 | 
			
		||||
fossem
 | 
			
		||||
for
 | 
			
		||||
formos
 | 
			
		||||
forem
 | 
			
		||||
serei
 | 
			
		||||
será
 | 
			
		||||
seremos
 | 
			
		||||
serão
 | 
			
		||||
seria
 | 
			
		||||
seríamos
 | 
			
		||||
seriam
 | 
			
		||||
 | 
			
		||||
               | forms of ter, to have (not including the infinitive):
 | 
			
		||||
tenho
 | 
			
		||||
tem
 | 
			
		||||
temos
 | 
			
		||||
tém
 | 
			
		||||
tinha
 | 
			
		||||
tínhamos
 | 
			
		||||
tinham
 | 
			
		||||
tive
 | 
			
		||||
teve
 | 
			
		||||
tivemos
 | 
			
		||||
tiveram
 | 
			
		||||
tivera
 | 
			
		||||
tivéramos
 | 
			
		||||
tenha
 | 
			
		||||
tenhamos
 | 
			
		||||
tenham
 | 
			
		||||
tivesse
 | 
			
		||||
tivéssemos
 | 
			
		||||
tivessem
 | 
			
		||||
tiver
 | 
			
		||||
tivermos
 | 
			
		||||
tiverem
 | 
			
		||||
terei
 | 
			
		||||
terá
 | 
			
		||||
teremos
 | 
			
		||||
terão
 | 
			
		||||
teria
 | 
			
		||||
teríamos
 | 
			
		||||
teriam
 | 
			
		||||
							
								
								
									
										233
									
								
								archiver/solr-config-dir/lang/stopwords_ro.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										233
									
								
								archiver/solr-config-dir/lang/stopwords_ro.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,233 @@
 | 
			
		|||
# This file was created by Jacques Savoy and is distributed under the BSD license.
 | 
			
		||||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
 | 
			
		||||
# Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
acea
 | 
			
		||||
aceasta
 | 
			
		||||
această
 | 
			
		||||
aceea
 | 
			
		||||
acei
 | 
			
		||||
aceia
 | 
			
		||||
acel
 | 
			
		||||
acela
 | 
			
		||||
acele
 | 
			
		||||
acelea
 | 
			
		||||
acest
 | 
			
		||||
acesta
 | 
			
		||||
aceste
 | 
			
		||||
acestea
 | 
			
		||||
aceşti
 | 
			
		||||
aceştia
 | 
			
		||||
acolo
 | 
			
		||||
acum
 | 
			
		||||
ai
 | 
			
		||||
aia
 | 
			
		||||
aibă
 | 
			
		||||
aici
 | 
			
		||||
al
 | 
			
		||||
ăla
 | 
			
		||||
ale
 | 
			
		||||
alea
 | 
			
		||||
ălea
 | 
			
		||||
altceva
 | 
			
		||||
altcineva
 | 
			
		||||
am
 | 
			
		||||
ar
 | 
			
		||||
are
 | 
			
		||||
aş
 | 
			
		||||
aşadar
 | 
			
		||||
asemenea
 | 
			
		||||
asta
 | 
			
		||||
ăsta
 | 
			
		||||
astăzi
 | 
			
		||||
astea
 | 
			
		||||
ăstea
 | 
			
		||||
ăştia
 | 
			
		||||
asupra
 | 
			
		||||
aţi
 | 
			
		||||
au
 | 
			
		||||
avea
 | 
			
		||||
avem
 | 
			
		||||
aveţi
 | 
			
		||||
azi
 | 
			
		||||
bine
 | 
			
		||||
bucur
 | 
			
		||||
bună
 | 
			
		||||
ca
 | 
			
		||||
că
 | 
			
		||||
căci
 | 
			
		||||
când
 | 
			
		||||
care
 | 
			
		||||
cărei
 | 
			
		||||
căror
 | 
			
		||||
cărui
 | 
			
		||||
cât
 | 
			
		||||
câte
 | 
			
		||||
câţi
 | 
			
		||||
către
 | 
			
		||||
câtva
 | 
			
		||||
ce
 | 
			
		||||
cel
 | 
			
		||||
ceva
 | 
			
		||||
chiar
 | 
			
		||||
cînd
 | 
			
		||||
cine
 | 
			
		||||
cineva
 | 
			
		||||
cît
 | 
			
		||||
cîte
 | 
			
		||||
cîţi
 | 
			
		||||
cîtva
 | 
			
		||||
contra
 | 
			
		||||
cu
 | 
			
		||||
cum
 | 
			
		||||
cumva
 | 
			
		||||
curând
 | 
			
		||||
curînd
 | 
			
		||||
da
 | 
			
		||||
dă
 | 
			
		||||
dacă
 | 
			
		||||
dar
 | 
			
		||||
datorită
 | 
			
		||||
de
 | 
			
		||||
deci
 | 
			
		||||
deja
 | 
			
		||||
deoarece
 | 
			
		||||
departe
 | 
			
		||||
deşi
 | 
			
		||||
din
 | 
			
		||||
dinaintea
 | 
			
		||||
dintr
 | 
			
		||||
dintre
 | 
			
		||||
drept
 | 
			
		||||
după
 | 
			
		||||
ea
 | 
			
		||||
ei
 | 
			
		||||
el
 | 
			
		||||
ele
 | 
			
		||||
eram
 | 
			
		||||
este
 | 
			
		||||
eşti
 | 
			
		||||
eu
 | 
			
		||||
face
 | 
			
		||||
fără
 | 
			
		||||
fi
 | 
			
		||||
fie
 | 
			
		||||
fiecare
 | 
			
		||||
fii
 | 
			
		||||
fim
 | 
			
		||||
fiţi
 | 
			
		||||
iar
 | 
			
		||||
ieri
 | 
			
		||||
îi
 | 
			
		||||
îl
 | 
			
		||||
îmi
 | 
			
		||||
împotriva
 | 
			
		||||
în 
 | 
			
		||||
înainte
 | 
			
		||||
înaintea
 | 
			
		||||
încât
 | 
			
		||||
încît
 | 
			
		||||
încotro
 | 
			
		||||
între
 | 
			
		||||
întrucât
 | 
			
		||||
întrucît
 | 
			
		||||
îţi
 | 
			
		||||
la
 | 
			
		||||
lângă
 | 
			
		||||
le
 | 
			
		||||
li
 | 
			
		||||
lîngă
 | 
			
		||||
lor
 | 
			
		||||
lui
 | 
			
		||||
mă
 | 
			
		||||
mâine
 | 
			
		||||
mea
 | 
			
		||||
mei
 | 
			
		||||
mele
 | 
			
		||||
mereu
 | 
			
		||||
meu
 | 
			
		||||
mi
 | 
			
		||||
mine
 | 
			
		||||
mult
 | 
			
		||||
multă
 | 
			
		||||
mulţi
 | 
			
		||||
ne
 | 
			
		||||
nicăieri
 | 
			
		||||
nici
 | 
			
		||||
nimeni
 | 
			
		||||
nişte
 | 
			
		||||
noastră
 | 
			
		||||
noastre
 | 
			
		||||
noi
 | 
			
		||||
noştri
 | 
			
		||||
nostru
 | 
			
		||||
nu
 | 
			
		||||
ori
 | 
			
		||||
oricând
 | 
			
		||||
oricare
 | 
			
		||||
oricât
 | 
			
		||||
orice
 | 
			
		||||
oricînd
 | 
			
		||||
oricine
 | 
			
		||||
oricît
 | 
			
		||||
oricum
 | 
			
		||||
oriunde
 | 
			
		||||
până
 | 
			
		||||
pe
 | 
			
		||||
pentru
 | 
			
		||||
peste
 | 
			
		||||
pînă
 | 
			
		||||
poate
 | 
			
		||||
pot
 | 
			
		||||
prea
 | 
			
		||||
prima
 | 
			
		||||
primul
 | 
			
		||||
prin
 | 
			
		||||
printr
 | 
			
		||||
sa
 | 
			
		||||
să
 | 
			
		||||
săi
 | 
			
		||||
sale
 | 
			
		||||
sau
 | 
			
		||||
său
 | 
			
		||||
se
 | 
			
		||||
şi
 | 
			
		||||
sînt
 | 
			
		||||
sîntem
 | 
			
		||||
sînteţi
 | 
			
		||||
spre
 | 
			
		||||
sub
 | 
			
		||||
sunt
 | 
			
		||||
suntem
 | 
			
		||||
sunteţi
 | 
			
		||||
ta
 | 
			
		||||
tăi
 | 
			
		||||
tale
 | 
			
		||||
tău
 | 
			
		||||
te
 | 
			
		||||
ţi
 | 
			
		||||
ţie
 | 
			
		||||
tine
 | 
			
		||||
toată
 | 
			
		||||
toate
 | 
			
		||||
tot
 | 
			
		||||
toţi
 | 
			
		||||
totuşi
 | 
			
		||||
tu
 | 
			
		||||
un
 | 
			
		||||
una
 | 
			
		||||
unde
 | 
			
		||||
undeva
 | 
			
		||||
unei
 | 
			
		||||
unele
 | 
			
		||||
uneori
 | 
			
		||||
unor
 | 
			
		||||
vă
 | 
			
		||||
vi
 | 
			
		||||
voastră
 | 
			
		||||
voastre
 | 
			
		||||
voi
 | 
			
		||||
voştri
 | 
			
		||||
vostru
 | 
			
		||||
vouă
 | 
			
		||||
vreo
 | 
			
		||||
vreun
 | 
			
		||||
							
								
								
									
										243
									
								
								archiver/solr-config-dir/lang/stopwords_ru.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										243
									
								
								archiver/solr-config-dir/lang/stopwords_ru.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,243 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | A Russian stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
 | This is a ranked list (commonest to rarest) of stopwords derived from
 | 
			
		||||
 | a large text sample.
 | 
			
		||||
 | 
			
		||||
 | Letter `ё' is translated to `е'.
 | 
			
		||||
 | 
			
		||||
и              | and
 | 
			
		||||
в              | in/into
 | 
			
		||||
во             | alternative form
 | 
			
		||||
не             | not
 | 
			
		||||
что            | what/that
 | 
			
		||||
он             | he
 | 
			
		||||
на             | on/onto
 | 
			
		||||
я              | i
 | 
			
		||||
с              | from
 | 
			
		||||
со             | alternative form
 | 
			
		||||
как            | how
 | 
			
		||||
а              | milder form of `no' (but)
 | 
			
		||||
то             | conjunction and form of `that'
 | 
			
		||||
все            | all
 | 
			
		||||
она            | she
 | 
			
		||||
так            | so, thus
 | 
			
		||||
его            | him
 | 
			
		||||
но             | but
 | 
			
		||||
да             | yes/and
 | 
			
		||||
ты             | thou
 | 
			
		||||
к              | towards, by
 | 
			
		||||
у              | around, chez
 | 
			
		||||
же             | intensifier particle
 | 
			
		||||
вы             | you
 | 
			
		||||
за             | beyond, behind
 | 
			
		||||
бы             | conditional/subj. particle
 | 
			
		||||
по             | up to, along
 | 
			
		||||
только         | only
 | 
			
		||||
ее             | her
 | 
			
		||||
мне            | to me
 | 
			
		||||
было           | it was
 | 
			
		||||
вот            | here is/are, particle
 | 
			
		||||
от             | away from
 | 
			
		||||
меня           | me
 | 
			
		||||
еще            | still, yet, more
 | 
			
		||||
нет            | no, there isn't/aren't
 | 
			
		||||
о              | about
 | 
			
		||||
из             | out of
 | 
			
		||||
ему            | to him
 | 
			
		||||
теперь         | now
 | 
			
		||||
когда          | when
 | 
			
		||||
даже           | even
 | 
			
		||||
ну             | so, well
 | 
			
		||||
вдруг          | suddenly
 | 
			
		||||
ли             | interrogative particle
 | 
			
		||||
если           | if
 | 
			
		||||
уже            | already, but homonym of `narrower'
 | 
			
		||||
или            | or
 | 
			
		||||
ни             | neither
 | 
			
		||||
быть           | to be
 | 
			
		||||
был            | he was
 | 
			
		||||
него           | prepositional form of его
 | 
			
		||||
до             | up to
 | 
			
		||||
вас            | you accusative
 | 
			
		||||
нибудь         | indef. suffix preceded by hyphen
 | 
			
		||||
опять          | again
 | 
			
		||||
уж             | already, but homonym of `adder'
 | 
			
		||||
вам            | to you
 | 
			
		||||
сказал         | he said
 | 
			
		||||
ведь           | particle `after all'
 | 
			
		||||
там            | there
 | 
			
		||||
потом          | then
 | 
			
		||||
себя           | oneself
 | 
			
		||||
ничего         | nothing
 | 
			
		||||
ей             | to her
 | 
			
		||||
может          | usually with `быть' as `maybe'
 | 
			
		||||
они            | they
 | 
			
		||||
тут            | here
 | 
			
		||||
где            | where
 | 
			
		||||
есть           | there is/are
 | 
			
		||||
надо           | got to, must
 | 
			
		||||
ней            | prepositional form of  ей
 | 
			
		||||
для            | for
 | 
			
		||||
мы             | we
 | 
			
		||||
тебя           | thee
 | 
			
		||||
их             | them, their
 | 
			
		||||
чем            | than
 | 
			
		||||
была           | she was
 | 
			
		||||
сам            | self
 | 
			
		||||
чтоб           | in order to
 | 
			
		||||
без            | without
 | 
			
		||||
будто          | as if
 | 
			
		||||
человек        | man, person, one
 | 
			
		||||
чего           | genitive form of `what'
 | 
			
		||||
раз            | once
 | 
			
		||||
тоже           | also
 | 
			
		||||
себе           | to oneself
 | 
			
		||||
под            | beneath
 | 
			
		||||
жизнь          | life
 | 
			
		||||
будет          | will be
 | 
			
		||||
ж              | short form of intensifier particle `же'
 | 
			
		||||
тогда          | then
 | 
			
		||||
кто            | who
 | 
			
		||||
этот           | this
 | 
			
		||||
говорил        | was saying
 | 
			
		||||
того           | genitive form of `that'
 | 
			
		||||
потому         | for that reason
 | 
			
		||||
этого          | genitive form of `this'
 | 
			
		||||
какой          | which
 | 
			
		||||
совсем         | altogether
 | 
			
		||||
ним            | prepositional form of `его', `они'
 | 
			
		||||
здесь          | here
 | 
			
		||||
этом           | prepositional form of `этот'
 | 
			
		||||
один           | one
 | 
			
		||||
почти          | almost
 | 
			
		||||
мой            | my
 | 
			
		||||
тем            | instrumental/dative plural of `тот', `то'
 | 
			
		||||
чтобы          | full form of `in order that'
 | 
			
		||||
нее            | her (acc.)
 | 
			
		||||
кажется        | it seems
 | 
			
		||||
сейчас         | now
 | 
			
		||||
были           | they were
 | 
			
		||||
куда           | where to
 | 
			
		||||
зачем          | why
 | 
			
		||||
сказать        | to say
 | 
			
		||||
всех           | all (acc., gen. preposn. plural)
 | 
			
		||||
никогда        | never
 | 
			
		||||
сегодня        | today
 | 
			
		||||
можно          | possible, one can
 | 
			
		||||
при            | by
 | 
			
		||||
наконец        | finally
 | 
			
		||||
два            | two
 | 
			
		||||
об             | alternative form of `о', about
 | 
			
		||||
другой         | another
 | 
			
		||||
хоть           | even
 | 
			
		||||
после          | after
 | 
			
		||||
над            | above
 | 
			
		||||
больше         | more
 | 
			
		||||
тот            | that one (masc.)
 | 
			
		||||
через          | across, in
 | 
			
		||||
эти            | these
 | 
			
		||||
нас            | us
 | 
			
		||||
про            | about
 | 
			
		||||
всего          | in all, only, of all
 | 
			
		||||
них            | prepositional form of `они' (they)
 | 
			
		||||
какая          | which, feminine
 | 
			
		||||
много          | lots
 | 
			
		||||
разве          | interrogative particle
 | 
			
		||||
сказала        | she said
 | 
			
		||||
три            | three
 | 
			
		||||
эту            | this, acc. fem. sing.
 | 
			
		||||
моя            | my, feminine
 | 
			
		||||
впрочем        | moreover, besides
 | 
			
		||||
хорошо         | good
 | 
			
		||||
свою           | ones own, acc. fem. sing.
 | 
			
		||||
этой           | oblique form of `эта', fem. `this'
 | 
			
		||||
перед          | in front of
 | 
			
		||||
иногда         | sometimes
 | 
			
		||||
лучше          | better
 | 
			
		||||
чуть           | a little
 | 
			
		||||
том            | preposn. form of `that one'
 | 
			
		||||
нельзя         | one must not
 | 
			
		||||
такой          | such a one
 | 
			
		||||
им             | to them
 | 
			
		||||
более          | more
 | 
			
		||||
всегда         | always
 | 
			
		||||
конечно        | of course
 | 
			
		||||
всю            | acc. fem. sing of `all'
 | 
			
		||||
между          | between
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  | b: some paradigms
 | 
			
		||||
  |
 | 
			
		||||
  | personal pronouns
 | 
			
		||||
  |
 | 
			
		||||
  | я  меня  мне  мной  [мною]
 | 
			
		||||
  | ты  тебя  тебе  тобой  [тобою]
 | 
			
		||||
  | он  его  ему  им  [него, нему, ним]
 | 
			
		||||
  | она  ее  ей  ею  [нее, ней, нею]
 | 
			
		||||
  | оно  его  ему  им  [него, нему, ним]
 | 
			
		||||
  |
 | 
			
		||||
  | мы  нас  нам  нами
 | 
			
		||||
  | вы  вас  вам  вами
 | 
			
		||||
  | они  их  им  ими  [них, ним, ними]
 | 
			
		||||
  |
 | 
			
		||||
  |   себя  себе  собой   [собою]
 | 
			
		||||
  |
 | 
			
		||||
  | demonstrative pronouns: этот (this), тот (that)
 | 
			
		||||
  |
 | 
			
		||||
  | этот  эта  это  эти
 | 
			
		||||
  | этого  эту  это  эти
 | 
			
		||||
  | этого  этой  этого  этих
 | 
			
		||||
  | этому  этой  этому  этим
 | 
			
		||||
  | этим  этой  этим  [этою]  этими
 | 
			
		||||
  | этом  этой  этом  этих
 | 
			
		||||
  |
 | 
			
		||||
  | тот  та  то  те
 | 
			
		||||
  | того  ту  то  те
 | 
			
		||||
  | того  той  того  тех
 | 
			
		||||
  | тому  той  тому  тем
 | 
			
		||||
  | тем  той  тем  [тою]  теми
 | 
			
		||||
  | том  той  том  тех
 | 
			
		||||
  |
 | 
			
		||||
  | determinative pronouns
 | 
			
		||||
  |
 | 
			
		||||
  | (a) весь (all)
 | 
			
		||||
  |
 | 
			
		||||
  | весь  вся  все  все
 | 
			
		||||
  | всего  всю  все  все
 | 
			
		||||
  | всего  всей  всего  всех
 | 
			
		||||
  | всему  всей  всему  всем
 | 
			
		||||
  | всем  всей  всем  [всею]  всеми
 | 
			
		||||
  | всем  всей  всем  всех
 | 
			
		||||
  |
 | 
			
		||||
  | (b) сам (himself etc)
 | 
			
		||||
  |
 | 
			
		||||
  | сам  сама  само  сами
 | 
			
		||||
  | самого саму  само  самих
 | 
			
		||||
  | самого самой самого  самих
 | 
			
		||||
  | самому самой самому  самим
 | 
			
		||||
  | самим  самой  самим  [самою]  самими
 | 
			
		||||
  | самом самой самом  самих
 | 
			
		||||
  |
 | 
			
		||||
  | stems of verbs `to be', `to have', `to do' and modal
 | 
			
		||||
  |
 | 
			
		||||
  | быть  бы  буд  быв  есть  суть
 | 
			
		||||
  | име
 | 
			
		||||
  | дел
 | 
			
		||||
  | мог   мож  мочь
 | 
			
		||||
  | уме
 | 
			
		||||
  | хоч  хот
 | 
			
		||||
  | долж
 | 
			
		||||
  | можн
 | 
			
		||||
  | нужн
 | 
			
		||||
  | нельзя
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										133
									
								
								archiver/solr-config-dir/lang/stopwords_sv.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								archiver/solr-config-dir/lang/stopwords_sv.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,133 @@
 | 
			
		|||
 | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
 | 
			
		||||
 | This file is distributed under the BSD License.
 | 
			
		||||
 | See http://snowball.tartarus.org/license.php
 | 
			
		||||
 | Also see http://www.opensource.org/licenses/bsd-license.html
 | 
			
		||||
 |  - Encoding was converted to UTF-8.
 | 
			
		||||
 |  - This notice was added.
 | 
			
		||||
 |
 | 
			
		||||
 | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
 | 
			
		||||
 | 
			
		||||
 | A Swedish stop word list. Comments begin with vertical bar. Each stop
 | 
			
		||||
 | word is at the start of a line.
 | 
			
		||||
 | 
			
		||||
 | This is a ranked list (commonest to rarest) of stopwords derived from
 | 
			
		||||
 | a large text sample.
 | 
			
		||||
 | 
			
		||||
 | Swedish stop words occasionally exhibit homonym clashes. For example
 | 
			
		||||
 |  så = so, but also seed. These are indicated clearly below.
 | 
			
		||||
 | 
			
		||||
och            | and
 | 
			
		||||
det            | it, this/that
 | 
			
		||||
att            | to (with infinitive)
 | 
			
		||||
i              | in, at
 | 
			
		||||
en             | a
 | 
			
		||||
jag            | I
 | 
			
		||||
hon            | she
 | 
			
		||||
som            | who, that
 | 
			
		||||
han            | he
 | 
			
		||||
på             | on
 | 
			
		||||
den            | it, this/that
 | 
			
		||||
med            | with
 | 
			
		||||
var            | where, each
 | 
			
		||||
sig            | him(self) etc
 | 
			
		||||
för            | for
 | 
			
		||||
så             | so (also: seed)
 | 
			
		||||
till           | to
 | 
			
		||||
är             | is
 | 
			
		||||
men            | but
 | 
			
		||||
ett            | a
 | 
			
		||||
om             | if; around, about
 | 
			
		||||
hade           | had
 | 
			
		||||
de             | they, these/those
 | 
			
		||||
av             | of
 | 
			
		||||
icke           | not, no
 | 
			
		||||
mig            | me
 | 
			
		||||
du             | you
 | 
			
		||||
henne          | her
 | 
			
		||||
då             | then, when
 | 
			
		||||
sin            | his
 | 
			
		||||
nu             | now
 | 
			
		||||
har            | have
 | 
			
		||||
inte           | inte någon = no one
 | 
			
		||||
hans           | his
 | 
			
		||||
honom          | him
 | 
			
		||||
skulle         | should, would
 | 
			
		||||
hennes         | her
 | 
			
		||||
där            | there
 | 
			
		||||
min            | my
 | 
			
		||||
man            | one (pronoun)
 | 
			
		||||
ej             | nor
 | 
			
		||||
vid            | at, by, on (also: vast)
 | 
			
		||||
kunde          | could
 | 
			
		||||
något          | some etc
 | 
			
		||||
från           | from, off
 | 
			
		||||
ut             | out
 | 
			
		||||
när            | when
 | 
			
		||||
efter          | after, behind
 | 
			
		||||
upp            | up
 | 
			
		||||
vi             | we
 | 
			
		||||
dem            | them
 | 
			
		||||
vara           | be
 | 
			
		||||
vad            | what
 | 
			
		||||
över           | over
 | 
			
		||||
än             | than
 | 
			
		||||
dig            | you
 | 
			
		||||
kan            | can
 | 
			
		||||
sina           | his
 | 
			
		||||
här            | here
 | 
			
		||||
ha             | have
 | 
			
		||||
mot            | towards
 | 
			
		||||
alla           | all
 | 
			
		||||
under          | under (also: wonder)
 | 
			
		||||
någon          | some etc
 | 
			
		||||
eller          | or (else)
 | 
			
		||||
allt           | all
 | 
			
		||||
mycket         | much
 | 
			
		||||
sedan          | since
 | 
			
		||||
ju             | why
 | 
			
		||||
denna          | this/that
 | 
			
		||||
själv          | myself, yourself etc
 | 
			
		||||
detta          | this/that
 | 
			
		||||
åt             | to
 | 
			
		||||
utan           | without
 | 
			
		||||
varit          | was
 | 
			
		||||
hur            | how
 | 
			
		||||
ingen          | no
 | 
			
		||||
mitt           | my
 | 
			
		||||
ni             | you
 | 
			
		||||
bli            | to be, become
 | 
			
		||||
blev           | from bli
 | 
			
		||||
oss            | us
 | 
			
		||||
din            | thy
 | 
			
		||||
dessa          | these/those
 | 
			
		||||
några          | some etc
 | 
			
		||||
deras          | their
 | 
			
		||||
blir           | from bli
 | 
			
		||||
mina           | my
 | 
			
		||||
samma          | (the) same
 | 
			
		||||
vilken         | who, that
 | 
			
		||||
er             | you, your
 | 
			
		||||
sådan          | such a
 | 
			
		||||
vår            | our
 | 
			
		||||
blivit         | from bli
 | 
			
		||||
dess           | its
 | 
			
		||||
inom           | within
 | 
			
		||||
mellan         | between
 | 
			
		||||
sådant         | such a
 | 
			
		||||
varför         | why
 | 
			
		||||
varje          | each
 | 
			
		||||
vilka          | who, that
 | 
			
		||||
ditt           | thy
 | 
			
		||||
vem            | who
 | 
			
		||||
vilket         | who, that
 | 
			
		||||
sitta          | his
 | 
			
		||||
sådana         | such a
 | 
			
		||||
vart           | each
 | 
			
		||||
dina           | thy
 | 
			
		||||
vars           | whose
 | 
			
		||||
vårt           | our
 | 
			
		||||
våra           | our
 | 
			
		||||
ert            | your
 | 
			
		||||
era            | your
 | 
			
		||||
vilkas         | whose
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										119
									
								
								archiver/solr-config-dir/lang/stopwords_th.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								archiver/solr-config-dir/lang/stopwords_th.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,119 @@
 | 
			
		|||
# Thai stopwords from:
 | 
			
		||||
# "Opinion Detection in Thai Political News Columns
 | 
			
		||||
# Based on Subjectivity Analysis"
 | 
			
		||||
# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
 | 
			
		||||
ไว้
 | 
			
		||||
ไม่
 | 
			
		||||
ไป
 | 
			
		||||
ได้
 | 
			
		||||
ให้
 | 
			
		||||
ใน
 | 
			
		||||
โดย
 | 
			
		||||
แห่ง
 | 
			
		||||
แล้ว
 | 
			
		||||
และ
 | 
			
		||||
แรก
 | 
			
		||||
แบบ
 | 
			
		||||
แต่
 | 
			
		||||
เอง
 | 
			
		||||
เห็น
 | 
			
		||||
เลย
 | 
			
		||||
เริ่ม
 | 
			
		||||
เรา
 | 
			
		||||
เมื่อ
 | 
			
		||||
เพื่อ
 | 
			
		||||
เพราะ
 | 
			
		||||
เป็นการ
 | 
			
		||||
เป็น
 | 
			
		||||
เปิดเผย
 | 
			
		||||
เปิด
 | 
			
		||||
เนื่องจาก
 | 
			
		||||
เดียวกัน
 | 
			
		||||
เดียว
 | 
			
		||||
เช่น
 | 
			
		||||
เฉพาะ
 | 
			
		||||
เคย
 | 
			
		||||
เข้า
 | 
			
		||||
เขา
 | 
			
		||||
อีก
 | 
			
		||||
อาจ
 | 
			
		||||
อะไร
 | 
			
		||||
ออก
 | 
			
		||||
อย่าง
 | 
			
		||||
อยู่
 | 
			
		||||
อยาก
 | 
			
		||||
หาก
 | 
			
		||||
หลาย
 | 
			
		||||
หลังจาก
 | 
			
		||||
หลัง
 | 
			
		||||
หรือ
 | 
			
		||||
หนึ่ง
 | 
			
		||||
ส่วน
 | 
			
		||||
ส่ง
 | 
			
		||||
สุด
 | 
			
		||||
สําหรับ
 | 
			
		||||
ว่า
 | 
			
		||||
วัน
 | 
			
		||||
ลง
 | 
			
		||||
ร่วม
 | 
			
		||||
ราย
 | 
			
		||||
รับ
 | 
			
		||||
ระหว่าง
 | 
			
		||||
รวม
 | 
			
		||||
ยัง
 | 
			
		||||
มี
 | 
			
		||||
มาก
 | 
			
		||||
มา
 | 
			
		||||
พร้อม
 | 
			
		||||
พบ
 | 
			
		||||
ผ่าน
 | 
			
		||||
ผล
 | 
			
		||||
บาง
 | 
			
		||||
น่า
 | 
			
		||||
นี้
 | 
			
		||||
นํา
 | 
			
		||||
นั้น
 | 
			
		||||
นัก
 | 
			
		||||
นอกจาก
 | 
			
		||||
ทุก
 | 
			
		||||
ที่สุด
 | 
			
		||||
ที่
 | 
			
		||||
ทําให้
 | 
			
		||||
ทํา
 | 
			
		||||
ทาง
 | 
			
		||||
ทั้งนี้
 | 
			
		||||
ทั้ง
 | 
			
		||||
ถ้า
 | 
			
		||||
ถูก
 | 
			
		||||
ถึง
 | 
			
		||||
ต้อง
 | 
			
		||||
ต่างๆ
 | 
			
		||||
ต่าง
 | 
			
		||||
ต่อ
 | 
			
		||||
ตาม
 | 
			
		||||
ตั้งแต่
 | 
			
		||||
ตั้ง
 | 
			
		||||
ด้าน
 | 
			
		||||
ด้วย
 | 
			
		||||
ดัง
 | 
			
		||||
ซึ่ง
 | 
			
		||||
ช่วง
 | 
			
		||||
จึง
 | 
			
		||||
จาก
 | 
			
		||||
จัด
 | 
			
		||||
จะ
 | 
			
		||||
คือ
 | 
			
		||||
ความ
 | 
			
		||||
ครั้ง
 | 
			
		||||
คง
 | 
			
		||||
ขึ้น
 | 
			
		||||
ของ
 | 
			
		||||
ขอ
 | 
			
		||||
ขณะ
 | 
			
		||||
ก่อน
 | 
			
		||||
ก็
 | 
			
		||||
การ
 | 
			
		||||
กับ
 | 
			
		||||
กัน
 | 
			
		||||
กว่า
 | 
			
		||||
กล่าว
 | 
			
		||||
							
								
								
									
										212
									
								
								archiver/solr-config-dir/lang/stopwords_tr.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										212
									
								
								archiver/solr-config-dir/lang/stopwords_tr.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,212 @@
 | 
			
		|||
# Turkish stopwords from LUCENE-559
 | 
			
		||||
# merged with the list from "Information Retrieval on Turkish Texts"
 | 
			
		||||
#   (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
 | 
			
		||||
acaba
 | 
			
		||||
altmış
 | 
			
		||||
altı
 | 
			
		||||
ama
 | 
			
		||||
ancak
 | 
			
		||||
arada
 | 
			
		||||
aslında
 | 
			
		||||
ayrıca
 | 
			
		||||
bana
 | 
			
		||||
bazı
 | 
			
		||||
belki
 | 
			
		||||
ben
 | 
			
		||||
benden
 | 
			
		||||
beni
 | 
			
		||||
benim
 | 
			
		||||
beri
 | 
			
		||||
beş
 | 
			
		||||
bile
 | 
			
		||||
bin
 | 
			
		||||
bir
 | 
			
		||||
birçok
 | 
			
		||||
biri
 | 
			
		||||
birkaç
 | 
			
		||||
birkez
 | 
			
		||||
birşey
 | 
			
		||||
birşeyi
 | 
			
		||||
biz
 | 
			
		||||
bize
 | 
			
		||||
bizden
 | 
			
		||||
bizi
 | 
			
		||||
bizim
 | 
			
		||||
böyle
 | 
			
		||||
böylece
 | 
			
		||||
bu
 | 
			
		||||
buna
 | 
			
		||||
bunda
 | 
			
		||||
bundan
 | 
			
		||||
bunlar
 | 
			
		||||
bunları
 | 
			
		||||
bunların
 | 
			
		||||
bunu
 | 
			
		||||
bunun
 | 
			
		||||
burada
 | 
			
		||||
çok
 | 
			
		||||
çünkü
 | 
			
		||||
da
 | 
			
		||||
daha
 | 
			
		||||
dahi
 | 
			
		||||
de
 | 
			
		||||
defa
 | 
			
		||||
değil
 | 
			
		||||
diğer
 | 
			
		||||
diye
 | 
			
		||||
doksan
 | 
			
		||||
dokuz
 | 
			
		||||
dolayı
 | 
			
		||||
dolayısıyla
 | 
			
		||||
dört
 | 
			
		||||
edecek
 | 
			
		||||
eden
 | 
			
		||||
ederek
 | 
			
		||||
edilecek
 | 
			
		||||
ediliyor
 | 
			
		||||
edilmesi
 | 
			
		||||
ediyor
 | 
			
		||||
eğer
 | 
			
		||||
elli
 | 
			
		||||
en
 | 
			
		||||
etmesi
 | 
			
		||||
etti
 | 
			
		||||
ettiği
 | 
			
		||||
ettiğini
 | 
			
		||||
gibi
 | 
			
		||||
göre
 | 
			
		||||
halen
 | 
			
		||||
hangi
 | 
			
		||||
hatta
 | 
			
		||||
hem
 | 
			
		||||
henüz
 | 
			
		||||
hep
 | 
			
		||||
hepsi
 | 
			
		||||
her
 | 
			
		||||
herhangi
 | 
			
		||||
herkesin
 | 
			
		||||
hiç
 | 
			
		||||
hiçbir
 | 
			
		||||
için
 | 
			
		||||
iki
 | 
			
		||||
ile
 | 
			
		||||
ilgili
 | 
			
		||||
ise
 | 
			
		||||
işte
 | 
			
		||||
itibaren
 | 
			
		||||
itibariyle
 | 
			
		||||
kadar
 | 
			
		||||
karşın
 | 
			
		||||
katrilyon
 | 
			
		||||
kendi
 | 
			
		||||
kendilerine
 | 
			
		||||
kendini
 | 
			
		||||
kendisi
 | 
			
		||||
kendisine
 | 
			
		||||
kendisini
 | 
			
		||||
kez
 | 
			
		||||
ki
 | 
			
		||||
kim
 | 
			
		||||
kimden
 | 
			
		||||
kime
 | 
			
		||||
kimi
 | 
			
		||||
kimse
 | 
			
		||||
kırk
 | 
			
		||||
milyar
 | 
			
		||||
milyon
 | 
			
		||||
mu
 | 
			
		||||
mü
 | 
			
		||||
mı
 | 
			
		||||
nasıl
 | 
			
		||||
ne
 | 
			
		||||
neden
 | 
			
		||||
nedenle
 | 
			
		||||
nerde
 | 
			
		||||
nerede
 | 
			
		||||
nereye
 | 
			
		||||
niye
 | 
			
		||||
niçin
 | 
			
		||||
o
 | 
			
		||||
olan
 | 
			
		||||
olarak
 | 
			
		||||
oldu
 | 
			
		||||
olduğu
 | 
			
		||||
olduğunu
 | 
			
		||||
olduklarını
 | 
			
		||||
olmadı
 | 
			
		||||
olmadığı
 | 
			
		||||
olmak
 | 
			
		||||
olması
 | 
			
		||||
olmayan
 | 
			
		||||
olmaz
 | 
			
		||||
olsa
 | 
			
		||||
olsun
 | 
			
		||||
olup
 | 
			
		||||
olur
 | 
			
		||||
olursa
 | 
			
		||||
oluyor
 | 
			
		||||
on
 | 
			
		||||
ona
 | 
			
		||||
ondan
 | 
			
		||||
onlar
 | 
			
		||||
onlardan
 | 
			
		||||
onları
 | 
			
		||||
onların
 | 
			
		||||
onu
 | 
			
		||||
onun
 | 
			
		||||
otuz
 | 
			
		||||
oysa
 | 
			
		||||
öyle
 | 
			
		||||
pek
 | 
			
		||||
rağmen
 | 
			
		||||
sadece
 | 
			
		||||
sanki
 | 
			
		||||
sekiz
 | 
			
		||||
seksen
 | 
			
		||||
sen
 | 
			
		||||
senden
 | 
			
		||||
seni
 | 
			
		||||
senin
 | 
			
		||||
siz
 | 
			
		||||
sizden
 | 
			
		||||
sizi
 | 
			
		||||
sizin
 | 
			
		||||
şey
 | 
			
		||||
şeyden
 | 
			
		||||
şeyi
 | 
			
		||||
şeyler
 | 
			
		||||
şöyle
 | 
			
		||||
şu
 | 
			
		||||
şuna
 | 
			
		||||
şunda
 | 
			
		||||
şundan
 | 
			
		||||
şunları
 | 
			
		||||
şunu
 | 
			
		||||
tarafından
 | 
			
		||||
trilyon
 | 
			
		||||
tüm
 | 
			
		||||
üç
 | 
			
		||||
üzere
 | 
			
		||||
var
 | 
			
		||||
vardı
 | 
			
		||||
ve
 | 
			
		||||
veya
 | 
			
		||||
ya
 | 
			
		||||
yani
 | 
			
		||||
yapacak
 | 
			
		||||
yapılan
 | 
			
		||||
yapılması
 | 
			
		||||
yapıyor
 | 
			
		||||
yapmak
 | 
			
		||||
yaptı
 | 
			
		||||
yaptığı
 | 
			
		||||
yaptığını
 | 
			
		||||
yaptıkları
 | 
			
		||||
yedi
 | 
			
		||||
yerine
 | 
			
		||||
yetmiş
 | 
			
		||||
yine
 | 
			
		||||
yirmi
 | 
			
		||||
yoksa
 | 
			
		||||
yüz
 | 
			
		||||
zaten
 | 
			
		||||
							
								
								
									
										29
									
								
								archiver/solr-config-dir/lang/userdict_ja.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								archiver/solr-config-dir/lang/userdict_ja.txt
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,29 @@
 | 
			
		|||
#
 | 
			
		||||
# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
 | 
			
		||||
#
 | 
			
		||||
# Add entries to this file in order to override the statistical model in terms
 | 
			
		||||
# of segmentation, readings and part-of-speech tags.  Notice that entries do
 | 
			
		||||
# not have weights since they are always used when found.  This is by-design
 | 
			
		||||
# in order to maximize ease-of-use.
 | 
			
		||||
#
 | 
			
		||||
# Entries are defined using the following CSV format:
 | 
			
		||||
#  <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
 | 
			
		||||
#
 | 
			
		||||
# Notice that a single half-width space separates tokens and readings, and
 | 
			
		||||
# that the number of tokens and readings must match exactly.
 | 
			
		||||
#
 | 
			
		||||
# Also notice that the behavior of multiple entries with the same <text> is undefined.
 | 
			
		||||
#
 | 
			
		||||
# Whitespace only lines are ignored.  Comments are not allowed on entry lines.
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
# Custom segmentation for kanji compounds
 | 
			
		||||
日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
 | 
			
		||||
関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
 | 
			
		||||
 | 
			
		||||
# Custom segmentation for compound katakana
 | 
			
		||||
トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
 | 
			
		||||
ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
 | 
			
		||||
 | 
			
		||||
# Custom reading for former sumo wrestler
 | 
			
		||||
朝青龍,朝青龍,アサショウリュウ,カスタム人名
 | 
			
		||||
							
								
								
									
										153
									
								
								archiver/solr-config-dir/schema.xml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										153
									
								
								archiver/solr-config-dir/schema.xml
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,153 @@
 | 
			
		|||
<?xml version="1.0" encoding="UTF-8" ?>
 | 
			
		||||
<schema name="example" version="1.6">
 | 
			
		||||
 | 
			
		||||
  <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
 | 
			
		||||
  <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />
 | 
			
		||||
  <fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />
 | 
			
		||||
 | 
			
		||||
  <!-- boolean type: "true" or "false" -->
 | 
			
		||||
  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
 | 
			
		||||
  <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
 | 
			
		||||
 | 
			
		||||
  <!--
 | 
			
		||||
      Numeric field types that index values using KD-trees.
 | 
			
		||||
      Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
 | 
			
		||||
  -->
 | 
			
		||||
  <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
 | 
			
		||||
  <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
 | 
			
		||||
  <fieldType name="plong" class="solr.LongPointField" docValues="true"/>
 | 
			
		||||
  <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
 | 
			
		||||
 | 
			
		||||
  <fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
 | 
			
		||||
  <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
 | 
			
		||||
  <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
 | 
			
		||||
  <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
 | 
			
		||||
  <fieldType name="random" class="solr.RandomSortField" indexed="true"/>
 | 
			
		||||
 | 
			
		||||
  <!-- since fields of this type are by default not stored or indexed,
 | 
			
		||||
       any data added to them will be ignored outright.  -->
 | 
			
		||||
  <fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
 | 
			
		||||
 | 
			
		||||
  <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
 | 
			
		||||
       is a more restricted form of the canonical representation of dateTime
 | 
			
		||||
       http://www.w3.org/TR/xmlschema-2/#dateTime
 | 
			
		||||
       The trailing "Z" designates UTC time and is mandatory.
 | 
			
		||||
       Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
 | 
			
		||||
       All other components are mandatory.
 | 
			
		||||
 | 
			
		||||
Expressions can also be used to denote calculations that should be
 | 
			
		||||
performed relative to "NOW" to determine the value, ie...
 | 
			
		||||
 | 
			
		||||
NOW/HOUR
 | 
			
		||||
... Round to the start of the current hour
 | 
			
		||||
NOW-1DAY
 | 
			
		||||
... Exactly 1 day prior to now
 | 
			
		||||
NOW/DAY+6MONTHS+3DAYS
 | 
			
		||||
... 6 months and 3 days in the future from the start of
 | 
			
		||||
the current day
 | 
			
		||||
 | 
			
		||||
  -->
 | 
			
		||||
  <!-- KD-tree versions of date fields -->
 | 
			
		||||
  <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
 | 
			
		||||
  <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
 | 
			
		||||
 | 
			
		||||
  <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->
 | 
			
		||||
  <fieldType name="binary" class="solr.BinaryField"/>
 | 
			
		||||
 | 
			
		||||
  <!--
 | 
			
		||||
      RankFields can be used to store scoring factors to improve document ranking. They should be used
 | 
			
		||||
      in combination with RankQParserPlugin.
 | 
			
		||||
      (experimental)
 | 
			
		||||
  -->
 | 
			
		||||
  <fieldType name="rank" class="solr.RankField"/>
 | 
			
		||||
 | 
			
		||||
  <!-- solr.TextField allows the specification of custom text analyzers
 | 
			
		||||
       specified as a tokenizer and a list of token filters. Different
 | 
			
		||||
       analyzers may be specified for indexing and querying.
 | 
			
		||||
 | 
			
		||||
The optional positionIncrementGap puts space between multiple fields of
 | 
			
		||||
this type on the same document, with the purpose of preventing false phrase
 | 
			
		||||
matching across fields.
 | 
			
		||||
 | 
			
		||||
For more info on customizing your analyzer chain, please see
 | 
			
		||||
https://solr.apache.org/guide/solr/latest/indexing-guide/document-analysis.html#using-analyzers-tokenizers-and-filters
 | 
			
		||||
  -->
 | 
			
		||||
 | 
			
		||||
  <!-- One can also specify an existing Analyzer class that has a
 | 
			
		||||
       default constructor via the class attribute on the analyzer element.
 | 
			
		||||
       Example:
 | 
			
		||||
       <fieldType name="text_greek" class="solr.TextField">
 | 
			
		||||
       <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
 | 
			
		||||
       </fieldType>
 | 
			
		||||
  -->
 | 
			
		||||
 | 
			
		||||
  <fieldType name="text_prefix" class="solr.TextField" positionIncrementGap="100">
 | 
			
		||||
    <analyzer type="index">
 | 
			
		||||
      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
 | 
			
		||||
      <filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="20" />
 | 
			
		||||
    </analyzer>
 | 
			
		||||
    <analyzer type="query">
 | 
			
		||||
      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
 | 
			
		||||
    </analyzer>
 | 
			
		||||
  </fieldType>
 | 
			
		||||
 | 
			
		||||
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
 | 
			
		||||
    <analyzer type="index">
 | 
			
		||||
      <tokenizer name="standard"/>
 | 
			
		||||
      <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt" />
 | 
			
		||||
      <!-- in this example, we will only use synonyms at query time
 | 
			
		||||
           <filter name="synonymGraph" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
 | 
			
		||||
           <filter name="flattenGraph"/>
 | 
			
		||||
      -->
 | 
			
		||||
      <filter name="lowercase"/>
 | 
			
		||||
    </analyzer>
 | 
			
		||||
    <analyzer type="query">
 | 
			
		||||
      <tokenizer name="standard"/>
 | 
			
		||||
      <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt" />
 | 
			
		||||
      <filter name="lowercase"/>
 | 
			
		||||
    </analyzer>
 | 
			
		||||
  </fieldType>
 | 
			
		||||
 | 
			
		||||
  <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
 | 
			
		||||
    <analyzer type="index">
 | 
			
		||||
      <tokenizer name="whitespace"/>
 | 
			
		||||
      <!-- Case insensitive stop word removal.
 | 
			
		||||
      -->
 | 
			
		||||
      <filter name="stop"
 | 
			
		||||
              ignoreCase="true"
 | 
			
		||||
              words="lang/stopwords_en.txt"
 | 
			
		||||
              />
 | 
			
		||||
      <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
 | 
			
		||||
      <filter name="lowercase"/>
 | 
			
		||||
      <filter class="solr.PatternReplaceFilterFactory" pattern="(\d{2,})" replacement="" replace="all" />
 | 
			
		||||
      <filter name="porterStem"/>
 | 
			
		||||
      <filter name="flattenGraph" />
 | 
			
		||||
    </analyzer>
 | 
			
		||||
    <analyzer type="query">
 | 
			
		||||
      <tokenizer name="whitespace"/>
 | 
			
		||||
      <filter name="stop"
 | 
			
		||||
              ignoreCase="true"
 | 
			
		||||
              words="lang/stopwords_en.txt"
 | 
			
		||||
              />
 | 
			
		||||
      <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
 | 
			
		||||
      <filter name="lowercase"/>
 | 
			
		||||
      <filter name="porterStem"/>
 | 
			
		||||
    </analyzer>
 | 
			
		||||
  </fieldType>
 | 
			
		||||
 | 
			
		||||
  <!-- Fields -->
 | 
			
		||||
  <field name="id" type="string" indexed="true" required="true" />
 | 
			
		||||
  <field name="title" type="text_en_splitting" indexed="true" stored="true" required="true" />
 | 
			
		||||
  <field name="body" type="text_en_splitting" indexed="true" stored="true" required="true" />
 | 
			
		||||
  <field name="table" type="text_en_splitting" indexed="true" stored="false" required="true" />
 | 
			
		||||
  <field name="len" type="pint" indexed="false" stored="true" required="true" />
 | 
			
		||||
 | 
			
		||||
  <!-- Copy title to title_prefix for matching -->
 | 
			
		||||
  <field name="title_prefix" type="text_prefix" indexed="true" stored="false" />
 | 
			
		||||
  <copyField source="title" dest="title_prefix" />
 | 
			
		||||
 | 
			
		||||
  <field name="_version_" type="plong" indexed="true" stored="true" multiValued="false"/>
 | 
			
		||||
 | 
			
		||||
  <!-- Unique Key -->
 | 
			
		||||
  <uniqueKey>id</uniqueKey>
 | 
			
		||||
</schema>
 | 
			
		||||
							
								
								
									
										1076
									
								
								archiver/solr-config-dir/solrconfig.xml
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										1076
									
								
								archiver/solr-config-dir/solrconfig.xml
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue