forked from cadence/breezewiki
		
	Add more indexers and Solr configuration
This commit is contained in:
		
							parent
							
								
									43c3f70736
								
							
						
					
					
						commit
						57e0d20657
					
				
					 42 changed files with 9016 additions and 33 deletions
				
			
		
							
								
								
									
										8
									
								
								archiver/solr-config-dir/lang/contractions_ca.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								archiver/solr-config-dir/lang/contractions_ca.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,8 @@ | |||
| # Set of Catalan contractions for ElisionFilter | ||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | ||||
| d | ||||
| l | ||||
| m | ||||
| n | ||||
| s | ||||
| t | ||||
							
								
								
									
										15
									
								
								archiver/solr-config-dir/lang/contractions_fr.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								archiver/solr-config-dir/lang/contractions_fr.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,15 @@ | |||
| # Set of French contractions for ElisionFilter | ||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | ||||
| l | ||||
| m | ||||
| t | ||||
| qu | ||||
| n | ||||
| s | ||||
| j | ||||
| d | ||||
| c | ||||
| jusqu | ||||
| quoiqu | ||||
| lorsqu | ||||
| puisqu | ||||
							
								
								
									
										5
									
								
								archiver/solr-config-dir/lang/contractions_ga.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								archiver/solr-config-dir/lang/contractions_ga.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,5 @@ | |||
| # Set of Irish contractions for ElisionFilter | ||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | ||||
| d | ||||
| m | ||||
| b | ||||
							
								
								
									
										23
									
								
								archiver/solr-config-dir/lang/contractions_it.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								archiver/solr-config-dir/lang/contractions_it.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,23 @@ | |||
| # Set of Italian contractions for ElisionFilter | ||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | ||||
| c | ||||
| l  | ||||
| all  | ||||
| dall  | ||||
| dell  | ||||
| nell  | ||||
| sull  | ||||
| coll  | ||||
| pell  | ||||
| gl  | ||||
| agl  | ||||
| dagl  | ||||
| degl  | ||||
| negl  | ||||
| sugl  | ||||
| un  | ||||
| m  | ||||
| t  | ||||
| s  | ||||
| v  | ||||
| d | ||||
							
								
								
									
										5
									
								
								archiver/solr-config-dir/lang/hyphenations_ga.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								archiver/solr-config-dir/lang/hyphenations_ga.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,5 @@ | |||
| # Set of Irish hyphenations for StopFilter | ||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | ||||
| h | ||||
| n | ||||
| t | ||||
							
								
								
									
										6
									
								
								archiver/solr-config-dir/lang/stemdict_nl.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								archiver/solr-config-dir/lang/stemdict_nl.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,6 @@ | |||
| # Set of overrides for the Dutch stemmer | ||||
| # TODO: load this as a resource from the analyzer and sync it in build.xml | ||||
| fiets	fiets | ||||
| bromfiets	bromfiets | ||||
| ei	eier | ||||
| kind	kinder | ||||
							
								
								
									
										420
									
								
								archiver/solr-config-dir/lang/stoptags_ja.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										420
									
								
								archiver/solr-config-dir/lang/stoptags_ja.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,420 @@ | |||
| # | ||||
| # This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. | ||||
| # | ||||
| # Any token whose part-of-speech tag exactly matches one of those defined in | ||||
| # this file is removed from the token stream. | ||||
| # | ||||
| # Set your own stoptags by uncommenting the lines below.  Note that comments are | ||||
| # not allowed on the same line as a stoptag.  See LUCENE-3745 for frequency lists, | ||||
| # etc. that can be useful for building your own stoptag set. | ||||
| # | ||||
| # The entire possible tagset is provided below for convenience. | ||||
| # | ||||
| ##### | ||||
| #  noun: unclassified nouns | ||||
| #名詞 | ||||
| # | ||||
| #  noun-common: Common nouns or nouns where the sub-classification is undefined | ||||
| #名詞-一般 | ||||
| # | ||||
| #  noun-proper: Proper nouns where the sub-classification is undefined  | ||||
| #名詞-固有名詞 | ||||
| # | ||||
| #  noun-proper-misc: miscellaneous proper nouns | ||||
| #名詞-固有名詞-一般 | ||||
| # | ||||
| #  noun-proper-person: Personal names where the sub-classification is undefined | ||||
| #名詞-固有名詞-人名 | ||||
| # | ||||
| #  noun-proper-person-misc: names that cannot be divided into surname and  | ||||
| #  given name; foreign names; names where the surname or given name is unknown. | ||||
| #  e.g. お市の方 | ||||
| #名詞-固有名詞-人名-一般 | ||||
| # | ||||
| #  noun-proper-person-surname: Mainly Japanese surnames. | ||||
| #  e.g. 山田 | ||||
| #名詞-固有名詞-人名-姓 | ||||
| # | ||||
| #  noun-proper-person-given_name: Mainly Japanese given names. | ||||
| #  e.g. 太郎 | ||||
| #名詞-固有名詞-人名-名 | ||||
| # | ||||
| #  noun-proper-organization: Names representing organizations. | ||||
| #  e.g. 通産省, NHK | ||||
| #名詞-固有名詞-組織 | ||||
| # | ||||
| #  noun-proper-place: Place names where the sub-classification is undefined | ||||
| #名詞-固有名詞-地域 | ||||
| # | ||||
| #  noun-proper-place-misc: Place names excluding countries. | ||||
| #  e.g. アジア, バルセロナ, 京都 | ||||
| #名詞-固有名詞-地域-一般 | ||||
| # | ||||
| #  noun-proper-place-country: Country names.  | ||||
| #  e.g. 日本, オーストラリア | ||||
| #名詞-固有名詞-地域-国 | ||||
| # | ||||
| #  noun-pronoun: Pronouns where the sub-classification is undefined | ||||
| #名詞-代名詞 | ||||
| # | ||||
| #  noun-pronoun-misc: miscellaneous pronouns:  | ||||
| #  e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ | ||||
| #名詞-代名詞-一般 | ||||
| # | ||||
| #  noun-pronoun-contraction: Spoken language contraction made by combining a  | ||||
| #  pronoun and the particle 'wa'. | ||||
| #  e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ  | ||||
| #名詞-代名詞-縮約 | ||||
| # | ||||
| #  noun-adverbial: Temporal nouns such as names of days or months that behave  | ||||
| #  like adverbs. Nouns that represent amount or ratios and can be used adverbially, | ||||
| #  e.g. 金曜, 一月, 午後, 少量 | ||||
| #名詞-副詞可能 | ||||
| # | ||||
| #  noun-verbal: Nouns that take arguments with case and can appear followed by  | ||||
| #  'suru' and related verbs (する, できる, なさる, くださる) | ||||
| #  e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り | ||||
| #名詞-サ変接続 | ||||
| # | ||||
| #  noun-adjective-base: The base form of adjectives, words that appear before な ("na") | ||||
| #  e.g. 健康, 安易, 駄目, だめ | ||||
| #名詞-形容動詞語幹 | ||||
| # | ||||
| #  noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. | ||||
| #  e.g. 0, 1, 2, 何, 数, 幾 | ||||
| #名詞-数 | ||||
| # | ||||
| #  noun-affix: noun affixes where the sub-classification is undefined | ||||
| #名詞-非自立 | ||||
| # | ||||
| #  noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that  | ||||
| #  attach to the base form of inflectional words, words that cannot be classified  | ||||
| #  into any of the other categories below. This category includes indefinite nouns. | ||||
| #  e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,  | ||||
| #       順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,  | ||||
| #       拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, | ||||
| #       わり, 割り, 割, ん-口語/, もん-口語/ | ||||
| #名詞-非自立-一般 | ||||
| # | ||||
| #  noun-affix-adverbial: noun affixes that can behave as adverbs. | ||||
| #  e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,  | ||||
| #       上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,  | ||||
| #       最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,  | ||||
| #       とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,  | ||||
| #       儘, 侭, みぎり, 矢先 | ||||
| #名詞-非自立-副詞可能 | ||||
| # | ||||
| #  noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars  | ||||
| #  with the stem よう(だ) ("you(da)"). | ||||
| #  e.g.  よう, やう, 様 (よう) | ||||
| #名詞-非自立-助動詞語幹 | ||||
| #   | ||||
| #  noun-affix-adjective-base: noun affixes that can connect to the indeclinable | ||||
| #  connection form な (aux "da"). | ||||
| #  e.g. みたい, ふう | ||||
| #名詞-非自立-形容動詞語幹 | ||||
| # | ||||
| #  noun-special: special nouns where the sub-classification is undefined. | ||||
| #名詞-特殊 | ||||
| # | ||||
| #  noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is  | ||||
| #  treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the base  | ||||
| #  form of inflectional words. | ||||
| #  e.g. そう | ||||
| #名詞-特殊-助動詞語幹 | ||||
| # | ||||
| #  noun-suffix: noun suffixes where the sub-classification is undefined. | ||||
| #名詞-接尾 | ||||
| # | ||||
| #  noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect  | ||||
| #  to ガル or タイ and can combine into compound nouns, words that cannot be classified into | ||||
| #  any of the other categories below. In general, this category is more inclusive than  | ||||
| #  接尾語 ("suffix") and is usually the last element in a compound noun. | ||||
| #  e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, | ||||
| #       よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 | ||||
| #名詞-接尾-一般 | ||||
| # | ||||
| #  noun-suffix-person: Suffixes that form nouns and attach to person names more often | ||||
| #  than other nouns. | ||||
| #  e.g. 君, 様, 著 | ||||
| #名詞-接尾-人名 | ||||
| # | ||||
| #  noun-suffix-place: Suffixes that form nouns and attach to place names more often  | ||||
| #  than other nouns. | ||||
| #  e.g. 町, 市, 県 | ||||
| #名詞-接尾-地域 | ||||
| # | ||||
| #  noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that  | ||||
| #  can appear before スル ("suru"). | ||||
| #  e.g. 化, 視, 分け, 入り, 落ち, 買い | ||||
| #名詞-接尾-サ変接続 | ||||
| # | ||||
| #  noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,  | ||||
| #  is treated as 助動詞 ("auxiliary verb") in school grammars, and attaches to the  | ||||
| #  conjunctive form of inflectional words. | ||||
| #  e.g. そう | ||||
| #名詞-接尾-助動詞語幹 | ||||
| # | ||||
| #  noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive  | ||||
| #  form of inflectional words and appear before the copula だ ("da"). | ||||
| #  e.g. 的, げ, がち | ||||
| #名詞-接尾-形容動詞語幹 | ||||
| # | ||||
| #  noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. | ||||
| #  e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) | ||||
| #名詞-接尾-副詞可能 | ||||
| # | ||||
| #  noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category  | ||||
| #  is more inclusive than 助数詞 ("classifier") and includes common nouns that attach  | ||||
| #  to numbers. | ||||
| #  e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 | ||||
| #名詞-接尾-助数詞 | ||||
| # | ||||
| #  noun-suffix-special: Special suffixes that mainly attach to inflecting words. | ||||
| #  e.g. (楽し) さ, (考え) 方 | ||||
| #名詞-接尾-特殊 | ||||
| # | ||||
| #  noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words  | ||||
| #  together. | ||||
| #  e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) | ||||
| #名詞-接続詞的 | ||||
| # | ||||
| #  noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are  | ||||
| #  semantically verb-like. | ||||
| #  e.g. ごらん, ご覧, 御覧, 頂戴 | ||||
| #名詞-動詞非自立的 | ||||
| # | ||||
| #  noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,  | ||||
| #  dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")  | ||||
| #  is いわく ("iwaku"). | ||||
| #名詞-引用文字列 | ||||
| # | ||||
| #  noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and | ||||
| #  behave like an adjective. | ||||
| #  e.g. 申し訳, 仕方, とんでも, 違い | ||||
| #名詞-ナイ形容詞語幹 | ||||
| # | ||||
| ##### | ||||
| #  prefix: unclassified prefixes | ||||
| #接頭詞 | ||||
| # | ||||
| #  prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)  | ||||
| #  excluding numerical expressions. | ||||
| #  e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) | ||||
| #接頭詞-名詞接続 | ||||
| # | ||||
| #  prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb | ||||
| #  in conjunctive form followed by なる/なさる/くださる. | ||||
| #  e.g. お (読みなさい), お (座り) | ||||
| #接頭詞-動詞接続 | ||||
| # | ||||
| #  prefix-adjectival: Prefixes that attach to adjectives. | ||||
| #  e.g. お (寒いですねえ), バカ (でかい) | ||||
| #接頭詞-形容詞接続 | ||||
| # | ||||
| #  prefix-numerical: Prefixes that attach to numerical expressions. | ||||
| #  e.g. 約, およそ, 毎時 | ||||
| #接頭詞-数接続 | ||||
| # | ||||
| ##### | ||||
| #  verb: unclassified verbs | ||||
| #動詞 | ||||
| # | ||||
| #  verb-main: | ||||
| #動詞-自立 | ||||
| # | ||||
| #  verb-auxiliary: | ||||
| #動詞-非自立 | ||||
| # | ||||
| #  verb-suffix: | ||||
| #動詞-接尾 | ||||
| # | ||||
| ##### | ||||
| #  adjective: unclassified adjectives | ||||
| #形容詞 | ||||
| # | ||||
| #  adjective-main: | ||||
| #形容詞-自立 | ||||
| # | ||||
| #  adjective-auxiliary: | ||||
| #形容詞-非自立 | ||||
| # | ||||
| #  adjective-suffix: | ||||
| #形容詞-接尾 | ||||
| # | ||||
| ##### | ||||
| #  adverb: unclassified adverbs | ||||
| #副詞 | ||||
| # | ||||
| #  adverb-misc: Words that can be segmented into one unit and where adnominal  | ||||
| #  modification is not possible. | ||||
| #  e.g. あいかわらず, 多分 | ||||
| #副詞-一般 | ||||
| # | ||||
| #  adverb-particle_conjunction: Adverbs that can be followed by の, は, に,  | ||||
| #  な, する, だ, etc. | ||||
| #  e.g. こんなに, そんなに, あんなに, なにか, なんでも | ||||
| #副詞-助詞類接続 | ||||
| # | ||||
| ##### | ||||
| #  adnominal: Words that only have noun-modifying forms. | ||||
| #  e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,  | ||||
| #       どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,  | ||||
| #       「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き | ||||
| #連体詞 | ||||
| # | ||||
| ##### | ||||
| #  conjunction: Conjunctions that can occur independently. | ||||
| #  e.g. が, けれども, そして, じゃあ, それどころか | ||||
| 接続詞 | ||||
| # | ||||
| ##### | ||||
| #  particle: unclassified particles. | ||||
| 助詞 | ||||
| # | ||||
| #  particle-case: case particles where the subclassification is undefined. | ||||
| 助詞-格助詞 | ||||
| # | ||||
| #  particle-case-misc: Case particles. | ||||
| #  e.g. から, が, で, と, に, へ, より, を, の, にて | ||||
| 助詞-格助詞-一般 | ||||
| # | ||||
| #  particle-case-quote: the "to" that appears after nouns, a person’s speech,  | ||||
| #  quotation marks, expressions of decisions from a meeting, reasons, judgements, | ||||
| #  conjectures, etc. | ||||
| #  e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) | ||||
| 助詞-格助詞-引用 | ||||
| # | ||||
| #  particle-case-compound: Compounds of particles and verbs that mainly behave  | ||||
| #  like case particles. | ||||
| #  e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, | ||||
| #       にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,  | ||||
| #       にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,  | ||||
| #       に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,  | ||||
| #       に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, | ||||
| #       にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,  | ||||
| #       にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, | ||||
| #       って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ | ||||
| 助詞-格助詞-連語 | ||||
| # | ||||
| #  particle-conjunctive: | ||||
| #  e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,  | ||||
| #       ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,  | ||||
| #       (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ | ||||
| 助詞-接続助詞 | ||||
| # | ||||
| #  particle-dependency: | ||||
| #  e.g. こそ, さえ, しか, すら, は, も, ぞ | ||||
| 助詞-係助詞 | ||||
| # | ||||
| #  particle-adverbial: | ||||
| #  e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,  | ||||
| #       (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, | ||||
| #       (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,  | ||||
| #       (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, | ||||
| #       ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) | ||||
| 助詞-副助詞 | ||||
| # | ||||
| #  particle-interjective: particles with interjective grammatical roles. | ||||
| #  e.g. (松島) や | ||||
| 助詞-間投助詞 | ||||
| # | ||||
| #  particle-coordinate: | ||||
| #  e.g. と, たり, だの, だり, とか, なり, や, やら | ||||
| 助詞-並立助詞 | ||||
| # | ||||
| #  particle-final: | ||||
| #  e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,  | ||||
| #       ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ | ||||
| 助詞-終助詞 | ||||
| # | ||||
| #  particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is  | ||||
| #  adverbial, conjunctive, or sentence final. For example: | ||||
| #       (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 | ||||
| #       (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 | ||||
| #           「(祈りが届いたせい) か (, 試験に合格した.)」 | ||||
| #       (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 | ||||
| #  e.g. か | ||||
| 助詞-副助詞/並立助詞/終助詞 | ||||
| # | ||||
| #  particle-adnominalizer: The "no" that attaches to nouns and modifies  | ||||
| #  non-inflectional words. | ||||
| 助詞-連体化 | ||||
| # | ||||
| #  particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs  | ||||
| #  that are giongo, giseigo, or gitaigo. | ||||
| #  e.g. に, と | ||||
| 助詞-副詞化 | ||||
| # | ||||
| #  particle-special: A particle that does not fit into one of the above classifications.  | ||||
| #  This includes particles that are used in Tanka, Haiku, and other poetry. | ||||
| #  e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) | ||||
| 助詞-特殊 | ||||
| # | ||||
| ##### | ||||
| #  auxiliary-verb: | ||||
| 助動詞 | ||||
| # | ||||
| ##### | ||||
| #  interjection: Greetings and other exclamations. | ||||
| #  e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,  | ||||
| #       いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい | ||||
| #感動詞 | ||||
| # | ||||
| ##### | ||||
| #  symbol: unclassified Symbols. | ||||
| 記号 | ||||
| # | ||||
| #  symbol-misc: A general symbol not in one of the categories below. | ||||
| #  e.g. [○◎@$〒→+] | ||||
| 記号-一般 | ||||
| # | ||||
| #  symbol-comma: Commas | ||||
| #  e.g. [,、] | ||||
| 記号-読点 | ||||
| # | ||||
| #  symbol-period: Periods and full stops. | ||||
| #  e.g. [..。] | ||||
| 記号-句点 | ||||
| # | ||||
| #  symbol-space: Full-width whitespace. | ||||
| 記号-空白 | ||||
| # | ||||
| #  symbol-open_bracket: | ||||
| #  e.g. [({‘“『【] | ||||
| 記号-括弧開 | ||||
| # | ||||
| #  symbol-close_bracket: | ||||
| #  e.g. [)}’”』」】] | ||||
| 記号-括弧閉 | ||||
| # | ||||
| #  symbol-alphabetic: | ||||
| #記号-アルファベット | ||||
| # | ||||
| ##### | ||||
| #  other: unclassified other | ||||
| #その他 | ||||
| # | ||||
| #  other-interjection: Words that are hard to classify as noun-suffixes or  | ||||
| #  sentence-final particles. | ||||
| #  e.g. (だ)ァ | ||||
| その他-間投 | ||||
| # | ||||
| ##### | ||||
| #  filler: Aizuchi that occurs during a conversation or sounds inserted as filler. | ||||
| #  e.g. あの, うんと, えと | ||||
| フィラー | ||||
| # | ||||
| ##### | ||||
| #  non-verbal: non-verbal sound. | ||||
| 非言語音 | ||||
| # | ||||
| ##### | ||||
| #  fragment: | ||||
| #語断片 | ||||
| # | ||||
| ##### | ||||
| #  unknown: unknown part of speech. | ||||
| #未知語 | ||||
| # | ||||
| ##### End of file | ||||
							
								
								
									
										125
									
								
								archiver/solr-config-dir/lang/stopwords_ar.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								archiver/solr-config-dir/lang/stopwords_ar.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,125 @@ | |||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | ||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | ||||
| # Also see http://www.opensource.org/licenses/bsd-license.html | ||||
| # Cleaned on October 11, 2009 (not normalized, so use before normalization) | ||||
| # This means that when modifying this list, you might need to add some  | ||||
| # redundant entries, for example containing forms with both أ and ا | ||||
| من | ||||
| ومن | ||||
| منها | ||||
| منه | ||||
| في | ||||
| وفي | ||||
| فيها | ||||
| فيه | ||||
| و | ||||
| ف | ||||
| ثم | ||||
| او | ||||
| أو | ||||
| ب | ||||
| بها | ||||
| به | ||||
| ا | ||||
| أ | ||||
| اى | ||||
| اي | ||||
| أي | ||||
| أى | ||||
| لا | ||||
| ولا | ||||
| الا | ||||
| ألا | ||||
| إلا | ||||
| لكن | ||||
| ما | ||||
| وما | ||||
| كما | ||||
| فما | ||||
| عن | ||||
| مع | ||||
| اذا | ||||
| إذا | ||||
| ان | ||||
| أن | ||||
| إن | ||||
| انها | ||||
| أنها | ||||
| إنها | ||||
| انه | ||||
| أنه | ||||
| إنه | ||||
| بان | ||||
| بأن | ||||
| فان | ||||
| فأن | ||||
| وان | ||||
| وأن | ||||
| وإن | ||||
| التى | ||||
| التي | ||||
| الذى | ||||
| الذي | ||||
| الذين | ||||
| الى | ||||
| الي | ||||
| إلى | ||||
| إلي | ||||
| على | ||||
| عليها | ||||
| عليه | ||||
| اما | ||||
| أما | ||||
| إما | ||||
| ايضا | ||||
| أيضا | ||||
| كل | ||||
| وكل | ||||
| لم | ||||
| ولم | ||||
| لن | ||||
| ولن | ||||
| هى | ||||
| هي | ||||
| هو | ||||
| وهى | ||||
| وهي | ||||
| وهو | ||||
| فهى | ||||
| فهي | ||||
| فهو | ||||
| انت | ||||
| أنت | ||||
| لك | ||||
| لها | ||||
| له | ||||
| هذه | ||||
| هذا | ||||
| تلك | ||||
| ذلك | ||||
| هناك | ||||
| كانت | ||||
| كان | ||||
| يكون | ||||
| تكون | ||||
| وكانت | ||||
| وكان | ||||
| غير | ||||
| بعض | ||||
| قد | ||||
| نحو | ||||
| بين | ||||
| بينما | ||||
| منذ | ||||
| ضمن | ||||
| حيث | ||||
| الان | ||||
| الآن | ||||
| خلال | ||||
| بعد | ||||
| قبل | ||||
| حتى | ||||
| عند | ||||
| عندما | ||||
| لدى | ||||
| جميع | ||||
							
								
								
									
										193
									
								
								archiver/solr-config-dir/lang/stopwords_bg.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										193
									
								
								archiver/solr-config-dir/lang/stopwords_bg.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,193 @@ | |||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | ||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | ||||
| # Also see http://www.opensource.org/licenses/bsd-license.html | ||||
| а | ||||
| аз | ||||
| ако | ||||
| ала | ||||
| бе | ||||
| без | ||||
| беше | ||||
| би | ||||
| бил | ||||
| била | ||||
| били | ||||
| било | ||||
| близо | ||||
| бъдат | ||||
| бъде | ||||
| бяха | ||||
| в | ||||
| вас | ||||
| ваш | ||||
| ваша | ||||
| вероятно | ||||
| вече | ||||
| взема | ||||
| ви | ||||
| вие | ||||
| винаги | ||||
| все | ||||
| всеки | ||||
| всички | ||||
| всичко | ||||
| всяка | ||||
| във | ||||
| въпреки | ||||
| върху | ||||
| г | ||||
| ги | ||||
| главно | ||||
| го | ||||
| д | ||||
| да | ||||
| дали | ||||
| до | ||||
| докато | ||||
| докога | ||||
| дори | ||||
| досега | ||||
| доста | ||||
| е | ||||
| едва | ||||
| един | ||||
| ето | ||||
| за | ||||
| зад | ||||
| заедно | ||||
| заради | ||||
| засега | ||||
| затова | ||||
| защо | ||||
| защото | ||||
| и | ||||
| из | ||||
| или | ||||
| им | ||||
| има | ||||
| имат | ||||
| иска | ||||
| й | ||||
| каза | ||||
| как | ||||
| каква | ||||
| какво | ||||
| както | ||||
| какъв | ||||
| като | ||||
| кога | ||||
| когато | ||||
| което | ||||
| които | ||||
| кой | ||||
| който | ||||
| колко | ||||
| която | ||||
| къде | ||||
| където | ||||
| към | ||||
| ли | ||||
| м | ||||
| ме | ||||
| между | ||||
| мен | ||||
| ми | ||||
| мнозина | ||||
| мога | ||||
| могат | ||||
| може | ||||
| моля | ||||
| момента | ||||
| му | ||||
| н | ||||
| на | ||||
| над | ||||
| назад | ||||
| най | ||||
| направи | ||||
| напред | ||||
| например | ||||
| нас | ||||
| не | ||||
| него | ||||
| нея | ||||
| ни | ||||
| ние | ||||
| никой | ||||
| нито | ||||
| но | ||||
| някои | ||||
| някой | ||||
| няма | ||||
| обаче | ||||
| около | ||||
| освен | ||||
| особено | ||||
| от | ||||
| отгоре | ||||
| отново | ||||
| още | ||||
| пак | ||||
| по | ||||
| повече | ||||
| повечето | ||||
| под | ||||
| поне | ||||
| поради | ||||
| после | ||||
| почти | ||||
| прави | ||||
| пред | ||||
| преди | ||||
| през | ||||
| при | ||||
| пък | ||||
| първо | ||||
| с | ||||
| са | ||||
| само | ||||
| се | ||||
| сега | ||||
| си | ||||
| скоро | ||||
| след | ||||
| сме | ||||
| според | ||||
| сред | ||||
| срещу | ||||
| сте | ||||
| съм | ||||
| със | ||||
| също | ||||
| т | ||||
| тази | ||||
| така | ||||
| такива | ||||
| такъв | ||||
| там | ||||
| твой | ||||
| те | ||||
| тези | ||||
| ти | ||||
| тн | ||||
| то | ||||
| това | ||||
| тогава | ||||
| този | ||||
| той | ||||
| толкова | ||||
| точно | ||||
| трябва | ||||
| тук | ||||
| тъй | ||||
| тя | ||||
| тях | ||||
| у | ||||
| харесва | ||||
| ч | ||||
| че | ||||
| често | ||||
| чрез | ||||
| ще | ||||
| щом | ||||
| я | ||||
							
								
								
									
										220
									
								
								archiver/solr-config-dir/lang/stopwords_ca.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										220
									
								
								archiver/solr-config-dir/lang/stopwords_ca.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,220 @@ | |||
| # Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed) | ||||
| a | ||||
| abans | ||||
| ací | ||||
| ah | ||||
| així | ||||
| això | ||||
| al | ||||
| als | ||||
| aleshores | ||||
| algun | ||||
| alguna | ||||
| algunes | ||||
| alguns | ||||
| alhora | ||||
| allà | ||||
| allí | ||||
| allò | ||||
| altra | ||||
| altre | ||||
| altres | ||||
| amb | ||||
| ambdós | ||||
| ambdues | ||||
| apa | ||||
| aquell | ||||
| aquella | ||||
| aquelles | ||||
| aquells | ||||
| aquest | ||||
| aquesta | ||||
| aquestes | ||||
| aquests | ||||
| aquí | ||||
| baix | ||||
| cada | ||||
| cadascú | ||||
| cadascuna | ||||
| cadascunes | ||||
| cadascuns | ||||
| com | ||||
| contra | ||||
| d'un | ||||
| d'una | ||||
| d'unes | ||||
| d'uns | ||||
| dalt | ||||
| de | ||||
| del | ||||
| dels | ||||
| des | ||||
| després | ||||
| dins | ||||
| dintre | ||||
| donat | ||||
| doncs | ||||
| durant | ||||
| e | ||||
| eh | ||||
| el | ||||
| els | ||||
| em | ||||
| en | ||||
| encara | ||||
| ens | ||||
| entre | ||||
| érem | ||||
| eren | ||||
| éreu | ||||
| es | ||||
| és | ||||
| esta | ||||
| està | ||||
| estàvem | ||||
| estaven | ||||
| estàveu | ||||
| esteu | ||||
| et | ||||
| etc | ||||
| ets | ||||
| fins | ||||
| fora | ||||
| gairebé | ||||
| ha | ||||
| han | ||||
| has | ||||
| havia | ||||
| he | ||||
| hem | ||||
| heu | ||||
| hi  | ||||
| ho | ||||
| i | ||||
| igual | ||||
| iguals | ||||
| ja | ||||
| l'hi | ||||
| la | ||||
| les | ||||
| li | ||||
| li'n | ||||
| llavors | ||||
| m'he | ||||
| ma | ||||
| mal | ||||
| malgrat | ||||
| mateix | ||||
| mateixa | ||||
| mateixes | ||||
| mateixos | ||||
| me | ||||
| mentre | ||||
| més | ||||
| meu | ||||
| meus | ||||
| meva | ||||
| meves | ||||
| molt | ||||
| molta | ||||
| moltes | ||||
| molts | ||||
| mon | ||||
| mons | ||||
| n'he | ||||
| n'hi | ||||
| ne | ||||
| ni | ||||
| no | ||||
| nogensmenys | ||||
| només | ||||
| nosaltres | ||||
| nostra | ||||
| nostre | ||||
| nostres | ||||
| o | ||||
| oh | ||||
| oi | ||||
| on | ||||
| pas | ||||
| pel | ||||
| pels | ||||
| per | ||||
| però | ||||
| perquè | ||||
| poc  | ||||
| poca | ||||
| pocs | ||||
| poques | ||||
| potser | ||||
| propi | ||||
| qual | ||||
| quals | ||||
| quan | ||||
| quant  | ||||
| que | ||||
| què | ||||
| quelcom | ||||
| qui | ||||
| quin | ||||
| quina | ||||
| quines | ||||
| quins | ||||
| s'ha | ||||
| s'han | ||||
| sa | ||||
| semblant | ||||
| semblants | ||||
| ses | ||||
| seu  | ||||
| seus | ||||
| seva | ||||
| seva | ||||
| seves | ||||
| si | ||||
| sobre | ||||
| sobretot | ||||
| sóc | ||||
| solament | ||||
| sols | ||||
| son  | ||||
| són | ||||
| sons  | ||||
| sota | ||||
| sou | ||||
| t'ha | ||||
| t'han | ||||
| t'he | ||||
| ta | ||||
| tal | ||||
| també | ||||
| tampoc | ||||
| tan | ||||
| tant | ||||
| tanta | ||||
| tantes | ||||
| teu | ||||
| teus | ||||
| teva | ||||
| teves | ||||
| ton | ||||
| tons | ||||
| tot | ||||
| tota | ||||
| totes | ||||
| tots | ||||
| un | ||||
| una | ||||
| unes | ||||
| uns | ||||
| us | ||||
| va | ||||
| vaig | ||||
| vam | ||||
| van | ||||
| vas | ||||
| veu | ||||
| vosaltres | ||||
| vostra | ||||
| vostre | ||||
| vostres | ||||
							
								
								
									
										172
									
								
								archiver/solr-config-dir/lang/stopwords_cz.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								archiver/solr-config-dir/lang/stopwords_cz.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,172 @@ | |||
| a | ||||
| s | ||||
| k | ||||
| o | ||||
| i | ||||
| u | ||||
| v | ||||
| z | ||||
| dnes | ||||
| cz | ||||
| tímto | ||||
| budeš | ||||
| budem | ||||
| byli | ||||
| jseš | ||||
| můj | ||||
| svým | ||||
| ta | ||||
| tomto | ||||
| tohle | ||||
| tuto | ||||
| tyto | ||||
| jej | ||||
| zda | ||||
| proč | ||||
| máte | ||||
| tato | ||||
| kam | ||||
| tohoto | ||||
| kdo | ||||
| kteří | ||||
| mi | ||||
| nám | ||||
| tom | ||||
| tomuto | ||||
| mít | ||||
| nic | ||||
| proto | ||||
| kterou | ||||
| byla | ||||
| toho | ||||
| protože | ||||
| asi | ||||
| ho | ||||
| naši | ||||
| napište | ||||
| re | ||||
| což | ||||
| tím | ||||
| takže | ||||
| svých | ||||
| její | ||||
| svými | ||||
| jste | ||||
| aj | ||||
| tu | ||||
| tedy | ||||
| teto | ||||
| bylo | ||||
| kde | ||||
| ke | ||||
| pravé | ||||
| ji | ||||
| nad | ||||
| nejsou | ||||
| či | ||||
| pod | ||||
| téma | ||||
| mezi | ||||
| přes | ||||
| ty | ||||
| pak | ||||
| vám | ||||
| ani | ||||
| když | ||||
| však | ||||
| neg | ||||
| jsem | ||||
| tento | ||||
| článku | ||||
| články | ||||
| aby | ||||
| jsme | ||||
| před | ||||
| pta | ||||
| jejich | ||||
| byl | ||||
| ještě | ||||
| až | ||||
| bez | ||||
| také | ||||
| pouze | ||||
| první | ||||
| vaše | ||||
| která | ||||
| nás | ||||
| nový | ||||
| tipy | ||||
| pokud | ||||
| může | ||||
| strana | ||||
| jeho | ||||
| své | ||||
| jiné | ||||
| zprávy | ||||
| nové | ||||
| není | ||||
| vás | ||||
| jen | ||||
| podle | ||||
| zde | ||||
| už | ||||
| být | ||||
| více | ||||
| bude | ||||
| již | ||||
| než | ||||
| který | ||||
| by | ||||
| které | ||||
| co | ||||
| nebo | ||||
| ten | ||||
| tak | ||||
| má | ||||
| při | ||||
| od | ||||
| po | ||||
| jsou | ||||
| jak | ||||
| další | ||||
| ale | ||||
| si | ||||
| se | ||||
| ve | ||||
| to | ||||
| jako | ||||
| za | ||||
| zpět | ||||
| ze | ||||
| do | ||||
| pro | ||||
| je | ||||
| na | ||||
| atd | ||||
| atp | ||||
| jakmile | ||||
| přičemž | ||||
| já | ||||
| on | ||||
| ona | ||||
| ono | ||||
| oni | ||||
| ony | ||||
| my | ||||
| vy | ||||
| jí | ||||
| ji | ||||
| mě | ||||
| mne | ||||
| jemu | ||||
| tomu | ||||
| těm | ||||
| těmu | ||||
| němu | ||||
| němuž | ||||
| jehož | ||||
| jíž | ||||
| jelikož | ||||
| jež | ||||
| jakož | ||||
| načež | ||||
							
								
								
									
										110
									
								
								archiver/solr-config-dir/lang/stopwords_da.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								archiver/solr-config-dir/lang/stopwords_da.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,110 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | A Danish stop word list. Comments begin with vertical bar. Each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
|  | This is a ranked list (commonest to rarest) of stopwords derived from | ||||
|  | a large text sample. | ||||
| 
 | ||||
| 
 | ||||
| og           | and | ||||
| i            | in | ||||
| jeg          | I | ||||
| det          | that (dem. pronoun)/it (pers. pronoun) | ||||
| at           | that (in front of a sentence)/to (with infinitive) | ||||
| en           | a/an | ||||
| den          | it (pers. pronoun)/that (dem. pronoun) | ||||
| til          | to/at/for/until/against/by/of/into, more | ||||
| er           | present tense of "to be" | ||||
| som          | who, as | ||||
| på           | on/upon/in/on/at/to/after/of/with/for, on | ||||
| de           | they | ||||
| med          | with/by/in, along | ||||
| han          | he | ||||
| af           | of/by/from/off/for/in/with/on, off | ||||
| for          | at/for/to/from/by/of/ago, in front/before, because | ||||
| ikke         | not | ||||
| der          | who/which, there/those | ||||
| var          | past tense of "to be" | ||||
| mig          | me/myself | ||||
| sig          | oneself/himself/herself/itself/themselves | ||||
| men          | but | ||||
| et           | a/an/one, one (number), someone/somebody/one | ||||
| har          | present tense of "to have" | ||||
| om           | round/about/for/in/a, about/around/down, if | ||||
| vi           | we | ||||
| min          | my | ||||
| havde        | past tense of "to have" | ||||
| ham          | him | ||||
| hun          | she | ||||
| nu           | now | ||||
| over         | over/above/across/by/beyond/past/on/about, over/past | ||||
| da           | then, when/as/since | ||||
| fra          | from/off/since, off, since | ||||
| du           | you | ||||
| ud           | out | ||||
| sin          | his/her/its/one's | ||||
| dem          | them | ||||
| os           | us/ourselves | ||||
| op           | up | ||||
| man          | you/one | ||||
| hans         | his | ||||
| hvor         | where | ||||
| eller        | or | ||||
| hvad         | what | ||||
| skal         | must/shall etc. | ||||
| selv         | myself/yourself/herself/ourselves etc., even | ||||
| her          | here | ||||
| alle         | all/everyone/everybody etc. | ||||
| vil          | will (verb) | ||||
| blev         | past tense of "to stay/to remain/to get/to become" | ||||
| kunne        | could | ||||
| ind          | in | ||||
| når          | when | ||||
| være         | infinitive "to be" | ||||
| dog          | however/yet/after all | ||||
| noget        | something | ||||
| ville        | would | ||||
| jo           | you know/you see (adv), yes | ||||
| deres        | their/theirs | ||||
| efter        | after/behind/according to/for/by/from, later/afterwards | ||||
| ned          | down | ||||
| skulle       | should | ||||
| denne        | this | ||||
| end          | than | ||||
| dette        | this | ||||
| mit          | my/mine | ||||
| også         | also | ||||
| under        | under/beneath/below/during, below/underneath | ||||
| have         | have | ||||
| dig          | you | ||||
| anden        | other | ||||
| hende        | her | ||||
| mine         | my | ||||
| alt          | everything | ||||
| meget        | much/very, plenty of | ||||
| sit          | his, her, its, one's | ||||
| sine         | his, her, its, one's | ||||
| vor          | our | ||||
| mod          | against | ||||
| disse        | these | ||||
| hvis         | if | ||||
| din          | your/yours | ||||
| nogle        | some | ||||
| hos          | by/at | ||||
| blive        | be/become | ||||
| mange        | many | ||||
| ad           | by/through | ||||
| bliver       | present tense of "to be/to become" | ||||
| hendes       | her/hers | ||||
| været        | been (past participle of "to be") | ||||
| thi          | for (conj) | ||||
| jer          | you | ||||
| sådan        | such, like this/like that | ||||
							
								
								
									
										294
									
								
								archiver/solr-config-dir/lang/stopwords_de.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										294
									
								
								archiver/solr-config-dir/lang/stopwords_de.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,294 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | A German stop word list. Comments begin with vertical bar. Each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
|  | The number of forms in this list is reduced significantly by passing it | ||||
|  | through the German stemmer. | ||||
| 
 | ||||
| 
 | ||||
| aber           |  but | ||||
| 
 | ||||
| alle           |  all | ||||
| allem | ||||
| allen | ||||
| aller | ||||
| alles | ||||
| 
 | ||||
| als            |  than, as | ||||
| also           |  so | ||||
| am             |  an + dem | ||||
| an             |  at | ||||
| 
 | ||||
| ander          |  other | ||||
| andere | ||||
| anderem | ||||
| anderen | ||||
| anderer | ||||
| anderes | ||||
| anderm | ||||
| andern | ||||
| anderr | ||||
| anders | ||||
| 
 | ||||
| auch           |  also | ||||
| auf            |  on | ||||
| aus            |  out of | ||||
| bei            |  by | ||||
| bin            |  am | ||||
| bis            |  until | ||||
| bist           |  art | ||||
| da             |  there | ||||
| damit          |  with it | ||||
| dann           |  then | ||||
| 
 | ||||
| der            |  the | ||||
| den | ||||
| des | ||||
| dem | ||||
| die | ||||
| das | ||||
| 
 | ||||
| daß            |  that | ||||
| 
 | ||||
| derselbe       |  the same | ||||
| derselben | ||||
| denselben | ||||
| desselben | ||||
| demselben | ||||
| dieselbe | ||||
| dieselben | ||||
| dasselbe | ||||
| 
 | ||||
| dazu           |  to that | ||||
| 
 | ||||
| dein           |  thy | ||||
| deine | ||||
| deinem | ||||
| deinen | ||||
| deiner | ||||
| deines | ||||
| 
 | ||||
| denn           |  because | ||||
| 
 | ||||
| derer          |  of those | ||||
| dessen         |  of him | ||||
| 
 | ||||
| dich           |  thee | ||||
| dir            |  to thee | ||||
| du             |  thou | ||||
| 
 | ||||
| dies           |  this | ||||
| diese | ||||
| diesem | ||||
| diesen | ||||
| dieser | ||||
| dieses | ||||
| 
 | ||||
| 
 | ||||
| doch           |  (several meanings) | ||||
| dort           |  (over) there | ||||
| 
 | ||||
| 
 | ||||
| durch          |  through | ||||
| 
 | ||||
| ein            |  a | ||||
| eine | ||||
| einem | ||||
| einen | ||||
| einer | ||||
| eines | ||||
| 
 | ||||
| einig          |  some | ||||
| einige | ||||
| einigem | ||||
| einigen | ||||
| einiger | ||||
| einiges | ||||
| 
 | ||||
| einmal         |  once | ||||
| 
 | ||||
| er             |  he | ||||
| ihn            |  him | ||||
| ihm            |  to him | ||||
| 
 | ||||
| es             |  it | ||||
| etwas          |  something | ||||
| 
 | ||||
| euer           |  your | ||||
| eure | ||||
| eurem | ||||
| euren | ||||
| eurer | ||||
| eures | ||||
| 
 | ||||
| für            |  for | ||||
| gegen          |  towards | ||||
| gewesen        |  p.p. of sein | ||||
| hab            |  have | ||||
| habe           |  have | ||||
| haben          |  have | ||||
| hat            |  has | ||||
| hatte          |  had | ||||
| hatten         |  had | ||||
| hier           |  here | ||||
| hin            |  there | ||||
| hinter         |  behind | ||||
| 
 | ||||
| ich            |  I | ||||
| mich           |  me | ||||
| mir            |  to me | ||||
| 
 | ||||
| 
 | ||||
| ihr            |  you, to her | ||||
| ihre | ||||
| ihrem | ||||
| ihren | ||||
| ihrer | ||||
| ihres | ||||
| euch           |  to you | ||||
| 
 | ||||
| im             |  in + dem | ||||
| in             |  in | ||||
| indem          |  while | ||||
| ins            |  in + das | ||||
| ist            |  is | ||||
| 
 | ||||
| jede           |  each, every | ||||
| jedem | ||||
| jeden | ||||
| jeder | ||||
| jedes | ||||
| 
 | ||||
| jene           |  that | ||||
| jenem | ||||
| jenen | ||||
| jener | ||||
| jenes | ||||
| 
 | ||||
| jetzt          |  now | ||||
| kann           |  can | ||||
| 
 | ||||
| kein           |  no | ||||
| keine | ||||
| keinem | ||||
| keinen | ||||
| keiner | ||||
| keines | ||||
| 
 | ||||
| können         |  can | ||||
| könnte         |  could | ||||
| machen         |  do | ||||
| man            |  one | ||||
| 
 | ||||
| manche         |  some, many a | ||||
| manchem | ||||
| manchen | ||||
| mancher | ||||
| manches | ||||
| 
 | ||||
| mein           |  my | ||||
| meine | ||||
| meinem | ||||
| meinen | ||||
| meiner | ||||
| meines | ||||
| 
 | ||||
| mit            |  with | ||||
| muss           |  must | ||||
| musste         |  had to | ||||
| nach           |  to(wards) | ||||
| nicht          |  not | ||||
| nichts         |  nothing | ||||
| noch           |  still, yet | ||||
| nun            |  now | ||||
| nur            |  only | ||||
| ob             |  whether | ||||
| oder           |  or | ||||
| ohne           |  without | ||||
| sehr           |  very | ||||
| 
 | ||||
| sein           |  his | ||||
| seine | ||||
| seinem | ||||
| seinen | ||||
| seiner | ||||
| seines | ||||
| 
 | ||||
| selbst         |  self | ||||
| sich           |  herself | ||||
| 
 | ||||
| sie            |  they, she | ||||
| ihnen          |  to them | ||||
| 
 | ||||
| sind           |  are | ||||
| so             |  so | ||||
| 
 | ||||
| solche         |  such | ||||
| solchem | ||||
| solchen | ||||
| solcher | ||||
| solches | ||||
| 
 | ||||
| soll           |  shall | ||||
| sollte         |  should | ||||
| sondern        |  but | ||||
| sonst          |  else | ||||
| über           |  over | ||||
| um             |  about, around | ||||
| und            |  and | ||||
| 
 | ||||
| uns            |  us | ||||
| unse | ||||
| unsem | ||||
| unsen | ||||
| unser | ||||
| unses | ||||
| 
 | ||||
| unter          |  under | ||||
| viel           |  much | ||||
| vom            |  von + dem | ||||
| von            |  from | ||||
| vor            |  before | ||||
| während        |  while | ||||
| war            |  was | ||||
| waren          |  were | ||||
| warst          |  wast | ||||
| was            |  what | ||||
| weg            |  away, off | ||||
| weil           |  because | ||||
| weiter         |  further | ||||
| 
 | ||||
| welche         |  which | ||||
| welchem | ||||
| welchen | ||||
| welcher | ||||
| welches | ||||
| 
 | ||||
| wenn           |  when | ||||
| werde          |  will | ||||
| werden         |  will | ||||
| wie            |  how | ||||
| wieder         |  again | ||||
| will           |  want | ||||
| wir            |  we | ||||
| wird           |  will | ||||
| wirst          |  wilt | ||||
| wo             |  where | ||||
| wollen         |  want | ||||
| wollte         |  wanted | ||||
| würde          |  would | ||||
| würden         |  would | ||||
| zu             |  to | ||||
| zum            |  zu + dem | ||||
| zur            |  zu + der | ||||
| zwar           |  indeed | ||||
| zwischen       |  between | ||||
| 
 | ||||
							
								
								
									
										78
									
								
								archiver/solr-config-dir/lang/stopwords_el.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								archiver/solr-config-dir/lang/stopwords_el.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,78 @@ | |||
| # Lucene Greek Stopwords list | ||||
| # Note: by default this file is used after GreekLowerCaseFilter, | ||||
| # so when modifying this file use 'σ' instead of 'ς'  | ||||
| ο | ||||
| η | ||||
| το | ||||
| οι | ||||
| τα | ||||
| του | ||||
| τησ | ||||
| των | ||||
| τον | ||||
| την | ||||
| και  | ||||
| κι | ||||
| κ | ||||
| ειμαι | ||||
| εισαι | ||||
| ειναι | ||||
| ειμαστε | ||||
| ειστε | ||||
| στο | ||||
| στον | ||||
| στη | ||||
| στην | ||||
| μα | ||||
| αλλα | ||||
| απο | ||||
| για | ||||
| προσ | ||||
| με | ||||
| σε | ||||
| ωσ | ||||
| παρα | ||||
| αντι | ||||
| κατα | ||||
| μετα | ||||
| θα | ||||
| να | ||||
| δε | ||||
| δεν | ||||
| μη | ||||
| μην | ||||
| επι | ||||
| ενω | ||||
| εαν | ||||
| αν | ||||
| τοτε | ||||
| που | ||||
| πωσ | ||||
| ποιοσ | ||||
| ποια | ||||
| ποιο | ||||
| ποιοι | ||||
| ποιεσ | ||||
| ποιων | ||||
| ποιουσ | ||||
| αυτοσ | ||||
| αυτη | ||||
| αυτο | ||||
| αυτοι | ||||
| αυτων | ||||
| αυτουσ | ||||
| αυτεσ | ||||
| αυτα | ||||
| εκεινοσ | ||||
| εκεινη | ||||
| εκεινο | ||||
| εκεινοι | ||||
| εκεινεσ | ||||
| εκεινα | ||||
| εκεινων | ||||
| εκεινουσ | ||||
| οπωσ | ||||
| ομωσ | ||||
| ισωσ | ||||
| οσο | ||||
| οτι | ||||
							
								
								
									
										54
									
								
								archiver/solr-config-dir/lang/stopwords_en.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								archiver/solr-config-dir/lang/stopwords_en.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,54 @@ | |||
| # Licensed to the Apache Software Foundation (ASF) under one or more | ||||
| # contributor license agreements.  See the NOTICE file distributed with | ||||
| # this work for additional information regarding copyright ownership. | ||||
| # The ASF licenses this file to You under the Apache License, Version 2.0 | ||||
| # (the "License"); you may not use this file except in compliance with | ||||
| # the License.  You may obtain a copy of the License at | ||||
| # | ||||
| #     http://www.apache.org/licenses/LICENSE-2.0 | ||||
| # | ||||
| # Unless required by applicable law or agreed to in writing, software | ||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
| 
 | ||||
| # a couple of test stopwords to test that the words are really being | ||||
| # configured from this file: | ||||
| stopworda | ||||
| stopwordb | ||||
| 
 | ||||
| # Standard English stop words taken from Lucene's StopAnalyzer | ||||
| a | ||||
| an | ||||
| and | ||||
| are | ||||
| as | ||||
| at | ||||
| be | ||||
| but | ||||
| by | ||||
| for | ||||
| if | ||||
| in | ||||
| into | ||||
| is | ||||
| it | ||||
| no | ||||
| not | ||||
| of | ||||
| on | ||||
| or | ||||
| such | ||||
| that | ||||
| the | ||||
| their | ||||
| then | ||||
| there | ||||
| these | ||||
| they | ||||
| this | ||||
| to | ||||
| was | ||||
| will | ||||
| with | ||||
							
								
								
									
										356
									
								
								archiver/solr-config-dir/lang/stopwords_es.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										356
									
								
								archiver/solr-config-dir/lang/stopwords_es.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,356 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | A Spanish stop word list. Comments begin with vertical bar. Each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
| 
 | ||||
|  | The following is a ranked list (commonest to rarest) of stopwords | ||||
|  | deriving from a large sample of text. | ||||
| 
 | ||||
|  | Extra words have been added at the end. | ||||
| 
 | ||||
| de             |  from, of | ||||
| la             |  the, her | ||||
| que            |  who, that | ||||
| el             |  the | ||||
| en             |  in | ||||
| y              |  and | ||||
| a              |  to | ||||
| los            |  the, them | ||||
| del            |  de + el | ||||
| se             |  himself, from him etc | ||||
| las            |  the, them | ||||
| por            |  for, by, etc | ||||
| un             |  a | ||||
| para           |  for | ||||
| con            |  with | ||||
| no             |  no | ||||
| una            |  a | ||||
| su             |  his, her | ||||
| al             |  a + el | ||||
|   | es         from SER | ||||
| lo             |  him | ||||
| como           |  how | ||||
| más            |  more | ||||
| pero           |  but | ||||
| sus            |  su plural | ||||
| le             |  to him, her | ||||
| ya             |  already | ||||
| o              |  or | ||||
|   | fue        from SER | ||||
| este           |  this | ||||
|   | ha         from HABER | ||||
| sí             |  himself etc | ||||
| porque         |  because | ||||
| esta           |  this | ||||
|   | son        from SER | ||||
| entre          |  between | ||||
|   | está     from ESTAR | ||||
| cuando         |  when | ||||
| muy            |  very | ||||
| sin            |  without | ||||
| sobre          |  on | ||||
|   | ser        from SER | ||||
|   | tiene      from TENER | ||||
| también        |  also | ||||
| me             |  me | ||||
| hasta          |  until | ||||
| hay            |  there is/are | ||||
| donde          |  where | ||||
|   | han        from HABER | ||||
| quien          |  whom, that | ||||
|   | están      from ESTAR | ||||
|   | estado     from ESTAR | ||||
| desde          |  from | ||||
| todo           |  all | ||||
| nos            |  us | ||||
| durante        |  during | ||||
|   | estados    from ESTAR | ||||
| todos          |  all | ||||
| uno            |  a | ||||
| les            |  to them | ||||
| ni             |  nor | ||||
| contra         |  against | ||||
| otros          |  other | ||||
|   | fueron     from SER | ||||
| ese            |  that | ||||
| eso            |  that | ||||
|   | había      from HABER | ||||
| ante           |  before | ||||
| ellos          |  they | ||||
| e              |  and (variant of y) | ||||
| esto           |  this | ||||
| mí             |  me | ||||
| antes          |  before | ||||
| algunos        |  some | ||||
| qué            |  what? | ||||
| unos           |  a | ||||
| yo             |  I | ||||
| otro           |  other | ||||
| otras          |  other | ||||
| otra           |  other | ||||
| él             |  he | ||||
| tanto          |  so much, many | ||||
| esa            |  that | ||||
| estos          |  these | ||||
| mucho          |  much, many | ||||
| quienes        |  who | ||||
| nada           |  nothing | ||||
| muchos         |  many | ||||
| cual           |  who | ||||
|   | sea        from SER | ||||
| poco           |  few | ||||
| ella           |  she | ||||
| estar          |  to be | ||||
|   | haber      from HABER | ||||
| estas          |  these | ||||
|   | estaba     from ESTAR | ||||
|   | estamos    from ESTAR | ||||
| algunas        |  some | ||||
| algo           |  something | ||||
| nosotros       |  we | ||||
| 
 | ||||
|       | other forms | ||||
| 
 | ||||
| mi             |  my | ||||
| mis            |  mi plural | ||||
| tú             |  thou | ||||
| te             |  thee | ||||
| ti             |  thee | ||||
| tu             |  thy | ||||
| tus            |  tu plural | ||||
| ellas          |  they | ||||
| nosotras       |  we | ||||
| vosotros       |  you | ||||
| vosotras       |  you | ||||
| os             |  you | ||||
| mío            |  mine | ||||
| mía            | | ||||
| míos           | | ||||
| mías           | | ||||
| tuyo           |  thine | ||||
| tuya           | | ||||
| tuyos          | | ||||
| tuyas          | | ||||
| suyo           |  his, hers, theirs | ||||
| suya           | | ||||
| suyos          | | ||||
| suyas          | | ||||
| nuestro        |  ours | ||||
| nuestra        | | ||||
| nuestros       | | ||||
| nuestras       | | ||||
| vuestro        |  yours | ||||
| vuestra        | | ||||
| vuestros       | | ||||
| vuestras       | | ||||
| esos           |  those | ||||
| esas           |  those | ||||
| 
 | ||||
|                | forms of estar, to be (not including the infinitive): | ||||
| estoy | ||||
| estás | ||||
| está | ||||
| estamos | ||||
| estáis | ||||
| están | ||||
| esté | ||||
| estés | ||||
| estemos | ||||
| estéis | ||||
| estén | ||||
| estaré | ||||
| estarás | ||||
| estará | ||||
| estaremos | ||||
| estaréis | ||||
| estarán | ||||
| estaría | ||||
| estarías | ||||
| estaríamos | ||||
| estaríais | ||||
| estarían | ||||
| estaba | ||||
| estabas | ||||
| estábamos | ||||
| estabais | ||||
| estaban | ||||
| estuve | ||||
| estuviste | ||||
| estuvo | ||||
| estuvimos | ||||
| estuvisteis | ||||
| estuvieron | ||||
| estuviera | ||||
| estuvieras | ||||
| estuviéramos | ||||
| estuvierais | ||||
| estuvieran | ||||
| estuviese | ||||
| estuvieses | ||||
| estuviésemos | ||||
| estuvieseis | ||||
| estuviesen | ||||
| estando | ||||
| estado | ||||
| estada | ||||
| estados | ||||
| estadas | ||||
| estad | ||||
| 
 | ||||
|                | forms of haber, to have (not including the infinitive): | ||||
| he | ||||
| has | ||||
| ha | ||||
| hemos | ||||
| habéis | ||||
| han | ||||
| haya | ||||
| hayas | ||||
| hayamos | ||||
| hayáis | ||||
| hayan | ||||
| habré | ||||
| habrás | ||||
| habrá | ||||
| habremos | ||||
| habréis | ||||
| habrán | ||||
| habría | ||||
| habrías | ||||
| habríamos | ||||
| habríais | ||||
| habrían | ||||
| había | ||||
| habías | ||||
| habíamos | ||||
| habíais | ||||
| habían | ||||
| hube | ||||
| hubiste | ||||
| hubo | ||||
| hubimos | ||||
| hubisteis | ||||
| hubieron | ||||
| hubiera | ||||
| hubieras | ||||
| hubiéramos | ||||
| hubierais | ||||
| hubieran | ||||
| hubiese | ||||
| hubieses | ||||
| hubiésemos | ||||
| hubieseis | ||||
| hubiesen | ||||
| habiendo | ||||
| habido | ||||
| habida | ||||
| habidos | ||||
| habidas | ||||
| 
 | ||||
|                | forms of ser, to be (not including the infinitive): | ||||
| soy | ||||
| eres | ||||
| es | ||||
| somos | ||||
| sois | ||||
| son | ||||
| sea | ||||
| seas | ||||
| seamos | ||||
| seáis | ||||
| sean | ||||
| seré | ||||
| serás | ||||
| será | ||||
| seremos | ||||
| seréis | ||||
| serán | ||||
| sería | ||||
| serías | ||||
| seríamos | ||||
| seríais | ||||
| serían | ||||
| era | ||||
| eras | ||||
| éramos | ||||
| erais | ||||
| eran | ||||
| fui | ||||
| fuiste | ||||
| fue | ||||
| fuimos | ||||
| fuisteis | ||||
| fueron | ||||
| fuera | ||||
| fueras | ||||
| fuéramos | ||||
| fuerais | ||||
| fueran | ||||
| fuese | ||||
| fueses | ||||
| fuésemos | ||||
| fueseis | ||||
| fuesen | ||||
| siendo | ||||
| sido | ||||
|   |  sed also means 'thirst' | ||||
| 
 | ||||
|                | forms of tener, to have (not including the infinitive): | ||||
| tengo | ||||
| tienes | ||||
| tiene | ||||
| tenemos | ||||
| tenéis | ||||
| tienen | ||||
| tenga | ||||
| tengas | ||||
| tengamos | ||||
| tengáis | ||||
| tengan | ||||
| tendré | ||||
| tendrás | ||||
| tendrá | ||||
| tendremos | ||||
| tendréis | ||||
| tendrán | ||||
| tendría | ||||
| tendrías | ||||
| tendríamos | ||||
| tendríais | ||||
| tendrían | ||||
| tenía | ||||
| tenías | ||||
| teníamos | ||||
| teníais | ||||
| tenían | ||||
| tuve | ||||
| tuviste | ||||
| tuvo | ||||
| tuvimos | ||||
| tuvisteis | ||||
| tuvieron | ||||
| tuviera | ||||
| tuvieras | ||||
| tuviéramos | ||||
| tuvierais | ||||
| tuvieran | ||||
| tuviese | ||||
| tuvieses | ||||
| tuviésemos | ||||
| tuvieseis | ||||
| tuviesen | ||||
| teniendo | ||||
| tenido | ||||
| tenida | ||||
| tenidos | ||||
| tenidas | ||||
| tened | ||||
| 
 | ||||
							
								
								
									
										1603
									
								
								archiver/solr-config-dir/lang/stopwords_et.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										1603
									
								
								archiver/solr-config-dir/lang/stopwords_et.txt
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										99
									
								
								archiver/solr-config-dir/lang/stopwords_eu.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								archiver/solr-config-dir/lang/stopwords_eu.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,99 @@ | |||
| # Example set of Basque stopwords | ||||
| al | ||||
| anitz | ||||
| arabera | ||||
| asko | ||||
| baina | ||||
| bat | ||||
| batean | ||||
| batek | ||||
| bati | ||||
| batzuei | ||||
| batzuek | ||||
| batzuetan | ||||
| batzuk | ||||
| bera | ||||
| beraiek | ||||
| berau | ||||
| berauek | ||||
| bere | ||||
| berori | ||||
| beroriek | ||||
| beste | ||||
| bezala | ||||
| da | ||||
| dago | ||||
| dira | ||||
| ditu | ||||
| du | ||||
| dute | ||||
| edo | ||||
| egin | ||||
| ere | ||||
| eta | ||||
| eurak | ||||
| ez | ||||
| gainera | ||||
| gu | ||||
| gutxi | ||||
| guzti | ||||
| haiei | ||||
| haiek | ||||
| haietan | ||||
| hainbeste | ||||
| hala | ||||
| han | ||||
| handik | ||||
| hango | ||||
| hara | ||||
| hari | ||||
| hark | ||||
| hartan | ||||
| hau | ||||
| hauei | ||||
| hauek | ||||
| hauetan | ||||
| hemen | ||||
| hemendik | ||||
| hemengo | ||||
| hi | ||||
| hona | ||||
| honek | ||||
| honela | ||||
| honetan | ||||
| honi | ||||
| hor | ||||
| hori | ||||
| horiei | ||||
| horiek | ||||
| horietan | ||||
| horko | ||||
| horra | ||||
| horrek | ||||
| horrela | ||||
| horretan | ||||
| horri | ||||
| hortik | ||||
| hura | ||||
| izan | ||||
| ni | ||||
| noiz | ||||
| nola | ||||
| non | ||||
| nondik | ||||
| nongo | ||||
| nor | ||||
| nora | ||||
| ze | ||||
| zein | ||||
| zen | ||||
| zenbait | ||||
| zenbat | ||||
| zer | ||||
| zergatik | ||||
| ziren | ||||
| zituen | ||||
| zu | ||||
| zuek | ||||
| zuen | ||||
| zuten | ||||
							
								
								
									
										313
									
								
								archiver/solr-config-dir/lang/stopwords_fa.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										313
									
								
								archiver/solr-config-dir/lang/stopwords_fa.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,313 @@ | |||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | ||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | ||||
| # Also see http://www.opensource.org/licenses/bsd-license.html | ||||
| # Note: by default this file is used after normalization, so when adding entries | ||||
| # to this file, use the arabic 'ي' instead of 'ی' | ||||
| انان | ||||
| نداشته | ||||
| سراسر | ||||
| خياه | ||||
| ايشان | ||||
| وي | ||||
| تاكنون | ||||
| بيشتري | ||||
| دوم | ||||
| پس | ||||
| ناشي | ||||
| وگو | ||||
| يا | ||||
| داشتند | ||||
| سپس | ||||
| هنگام | ||||
| هرگز | ||||
| پنج | ||||
| نشان | ||||
| امسال | ||||
| ديگر | ||||
| گروهي | ||||
| شدند | ||||
| چطور | ||||
| ده | ||||
| و | ||||
| دو | ||||
| نخستين | ||||
| ولي | ||||
| چرا | ||||
| چه | ||||
| وسط | ||||
| ه | ||||
| كدام | ||||
| قابل | ||||
| يك | ||||
| رفت | ||||
| هفت | ||||
| همچنين | ||||
| در | ||||
| هزار | ||||
| بله | ||||
| بلي | ||||
| شايد | ||||
| اما | ||||
| شناسي | ||||
| گرفته | ||||
| دهد | ||||
| داشته | ||||
| دانست | ||||
| داشتن | ||||
| خواهيم | ||||
| ميليارد | ||||
| وقتيكه | ||||
| امد | ||||
| خواهد | ||||
| جز | ||||
| اورده | ||||
| شده | ||||
| بلكه | ||||
| خدمات | ||||
| شدن | ||||
| برخي | ||||
| نبود | ||||
| بسياري | ||||
| جلوگيري | ||||
| حق | ||||
| كردند | ||||
| نوعي | ||||
| بعري | ||||
| نكرده | ||||
| نظير | ||||
| نبايد | ||||
| بوده | ||||
| بودن | ||||
| داد | ||||
| اورد | ||||
| هست | ||||
| جايي | ||||
| شود | ||||
| دنبال | ||||
| داده | ||||
| بايد | ||||
| سابق | ||||
| هيچ | ||||
| همان | ||||
| انجا | ||||
| كمتر | ||||
| كجاست | ||||
| گردد | ||||
| كسي | ||||
| تر | ||||
| مردم | ||||
| تان | ||||
| دادن | ||||
| بودند | ||||
| سري | ||||
| جدا | ||||
| ندارند | ||||
| مگر | ||||
| يكديگر | ||||
| دارد | ||||
| دهند | ||||
| بنابراين | ||||
| هنگامي | ||||
| سمت | ||||
| جا | ||||
| انچه | ||||
| خود | ||||
| دادند | ||||
| زياد | ||||
| دارند | ||||
| اثر | ||||
| بدون | ||||
| بهترين | ||||
| بيشتر | ||||
| البته | ||||
| به | ||||
| براساس | ||||
| بيرون | ||||
| كرد | ||||
| بعضي | ||||
| گرفت | ||||
| توي | ||||
| اي | ||||
| ميليون | ||||
| او | ||||
| جريان | ||||
| تول | ||||
| بر | ||||
| مانند | ||||
| برابر | ||||
| باشيم | ||||
| مدتي | ||||
| گويند | ||||
| اكنون | ||||
| تا | ||||
| تنها | ||||
| جديد | ||||
| چند | ||||
| بي | ||||
| نشده | ||||
| كردن | ||||
| كردم | ||||
| گويد | ||||
| كرده | ||||
| كنيم | ||||
| نمي | ||||
| نزد | ||||
| روي | ||||
| قصد | ||||
| فقط | ||||
| بالاي | ||||
| ديگران | ||||
| اين | ||||
| ديروز | ||||
| توسط | ||||
| سوم | ||||
| ايم | ||||
| دانند | ||||
| سوي | ||||
| استفاده | ||||
| شما | ||||
| كنار | ||||
| داريم | ||||
| ساخته | ||||
| طور | ||||
| امده | ||||
| رفته | ||||
| نخست | ||||
| بيست | ||||
| نزديك | ||||
| طي | ||||
| كنيد | ||||
| از | ||||
| انها | ||||
| تمامي | ||||
| داشت | ||||
| يكي | ||||
| طريق | ||||
| اش | ||||
| چيست | ||||
| روب | ||||
| نمايد | ||||
| گفت | ||||
| چندين | ||||
| چيزي | ||||
| تواند | ||||
| ام | ||||
| ايا | ||||
| با | ||||
| ان | ||||
| ايد | ||||
| ترين | ||||
| اينكه | ||||
| ديگري | ||||
| راه | ||||
| هايي | ||||
| بروز | ||||
| همچنان | ||||
| پاعين | ||||
| كس | ||||
| حدود | ||||
| مختلف | ||||
| مقابل | ||||
| چيز | ||||
| گيرد | ||||
| ندارد | ||||
| ضد | ||||
| همچون | ||||
| سازي | ||||
| شان | ||||
| مورد | ||||
| باره | ||||
| مرسي | ||||
| خويش | ||||
| برخوردار | ||||
| چون | ||||
| خارج | ||||
| شش | ||||
| هنوز | ||||
| تحت | ||||
| ضمن | ||||
| هستيم | ||||
| گفته | ||||
| فكر | ||||
| بسيار | ||||
| پيش | ||||
| براي | ||||
| روزهاي | ||||
| انكه | ||||
| نخواهد | ||||
| بالا | ||||
| كل | ||||
| وقتي | ||||
| كي | ||||
| چنين | ||||
| كه | ||||
| گيري | ||||
| نيست | ||||
| است | ||||
| كجا | ||||
| كند | ||||
| نيز | ||||
| يابد | ||||
| بندي | ||||
| حتي | ||||
| توانند | ||||
| عقب | ||||
| خواست | ||||
| كنند | ||||
| بين | ||||
| تمام | ||||
| همه | ||||
| ما | ||||
| باشند | ||||
| مثل | ||||
| شد | ||||
| اري | ||||
| باشد | ||||
| اره | ||||
| طبق | ||||
| بعد | ||||
| اگر | ||||
| صورت | ||||
| غير | ||||
| جاي | ||||
| بيش | ||||
| ريزي | ||||
| اند | ||||
| زيرا | ||||
| چگونه | ||||
| بار | ||||
| لطفا | ||||
| مي | ||||
| درباره | ||||
| من | ||||
| ديده | ||||
| همين | ||||
| گذاري | ||||
| برداري | ||||
| علت | ||||
| گذاشته | ||||
| هم | ||||
| فوق | ||||
| نه | ||||
| ها | ||||
| شوند | ||||
| اباد | ||||
| همواره | ||||
| هر | ||||
| اول | ||||
| خواهند | ||||
| چهار | ||||
| نام | ||||
| امروز | ||||
| مان | ||||
| هاي | ||||
| قبل | ||||
| كنم | ||||
| سعي | ||||
| تازه | ||||
| را | ||||
| هستند | ||||
| زير | ||||
| جلوي | ||||
| عنوان | ||||
| بود | ||||
							
								
								
									
										97
									
								
								archiver/solr-config-dir/lang/stopwords_fi.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								archiver/solr-config-dir/lang/stopwords_fi.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,97 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
|   | ||||
| | forms of BE | ||||
| 
 | ||||
| olla | ||||
| olen | ||||
| olet | ||||
| on | ||||
| olemme | ||||
| olette | ||||
| ovat | ||||
| ole        | negative form | ||||
| 
 | ||||
| oli | ||||
| olisi | ||||
| olisit | ||||
| olisin | ||||
| olisimme | ||||
| olisitte | ||||
| olisivat | ||||
| olit | ||||
| olin | ||||
| olimme | ||||
| olitte | ||||
| olivat | ||||
| ollut | ||||
| olleet | ||||
| 
 | ||||
| en         | negation | ||||
| et | ||||
| ei | ||||
| emme | ||||
| ette | ||||
| eivät | ||||
| 
 | ||||
| |Nom   Gen    Acc    Part   Iness   Elat    Illat  Adess   Ablat   Allat   Ess    Trans | ||||
| minä   minun  minut  minua  minussa minusta minuun minulla minulta minulle               | I | ||||
| sinä   sinun  sinut  sinua  sinussa sinusta sinuun sinulla sinulta sinulle               | you | ||||
| hän    hänen  hänet  häntä  hänessä hänestä häneen hänellä häneltä hänelle               | he she | ||||
| me     meidän meidät meitä  meissä  meistä  meihin meillä  meiltä  meille                | we | ||||
| te     teidän teidät teitä  teissä  teistä  teihin teillä  teiltä  teille                | you | ||||
| he     heidän heidät heitä  heissä  heistä  heihin heillä  heiltä  heille                | they | ||||
| 
 | ||||
| tämä   tämän         tätä   tässä   tästä   tähän  tallä   tältä   tälle   tänä   täksi  | this | ||||
| tuo    tuon          tuotä  tuossa  tuosta  tuohon tuolla  tuolta  tuolle  tuona  tuoksi | that | ||||
| se     sen           sitä   siinä   siitä   siihen sillä   siltä   sille   sinä   siksi  | it | ||||
| nämä   näiden        näitä  näissä  näistä  näihin näillä  näiltä  näille  näinä  näiksi | these | ||||
| nuo    noiden        noita  noissa  noista  noihin noilla  noilta  noille  noina  noiksi | those | ||||
| ne     niiden        niitä  niissä  niistä  niihin niillä  niiltä  niille  niinä  niiksi | they | ||||
| 
 | ||||
| kuka   kenen kenet   ketä   kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who | ||||
| ketkä  keiden ketkä  keitä  keissä  keistä  keihin keillä  keiltä  keille  keinä  keiksi | (pl) | ||||
| mikä   minkä minkä   mitä   missä   mistä   mihin  millä   miltä   mille   minä   miksi  | which what | ||||
| mitkä                                                                                    | (pl) | ||||
| 
 | ||||
| joka   jonka         jota   jossa   josta   johon  jolla   jolta   jolle   jona   joksi  | who which | ||||
| jotka  joiden        joita  joissa  joista  joihin joilla  joilta  joille  joina  joiksi | (pl) | ||||
| 
 | ||||
| | conjunctions | ||||
| 
 | ||||
| että   | that | ||||
| ja     | and | ||||
| jos    | if | ||||
| koska  | because | ||||
| kuin   | than | ||||
| mutta  | but | ||||
| niin   | so | ||||
| sekä   | and | ||||
| sillä  | for | ||||
| tai    | or | ||||
| vaan   | but | ||||
| vai    | or | ||||
| vaikka | although | ||||
| 
 | ||||
| 
 | ||||
| | prepositions | ||||
| 
 | ||||
| kanssa  | with | ||||
| mukaan  | according to | ||||
| noin    | about | ||||
| poikki  | across | ||||
| yli     | over, across | ||||
| 
 | ||||
| | other | ||||
| 
 | ||||
| kun    | when | ||||
| niin   | so | ||||
| nyt    | now | ||||
| itse   | self | ||||
| 
 | ||||
							
								
								
									
										186
									
								
								archiver/solr-config-dir/lang/stopwords_fr.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										186
									
								
								archiver/solr-config-dir/lang/stopwords_fr.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,186 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | A French stop word list. Comments begin with vertical bar. Each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
| au             |  a + le | ||||
| aux            |  a + les | ||||
| avec           |  with | ||||
| ce             |  this | ||||
| ces            |  these | ||||
| dans           |  in | ||||
| de             |  of | ||||
| des            |  de + les | ||||
| du             |  de + le | ||||
| elle           |  she | ||||
| en             |  `of them' etc | ||||
| et             |  and | ||||
| eux            |  them | ||||
| il             |  he | ||||
| je             |  I | ||||
| la             |  the | ||||
| le             |  the | ||||
| leur           |  their | ||||
| lui            |  him | ||||
| ma             |  my (fem) | ||||
| mais           |  but | ||||
| me             |  me | ||||
| même           |  same; as in moi-même (myself) etc | ||||
| mes            |  my (pl) | ||||
| moi            |  me | ||||
| mon            |  my (masc) | ||||
| ne             |  not | ||||
| nos            |  our (pl) | ||||
| notre          |  our | ||||
| nous           |  we | ||||
| on             |  one | ||||
| ou             |  or | ||||
| par            |  by | ||||
| pas            |  not | ||||
| pour           |  for | ||||
| qu             |  que before vowel | ||||
| que            |  that | ||||
| qui            |  who | ||||
| sa             |  his, her (fem) | ||||
| se             |  oneself | ||||
| ses            |  his (pl) | ||||
| son            |  his, her (masc) | ||||
| sur            |  on | ||||
| ta             |  thy (fem) | ||||
| te             |  thee | ||||
| tes            |  thy (pl) | ||||
| toi            |  thee | ||||
| ton            |  thy (masc) | ||||
| tu             |  thou | ||||
| un             |  a | ||||
| une            |  a | ||||
| vos            |  your (pl) | ||||
| votre          |  your | ||||
| vous           |  you | ||||
| 
 | ||||
|                |  single letter forms | ||||
| 
 | ||||
| c              |  c' | ||||
| d              |  d' | ||||
| j              |  j' | ||||
| l              |  l' | ||||
| à              |  to, at | ||||
| m              |  m' | ||||
| n              |  n' | ||||
| s              |  s' | ||||
| t              |  t' | ||||
| y              |  there | ||||
| 
 | ||||
|                | forms of être (not including the infinitive): | ||||
| été | ||||
| étée | ||||
| étées | ||||
| étés | ||||
| étant | ||||
| suis | ||||
| es | ||||
| est | ||||
| sommes | ||||
| êtes | ||||
| sont | ||||
| serai | ||||
| seras | ||||
| sera | ||||
| serons | ||||
| serez | ||||
| seront | ||||
| serais | ||||
| serait | ||||
| serions | ||||
| seriez | ||||
| seraient | ||||
| étais | ||||
| était | ||||
| étions | ||||
| étiez | ||||
| étaient | ||||
| fus | ||||
| fut | ||||
| fûmes | ||||
| fûtes | ||||
| furent | ||||
| sois | ||||
| soit | ||||
| soyons | ||||
| soyez | ||||
| soient | ||||
| fusse | ||||
| fusses | ||||
| fût | ||||
| fussions | ||||
| fussiez | ||||
| fussent | ||||
| 
 | ||||
|                | forms of avoir (not including the infinitive): | ||||
| ayant | ||||
| eu | ||||
| eue | ||||
| eues | ||||
| eus | ||||
| ai | ||||
| as | ||||
| avons | ||||
| avez | ||||
| ont | ||||
| aurai | ||||
| auras | ||||
| aura | ||||
| aurons | ||||
| aurez | ||||
| auront | ||||
| aurais | ||||
| aurait | ||||
| aurions | ||||
| auriez | ||||
| auraient | ||||
| avais | ||||
| avait | ||||
| avions | ||||
| aviez | ||||
| avaient | ||||
| eut | ||||
| eûmes | ||||
| eûtes | ||||
| eurent | ||||
| aie | ||||
| aies | ||||
| ait | ||||
| ayons | ||||
| ayez | ||||
| aient | ||||
| eusse | ||||
| eusses | ||||
| eût | ||||
| eussions | ||||
| eussiez | ||||
| eussent | ||||
| 
 | ||||
|                | Later additions (from Jean-Christophe Deschamps) | ||||
| ceci           |  this | ||||
| cela           |  that | ||||
| celà           |  that | ||||
| cet            |  this | ||||
| cette          |  this | ||||
| ici            |  here | ||||
| ils            |  they | ||||
| les            |  the (pl) | ||||
| leurs          |  their (pl) | ||||
| quel           |  which | ||||
| quels          |  which | ||||
| quelle         |  which | ||||
| quelles        |  which | ||||
| sans           |  without | ||||
| soi            |  oneself | ||||
| 
 | ||||
							
								
								
									
										110
									
								
								archiver/solr-config-dir/lang/stopwords_ga.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								archiver/solr-config-dir/lang/stopwords_ga.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,110 @@ | |||
| 
 | ||||
| a | ||||
| ach | ||||
| ag | ||||
| agus | ||||
| an | ||||
| aon | ||||
| ar | ||||
| arna | ||||
| as | ||||
| b' | ||||
| ba | ||||
| beirt | ||||
| bhúr | ||||
| caoga | ||||
| ceathair | ||||
| ceathrar | ||||
| chomh | ||||
| chtó | ||||
| chuig | ||||
| chun | ||||
| cois | ||||
| céad | ||||
| cúig | ||||
| cúigear | ||||
| d' | ||||
| daichead | ||||
| dar | ||||
| de | ||||
| deich | ||||
| deichniúr | ||||
| den | ||||
| dhá | ||||
| do | ||||
| don | ||||
| dtí | ||||
| dá | ||||
| dár | ||||
| dó | ||||
| faoi | ||||
| faoin | ||||
| faoina | ||||
| faoinár | ||||
| fara | ||||
| fiche | ||||
| gach | ||||
| gan | ||||
| go | ||||
| gur | ||||
| haon | ||||
| hocht | ||||
| i | ||||
| iad | ||||
| idir | ||||
| in | ||||
| ina | ||||
| ins | ||||
| inár | ||||
| is | ||||
| le | ||||
| leis | ||||
| lena | ||||
| lenár | ||||
| m' | ||||
| mar | ||||
| mo | ||||
| mé | ||||
| na | ||||
| nach | ||||
| naoi | ||||
| naonúr | ||||
| ná | ||||
| ní | ||||
| níor | ||||
| nó | ||||
| nócha | ||||
| ocht | ||||
| ochtar | ||||
| os | ||||
| roimh | ||||
| sa | ||||
| seacht | ||||
| seachtar | ||||
| seachtó | ||||
| seasca | ||||
| seisear | ||||
| siad | ||||
| sibh | ||||
| sinn | ||||
| sna | ||||
| sé | ||||
| sí | ||||
| tar | ||||
| thar | ||||
| thú | ||||
| triúr | ||||
| trí | ||||
| trína | ||||
| trínár | ||||
| tríocha | ||||
| tú | ||||
| um | ||||
| ár | ||||
| é | ||||
| éis | ||||
| í | ||||
| ó | ||||
| ón | ||||
| óna | ||||
| ónár | ||||
							
								
								
									
										161
									
								
								archiver/solr-config-dir/lang/stopwords_gl.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										161
									
								
								archiver/solr-config-dir/lang/stopwords_gl.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,161 @@ | |||
| # Galician stopwords | ||||
| a | ||||
| aínda | ||||
| alí | ||||
| aquel | ||||
| aquela | ||||
| aquelas | ||||
| aqueles | ||||
| aquilo | ||||
| aquí | ||||
| ao | ||||
| aos | ||||
| as | ||||
| así | ||||
| á | ||||
| ben | ||||
| cando | ||||
| che | ||||
| co | ||||
| coa | ||||
| comigo | ||||
| con | ||||
| connosco | ||||
| contigo | ||||
| convosco | ||||
| coas | ||||
| cos | ||||
| cun | ||||
| cuns | ||||
| cunha | ||||
| cunhas | ||||
| da | ||||
| dalgunha | ||||
| dalgunhas | ||||
| dalgún | ||||
| dalgúns | ||||
| das | ||||
| de | ||||
| del | ||||
| dela | ||||
| delas | ||||
| deles | ||||
| desde | ||||
| deste | ||||
| do | ||||
| dos | ||||
| dun | ||||
| duns | ||||
| dunha | ||||
| dunhas | ||||
| e | ||||
| el | ||||
| ela | ||||
| elas | ||||
| eles | ||||
| en | ||||
| era | ||||
| eran | ||||
| esa | ||||
| esas | ||||
| ese | ||||
| eses | ||||
| esta | ||||
| estar | ||||
| estaba | ||||
| está | ||||
| están | ||||
| este | ||||
| estes | ||||
| estiven | ||||
| estou | ||||
| eu | ||||
| é | ||||
| facer | ||||
| foi | ||||
| foron | ||||
| fun | ||||
| había | ||||
| hai | ||||
| iso | ||||
| isto | ||||
| la | ||||
| las | ||||
| lle | ||||
| lles | ||||
| lo | ||||
| los | ||||
| mais | ||||
| me | ||||
| meu | ||||
| meus | ||||
| min | ||||
| miña | ||||
| miñas | ||||
| moi | ||||
| na | ||||
| nas | ||||
| neste | ||||
| nin | ||||
| no | ||||
| non | ||||
| nos | ||||
| nosa | ||||
| nosas | ||||
| noso | ||||
| nosos | ||||
| nós | ||||
| nun | ||||
| nunha | ||||
| nuns | ||||
| nunhas | ||||
| o | ||||
| os | ||||
| ou | ||||
| ó | ||||
| ós | ||||
| para | ||||
| pero | ||||
| pode | ||||
| pois | ||||
| pola | ||||
| polas | ||||
| polo | ||||
| polos | ||||
| por | ||||
| que | ||||
| se | ||||
| senón | ||||
| ser | ||||
| seu | ||||
| seus | ||||
| sexa | ||||
| sido | ||||
| sobre | ||||
| súa | ||||
| súas | ||||
| tamén | ||||
| tan | ||||
| te | ||||
| ten | ||||
| teñen | ||||
| teño | ||||
| ter | ||||
| teu | ||||
| teus | ||||
| ti | ||||
| tido | ||||
| tiña | ||||
| tiven | ||||
| túa | ||||
| túas | ||||
| un | ||||
| unha | ||||
| unhas | ||||
| uns | ||||
| vos | ||||
| vosa | ||||
| vosas | ||||
| voso | ||||
| vosos | ||||
| vós | ||||
							
								
								
									
										235
									
								
								archiver/solr-config-dir/lang/stopwords_hi.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										235
									
								
								archiver/solr-config-dir/lang/stopwords_hi.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,235 @@ | |||
| # Also see http://www.opensource.org/licenses/bsd-license.html | ||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | ||||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | ||||
| # Note: by default this file also contains forms normalized by HindiNormalizer  | ||||
| # for spelling variation (see section below), such that it can be used whether or  | ||||
| # not you enable that feature. When adding additional entries to this list, | ||||
| # please add the normalized form as well.  | ||||
| अंदर | ||||
| अत | ||||
| अपना | ||||
| अपनी | ||||
| अपने | ||||
| अभी | ||||
| आदि | ||||
| आप | ||||
| इत्यादि | ||||
| इन  | ||||
| इनका | ||||
| इन्हीं | ||||
| इन्हें | ||||
| इन्हों | ||||
| इस | ||||
| इसका | ||||
| इसकी | ||||
| इसके | ||||
| इसमें | ||||
| इसी | ||||
| इसे | ||||
| उन | ||||
| उनका | ||||
| उनकी | ||||
| उनके | ||||
| उनको | ||||
| उन्हीं | ||||
| उन्हें | ||||
| उन्हों | ||||
| उस | ||||
| उसके | ||||
| उसी | ||||
| उसे | ||||
| एक | ||||
| एवं | ||||
| एस | ||||
| ऐसे | ||||
| और | ||||
| कई | ||||
| कर | ||||
| करता | ||||
| करते | ||||
| करना | ||||
| करने | ||||
| करें | ||||
| कहते | ||||
| कहा | ||||
| का | ||||
| काफ़ी | ||||
| कि | ||||
| कितना | ||||
| किन्हें | ||||
| किन्हों | ||||
| किया | ||||
| किर | ||||
| किस | ||||
| किसी | ||||
| किसे | ||||
| की | ||||
| कुछ | ||||
| कुल | ||||
| के | ||||
| को | ||||
| कोई | ||||
| कौन | ||||
| कौनसा | ||||
| गया | ||||
| घर | ||||
| जब | ||||
| जहाँ | ||||
| जा | ||||
| जितना | ||||
| जिन | ||||
| जिन्हें | ||||
| जिन्हों | ||||
| जिस | ||||
| जिसे | ||||
| जीधर | ||||
| जैसा | ||||
| जैसे | ||||
| जो | ||||
| तक | ||||
| तब | ||||
| तरह | ||||
| तिन | ||||
| तिन्हें | ||||
| तिन्हों | ||||
| तिस | ||||
| तिसे | ||||
| तो | ||||
| था | ||||
| थी | ||||
| थे | ||||
| दबारा | ||||
| दिया | ||||
| दुसरा | ||||
| दूसरे | ||||
| दो | ||||
| द्वारा | ||||
| न | ||||
| नहीं | ||||
| ना | ||||
| निहायत | ||||
| नीचे | ||||
| ने | ||||
| पर | ||||
| पर   | ||||
| पहले | ||||
| पूरा | ||||
| पे | ||||
| फिर | ||||
| बनी | ||||
| बही | ||||
| बहुत | ||||
| बाद | ||||
| बाला | ||||
| बिलकुल | ||||
| भी | ||||
| भीतर | ||||
| मगर | ||||
| मानो | ||||
| मे | ||||
| में | ||||
| यदि | ||||
| यह | ||||
| यहाँ | ||||
| यही | ||||
| या | ||||
| यिह  | ||||
| ये | ||||
| रखें | ||||
| रहा | ||||
| रहे | ||||
| ऱ्वासा | ||||
| लिए | ||||
| लिये | ||||
| लेकिन | ||||
| व | ||||
| वर्ग | ||||
| वह | ||||
| वह  | ||||
| वहाँ | ||||
| वहीं | ||||
| वाले | ||||
| वुह  | ||||
| वे | ||||
| वग़ैरह | ||||
| संग | ||||
| सकता | ||||
| सकते | ||||
| सबसे | ||||
| सभी | ||||
| साथ | ||||
| साबुत | ||||
| साभ | ||||
| सारा | ||||
| से | ||||
| सो | ||||
| ही | ||||
| हुआ | ||||
| हुई | ||||
| हुए | ||||
| है | ||||
| हैं | ||||
| हो | ||||
| होता | ||||
| होती | ||||
| होते | ||||
| होना | ||||
| होने | ||||
| # additional normalized forms of the above | ||||
| अपनि | ||||
| जेसे | ||||
| होति | ||||
| सभि | ||||
| तिंहों | ||||
| इंहों | ||||
| दवारा | ||||
| इसि | ||||
| किंहें | ||||
| थि | ||||
| उंहों | ||||
| ओर | ||||
| जिंहें | ||||
| वहिं | ||||
| अभि | ||||
| बनि | ||||
| हि | ||||
| उंहिं | ||||
| उंहें | ||||
| हें | ||||
| वगेरह | ||||
| एसे | ||||
| रवासा | ||||
| कोन | ||||
| निचे | ||||
| काफि | ||||
| उसि | ||||
| पुरा | ||||
| भितर | ||||
| हे | ||||
| बहि | ||||
| वहां | ||||
| कोइ | ||||
| यहां | ||||
| जिंहों | ||||
| तिंहें | ||||
| किसि | ||||
| कइ | ||||
| यहि | ||||
| इंहिं | ||||
| जिधर | ||||
| इंहें | ||||
| अदि | ||||
| इतयादि | ||||
| हुइ | ||||
| कोनसा | ||||
| इसकि | ||||
| दुसरे | ||||
| जहां | ||||
| अप | ||||
| किंहों | ||||
| उनकि | ||||
| भि | ||||
| वरग | ||||
| हुअ | ||||
| जेसा | ||||
| नहिं | ||||
							
								
								
									
										211
									
								
								archiver/solr-config-dir/lang/stopwords_hu.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										211
									
								
								archiver/solr-config-dir/lang/stopwords_hu.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,211 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
|   | ||||
| | Hungarian stop word list | ||||
| | prepared by Anna Tordai | ||||
| 
 | ||||
| a | ||||
| ahogy | ||||
| ahol | ||||
| aki | ||||
| akik | ||||
| akkor | ||||
| alatt | ||||
| által | ||||
| általában | ||||
| amely | ||||
| amelyek | ||||
| amelyekben | ||||
| amelyeket | ||||
| amelyet | ||||
| amelynek | ||||
| ami | ||||
| amit | ||||
| amolyan | ||||
| amíg | ||||
| amikor | ||||
| át | ||||
| abban | ||||
| ahhoz | ||||
| annak | ||||
| arra | ||||
| arról | ||||
| az | ||||
| azok | ||||
| azon | ||||
| azt | ||||
| azzal | ||||
| azért | ||||
| aztán | ||||
| azután | ||||
| azonban | ||||
| bár | ||||
| be | ||||
| belül | ||||
| benne | ||||
| cikk | ||||
| cikkek | ||||
| cikkeket | ||||
| csak | ||||
| de | ||||
| e | ||||
| eddig | ||||
| egész | ||||
| egy | ||||
| egyes | ||||
| egyetlen | ||||
| egyéb | ||||
| egyik | ||||
| egyre | ||||
| ekkor | ||||
| el | ||||
| elég | ||||
| ellen | ||||
| elő | ||||
| először | ||||
| előtt | ||||
| első | ||||
| én | ||||
| éppen | ||||
| ebben | ||||
| ehhez | ||||
| emilyen | ||||
| ennek | ||||
| erre | ||||
| ez | ||||
| ezt | ||||
| ezek | ||||
| ezen | ||||
| ezzel | ||||
| ezért | ||||
| és | ||||
| fel | ||||
| felé | ||||
| hanem | ||||
| hiszen | ||||
| hogy | ||||
| hogyan | ||||
| igen | ||||
| így | ||||
| illetve | ||||
| ill. | ||||
| ill | ||||
| ilyen | ||||
| ilyenkor | ||||
| ison | ||||
| ismét | ||||
| itt | ||||
| jó | ||||
| jól | ||||
| jobban | ||||
| kell | ||||
| kellett | ||||
| keresztül | ||||
| keressünk | ||||
| ki | ||||
| kívül | ||||
| között | ||||
| közül | ||||
| legalább | ||||
| lehet | ||||
| lehetett | ||||
| legyen | ||||
| lenne | ||||
| lenni | ||||
| lesz | ||||
| lett | ||||
| maga | ||||
| magát | ||||
| majd | ||||
| majd | ||||
| már | ||||
| más | ||||
| másik | ||||
| meg | ||||
| még | ||||
| mellett | ||||
| mert | ||||
| mely | ||||
| melyek | ||||
| mi | ||||
| mit | ||||
| míg | ||||
| miért | ||||
| milyen | ||||
| mikor | ||||
| minden | ||||
| mindent | ||||
| mindenki | ||||
| mindig | ||||
| mint | ||||
| mintha | ||||
| mivel | ||||
| most | ||||
| nagy | ||||
| nagyobb | ||||
| nagyon | ||||
| ne | ||||
| néha | ||||
| nekem | ||||
| neki | ||||
| nem | ||||
| néhány | ||||
| nélkül | ||||
| nincs | ||||
| olyan | ||||
| ott | ||||
| össze | ||||
| ő | ||||
| ők | ||||
| őket | ||||
| pedig | ||||
| persze | ||||
| rá | ||||
| s | ||||
| saját | ||||
| sem | ||||
| semmi | ||||
| sok | ||||
| sokat | ||||
| sokkal | ||||
| számára | ||||
| szemben | ||||
| szerint | ||||
| szinte | ||||
| talán | ||||
| tehát | ||||
| teljes | ||||
| tovább | ||||
| továbbá | ||||
| több | ||||
| úgy | ||||
| ugyanis | ||||
| új | ||||
| újabb | ||||
| újra | ||||
| után | ||||
| utána | ||||
| utolsó | ||||
| vagy | ||||
| vagyis | ||||
| valaki | ||||
| valami | ||||
| valamint | ||||
| való | ||||
| vagyok | ||||
| van | ||||
| vannak | ||||
| volt | ||||
| voltam | ||||
| voltak | ||||
| voltunk | ||||
| vissza | ||||
| vele | ||||
| viszont | ||||
| volna | ||||
							
								
								
									
										46
									
								
								archiver/solr-config-dir/lang/stopwords_hy.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								archiver/solr-config-dir/lang/stopwords_hy.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,46 @@ | |||
| # example set of Armenian stopwords. | ||||
| այդ | ||||
| այլ | ||||
| այն | ||||
| այս | ||||
| դու | ||||
| դուք | ||||
| եմ | ||||
| են | ||||
| ենք | ||||
| ես | ||||
| եք | ||||
| է | ||||
| էի | ||||
| էին | ||||
| էինք | ||||
| էիր | ||||
| էիք | ||||
| էր | ||||
| ըստ | ||||
| թ | ||||
| ի | ||||
| ին | ||||
| իսկ | ||||
| իր | ||||
| կամ | ||||
| համար | ||||
| հետ | ||||
| հետո | ||||
| մենք | ||||
| մեջ | ||||
| մի | ||||
| ն | ||||
| նա | ||||
| նաև | ||||
| նրա | ||||
| նրանք | ||||
| որ | ||||
| որը | ||||
| որոնք | ||||
| որպես | ||||
| ու | ||||
| ում | ||||
| պիտի | ||||
| վրա | ||||
| և | ||||
							
								
								
									
										359
									
								
								archiver/solr-config-dir/lang/stopwords_id.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										359
									
								
								archiver/solr-config-dir/lang/stopwords_id.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,359 @@ | |||
| # from appendix D of: A Study of Stemming Effects on Information | ||||
| # Retrieval in Bahasa Indonesia | ||||
| ada | ||||
| adanya | ||||
| adalah | ||||
| adapun | ||||
| agak | ||||
| agaknya | ||||
| agar | ||||
| akan | ||||
| akankah | ||||
| akhirnya | ||||
| aku | ||||
| akulah | ||||
| amat | ||||
| amatlah | ||||
| anda | ||||
| andalah | ||||
| antar | ||||
| diantaranya | ||||
| antara | ||||
| antaranya | ||||
| diantara | ||||
| apa | ||||
| apaan | ||||
| mengapa | ||||
| apabila | ||||
| apakah | ||||
| apalagi | ||||
| apatah | ||||
| atau | ||||
| ataukah | ||||
| ataupun | ||||
| bagai | ||||
| bagaikan | ||||
| sebagai | ||||
| sebagainya | ||||
| bagaimana | ||||
| bagaimanapun | ||||
| sebagaimana | ||||
| bagaimanakah | ||||
| bagi | ||||
| bahkan | ||||
| bahwa | ||||
| bahwasanya | ||||
| sebaliknya | ||||
| banyak | ||||
| sebanyak | ||||
| beberapa | ||||
| seberapa | ||||
| begini | ||||
| beginian | ||||
| beginikah | ||||
| beginilah | ||||
| sebegini | ||||
| begitu | ||||
| begitukah | ||||
| begitulah | ||||
| begitupun | ||||
| sebegitu | ||||
| belum | ||||
| belumlah | ||||
| sebelum | ||||
| sebelumnya | ||||
| sebenarnya | ||||
| berapa | ||||
| berapakah | ||||
| berapalah | ||||
| berapapun | ||||
| betulkah | ||||
| sebetulnya | ||||
| biasa | ||||
| biasanya | ||||
| bila | ||||
| bilakah | ||||
| bisa | ||||
| bisakah | ||||
| sebisanya | ||||
| boleh | ||||
| bolehkah | ||||
| bolehlah | ||||
| buat | ||||
| bukan | ||||
| bukankah | ||||
| bukanlah | ||||
| bukannya | ||||
| cuma | ||||
| percuma | ||||
| dahulu | ||||
| dalam | ||||
| dan | ||||
| dapat | ||||
| dari | ||||
| daripada | ||||
| dekat | ||||
| demi | ||||
| demikian | ||||
| demikianlah | ||||
| sedemikian | ||||
| dengan | ||||
| depan | ||||
| di | ||||
| dia | ||||
| dialah | ||||
| dini | ||||
| diri | ||||
| dirinya | ||||
| terdiri | ||||
| dong | ||||
| dulu | ||||
| enggak | ||||
| enggaknya | ||||
| entah | ||||
| entahlah | ||||
| terhadap | ||||
| terhadapnya | ||||
| hal | ||||
| hampir | ||||
| hanya | ||||
| hanyalah | ||||
| harus | ||||
| haruslah | ||||
| harusnya | ||||
| seharusnya | ||||
| hendak | ||||
| hendaklah | ||||
| hendaknya | ||||
| hingga | ||||
| sehingga | ||||
| ia | ||||
| ialah | ||||
| ibarat | ||||
| ingin | ||||
| inginkah | ||||
| inginkan | ||||
| ini | ||||
| inikah | ||||
| inilah | ||||
| itu | ||||
| itukah | ||||
| itulah | ||||
| jangan | ||||
| jangankan | ||||
| janganlah | ||||
| jika | ||||
| jikalau | ||||
| juga | ||||
| justru | ||||
| kala | ||||
| kalau | ||||
| kalaulah | ||||
| kalaupun | ||||
| kalian | ||||
| kami | ||||
| kamilah | ||||
| kamu | ||||
| kamulah | ||||
| kan | ||||
| kapan | ||||
| kapankah | ||||
| kapanpun | ||||
| dikarenakan | ||||
| karena | ||||
| karenanya | ||||
| ke | ||||
| kecil | ||||
| kemudian | ||||
| kenapa | ||||
| kepada | ||||
| kepadanya | ||||
| ketika | ||||
| seketika | ||||
| khususnya | ||||
| kini | ||||
| kinilah | ||||
| kiranya | ||||
| sekiranya | ||||
| kita | ||||
| kitalah | ||||
| kok | ||||
| lagi | ||||
| lagian | ||||
| selagi | ||||
| lah | ||||
| lain | ||||
| lainnya | ||||
| melainkan | ||||
| selaku | ||||
| lalu | ||||
| melalui | ||||
| terlalu | ||||
| lama | ||||
| lamanya | ||||
| selama | ||||
| selama | ||||
| selamanya | ||||
| lebih | ||||
| terlebih | ||||
| bermacam | ||||
| macam | ||||
| semacam | ||||
| maka | ||||
| makanya | ||||
| makin | ||||
| malah | ||||
| malahan | ||||
| mampu | ||||
| mampukah | ||||
| mana | ||||
| manakala | ||||
| manalagi | ||||
| masih | ||||
| masihkah | ||||
| semasih | ||||
| masing | ||||
| mau | ||||
| maupun | ||||
| semaunya | ||||
| memang | ||||
| mereka | ||||
| merekalah | ||||
| meski | ||||
| meskipun | ||||
| semula | ||||
| mungkin | ||||
| mungkinkah | ||||
| nah | ||||
| namun | ||||
| nanti | ||||
| nantinya | ||||
| nyaris | ||||
| oleh | ||||
| olehnya | ||||
| seorang | ||||
| seseorang | ||||
| pada | ||||
| padanya | ||||
| padahal | ||||
| paling | ||||
| sepanjang | ||||
| pantas | ||||
| sepantasnya | ||||
| sepantasnyalah | ||||
| para | ||||
| pasti | ||||
| pastilah | ||||
| per | ||||
| pernah | ||||
| pula | ||||
| pun | ||||
| merupakan | ||||
| rupanya | ||||
| serupa | ||||
| saat | ||||
| saatnya | ||||
| sesaat | ||||
| saja | ||||
| sajalah | ||||
| saling | ||||
| bersama | ||||
| sama | ||||
| sesama | ||||
| sambil | ||||
| sampai | ||||
| sana | ||||
| sangat | ||||
| sangatlah | ||||
| saya | ||||
| sayalah | ||||
| se | ||||
| sebab | ||||
| sebabnya | ||||
| sebuah | ||||
| tersebut | ||||
| tersebutlah | ||||
| sedang | ||||
| sedangkan | ||||
| sedikit | ||||
| sedikitnya | ||||
| segala | ||||
| segalanya | ||||
| segera | ||||
| sesegera | ||||
| sejak | ||||
| sejenak | ||||
| sekali | ||||
| sekalian | ||||
| sekalipun | ||||
| sesekali | ||||
| sekaligus | ||||
| sekarang | ||||
| sekarang | ||||
| sekitar | ||||
| sekitarnya | ||||
| sela | ||||
| selain | ||||
| selalu | ||||
| seluruh | ||||
| seluruhnya | ||||
| semakin | ||||
| sementara | ||||
| sempat | ||||
| semua | ||||
| semuanya | ||||
| sendiri | ||||
| sendirinya | ||||
| seolah | ||||
| seperti | ||||
| sepertinya | ||||
| sering | ||||
| seringnya | ||||
| serta | ||||
| siapa | ||||
| siapakah | ||||
| siapapun | ||||
| disini | ||||
| disinilah | ||||
| sini | ||||
| sinilah | ||||
| sesuatu | ||||
| sesuatunya | ||||
| suatu | ||||
| sesudah | ||||
| sesudahnya | ||||
| sudah | ||||
| sudahkah | ||||
| sudahlah | ||||
| supaya | ||||
| tadi | ||||
| tadinya | ||||
| tak | ||||
| tanpa | ||||
| setelah | ||||
| telah | ||||
| tentang | ||||
| tentu | ||||
| tentulah | ||||
| tentunya | ||||
| tertentu | ||||
| seterusnya | ||||
| tapi | ||||
| tetapi | ||||
| setiap | ||||
| tiap | ||||
| setidaknya | ||||
| tidak | ||||
| tidakkah | ||||
| tidaklah | ||||
| toh | ||||
| waduh | ||||
| wah | ||||
| wahai | ||||
| sewaktu | ||||
| walau | ||||
| walaupun | ||||
| wong | ||||
| yaitu | ||||
| yakni | ||||
| yang | ||||
							
								
								
									
										303
									
								
								archiver/solr-config-dir/lang/stopwords_it.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										303
									
								
								archiver/solr-config-dir/lang/stopwords_it.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,303 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | An Italian stop word list. Comments begin with vertical bar. Each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
| ad             |  a (to) before vowel | ||||
| al             |  a + il | ||||
| allo           |  a + lo | ||||
| ai             |  a + i | ||||
| agli           |  a + gli | ||||
| all            |  a + l' | ||||
| agl            |  a + gl' | ||||
| alla           |  a + la | ||||
| alle           |  a + le | ||||
| con            |  with | ||||
| col            |  con + il | ||||
| coi            |  con + i (forms collo, cogli etc are now very rare) | ||||
| da             |  from | ||||
| dal            |  da + il | ||||
| dallo          |  da + lo | ||||
| dai            |  da + i | ||||
| dagli          |  da + gli | ||||
| dall           |  da + l' | ||||
| dagl          |  da + gl' | ||||
| dalla          |  da + la | ||||
| dalle          |  da + le | ||||
| di             |  of | ||||
| del            |  di + il | ||||
| dello          |  di + lo | ||||
| dei            |  di + i | ||||
| degli          |  di + gli | ||||
| dell           |  di + l' | ||||
| degl           |  di + gl' | ||||
| della          |  di + la | ||||
| delle          |  di + le | ||||
| in             |  in | ||||
| nel            |  in + il | ||||
| nello          |  in + lo | ||||
| nei            |  in + i | ||||
| negli          |  in + gli | ||||
| nell           |  in + l' | ||||
| negl           |  in + gl' | ||||
| nella          |  in + la | ||||
| nelle          |  in + le | ||||
| su             |  on | ||||
| sul            |  su + il | ||||
| sullo          |  su + lo | ||||
| sui            |  su + i | ||||
| sugli          |  su + gli | ||||
| sull           |  su + l' | ||||
| sugl           |  su + gl' | ||||
| sulla          |  su + la | ||||
| sulle          |  su + le | ||||
| per            |  through, by | ||||
| tra            |  among | ||||
| contro         |  against | ||||
| io             |  I | ||||
| tu             |  thou | ||||
| lui            |  he | ||||
| lei            |  she | ||||
| noi            |  we | ||||
| voi            |  you | ||||
| loro           |  they | ||||
| mio            |  my | ||||
| mia            | | ||||
| miei           | | ||||
| mie            | | ||||
| tuo            | | ||||
| tua            | | ||||
| tuoi           |  thy | ||||
| tue            | | ||||
| suo            | | ||||
| sua            | | ||||
| suoi           |  his, her | ||||
| sue            | | ||||
| nostro         |  our | ||||
| nostra         | | ||||
| nostri         | | ||||
| nostre         | | ||||
| vostro         |  your | ||||
| vostra         | | ||||
| vostri         | | ||||
| vostre         | | ||||
| mi             |  me | ||||
| ti             |  thee | ||||
| ci             |  us, there | ||||
| vi             |  you, there | ||||
| lo             |  him, the | ||||
| la             |  her, the | ||||
| li             |  them | ||||
| le             |  them, the | ||||
| gli            |  to him, the | ||||
| ne             |  from there etc | ||||
| il             |  the | ||||
| un             |  a | ||||
| uno            |  a | ||||
| una            |  a | ||||
| ma             |  but | ||||
| ed             |  and | ||||
| se             |  if | ||||
| perché         |  why, because | ||||
| anche          |  also | ||||
| come           |  how | ||||
| dov            |  where (as dov') | ||||
| dove           |  where | ||||
| che            |  who, that | ||||
| chi            |  who | ||||
| cui            |  whom | ||||
| non            |  not | ||||
| più            |  more | ||||
| quale          |  who, that | ||||
| quanto         |  how much | ||||
| quanti         | | ||||
| quanta         | | ||||
| quante         | | ||||
| quello         |  that | ||||
| quelli         | | ||||
| quella         | | ||||
| quelle         | | ||||
| questo         |  this | ||||
| questi         | | ||||
| questa         | | ||||
| queste         | | ||||
| si             |  yes | ||||
| tutto          |  all | ||||
| tutti          |  all | ||||
| 
 | ||||
|                |  single letter forms: | ||||
| 
 | ||||
| a              |  at | ||||
| c              |  as c' for ce or ci | ||||
| e              |  and | ||||
| i              |  the | ||||
| l              |  as l' | ||||
| o              |  or | ||||
| 
 | ||||
|                | forms of avere, to have (not including the infinitive): | ||||
| 
 | ||||
| ho | ||||
| hai | ||||
| ha | ||||
| abbiamo | ||||
| avete | ||||
| hanno | ||||
| abbia | ||||
| abbiate | ||||
| abbiano | ||||
| avrò | ||||
| avrai | ||||
| avrà | ||||
| avremo | ||||
| avrete | ||||
| avranno | ||||
| avrei | ||||
| avresti | ||||
| avrebbe | ||||
| avremmo | ||||
| avreste | ||||
| avrebbero | ||||
| avevo | ||||
| avevi | ||||
| aveva | ||||
| avevamo | ||||
| avevate | ||||
| avevano | ||||
| ebbi | ||||
| avesti | ||||
| ebbe | ||||
| avemmo | ||||
| aveste | ||||
| ebbero | ||||
| avessi | ||||
| avesse | ||||
| avessimo | ||||
| avessero | ||||
| avendo | ||||
| avuto | ||||
| avuta | ||||
| avuti | ||||
| avute | ||||
| 
 | ||||
|                | forms of essere, to be (not including the infinitive): | ||||
| sono | ||||
| sei | ||||
| è | ||||
| siamo | ||||
| siete | ||||
| sia | ||||
| siate | ||||
| siano | ||||
| sarò | ||||
| sarai | ||||
| sarà | ||||
| saremo | ||||
| sarete | ||||
| saranno | ||||
| sarei | ||||
| saresti | ||||
| sarebbe | ||||
| saremmo | ||||
| sareste | ||||
| sarebbero | ||||
| ero | ||||
| eri | ||||
| era | ||||
| eravamo | ||||
| eravate | ||||
| erano | ||||
| fui | ||||
| fosti | ||||
| fu | ||||
| fummo | ||||
| foste | ||||
| furono | ||||
| fossi | ||||
| fosse | ||||
| fossimo | ||||
| fossero | ||||
| essendo | ||||
| 
 | ||||
|                | forms of fare, to do (not including the infinitive, fa, fat-): | ||||
| faccio | ||||
| fai | ||||
| facciamo | ||||
| fanno | ||||
| faccia | ||||
| facciate | ||||
| facciano | ||||
| farò | ||||
| farai | ||||
| farà | ||||
| faremo | ||||
| farete | ||||
| faranno | ||||
| farei | ||||
| faresti | ||||
| farebbe | ||||
| faremmo | ||||
| fareste | ||||
| farebbero | ||||
| facevo | ||||
| facevi | ||||
| faceva | ||||
| facevamo | ||||
| facevate | ||||
| facevano | ||||
| feci | ||||
| facesti | ||||
| fece | ||||
| facemmo | ||||
| faceste | ||||
| fecero | ||||
| facessi | ||||
| facesse | ||||
| facessimo | ||||
| facessero | ||||
| facendo | ||||
| 
 | ||||
|                | forms of stare, to be (not including the infinitive): | ||||
| sto | ||||
| stai | ||||
| sta | ||||
| stiamo | ||||
| stanno | ||||
| stia | ||||
| stiate | ||||
| stiano | ||||
| starò | ||||
| starai | ||||
| starà | ||||
| staremo | ||||
| starete | ||||
| staranno | ||||
| starei | ||||
| staresti | ||||
| starebbe | ||||
| staremmo | ||||
| stareste | ||||
| starebbero | ||||
| stavo | ||||
| stavi | ||||
| stava | ||||
| stavamo | ||||
| stavate | ||||
| stavano | ||||
| stetti | ||||
| stesti | ||||
| stette | ||||
| stemmo | ||||
| steste | ||||
| stettero | ||||
| stessi | ||||
| stesse | ||||
| stessimo | ||||
| stessero | ||||
| stando | ||||
							
								
								
									
										127
									
								
								archiver/solr-config-dir/lang/stopwords_ja.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								archiver/solr-config-dir/lang/stopwords_ja.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,127 @@ | |||
| # | ||||
| # This file defines a stopword set for Japanese. | ||||
| # | ||||
| # This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. | ||||
| # Punctuation characters and frequent kanji have mostly been left out.  See LUCENE-3745 | ||||
| # for frequency lists, etc. that can be useful for making your own set (if desired) | ||||
| # | ||||
| # Note that there is an overlap between these stopwords and the terms stopped when used | ||||
| # in combination with the JapanesePartOfSpeechStopFilter.  When editing this file, note | ||||
| # that comments are not allowed on the same line as stopwords. | ||||
| # | ||||
| # Also note that stopping is done in a case-insensitive manner.  Change your StopFilter | ||||
| # configuration if you need case-sensitive stopping.  Lastly, note that stopping is done | ||||
| # using the same character width as the entries in this file.  Since this StopFilter is | ||||
| # normally done after a CJKWidthFilter in your chain, you would usually want your romaji | ||||
| # entries to be in half-width and your kana entries to be in full-width. | ||||
| # | ||||
| の | ||||
| に | ||||
| は | ||||
| を | ||||
| た | ||||
| が | ||||
| で | ||||
| て | ||||
| と | ||||
| し | ||||
| れ | ||||
| さ | ||||
| ある | ||||
| いる | ||||
| も | ||||
| する | ||||
| から | ||||
| な | ||||
| こと | ||||
| として | ||||
| い | ||||
| や | ||||
| れる | ||||
| など | ||||
| なっ | ||||
| ない | ||||
| この | ||||
| ため | ||||
| その | ||||
| あっ | ||||
| よう | ||||
| また | ||||
| もの | ||||
| という | ||||
| あり | ||||
| まで | ||||
| られ | ||||
| なる | ||||
| へ | ||||
| か | ||||
| だ | ||||
| これ | ||||
| によって | ||||
| により | ||||
| おり | ||||
| より | ||||
| による | ||||
| ず | ||||
| なり | ||||
| られる | ||||
| において | ||||
| ば | ||||
| なかっ | ||||
| なく | ||||
| しかし | ||||
| について | ||||
| せ | ||||
| だっ | ||||
| その後 | ||||
| できる | ||||
| それ | ||||
| う | ||||
| ので | ||||
| なお | ||||
| のみ | ||||
| でき | ||||
| き | ||||
| つ | ||||
| における | ||||
| および | ||||
| いう | ||||
| さらに | ||||
| でも | ||||
| ら | ||||
| たり | ||||
| その他 | ||||
| に関する | ||||
| たち | ||||
| ます | ||||
| ん | ||||
| なら | ||||
| に対して | ||||
| 特に | ||||
| せる | ||||
| 及び | ||||
| これら | ||||
| とき | ||||
| では | ||||
| にて | ||||
| ほか | ||||
| ながら | ||||
| うち | ||||
| そして | ||||
| とともに | ||||
| ただし | ||||
| かつて | ||||
| それぞれ | ||||
| または | ||||
| お | ||||
| ほど | ||||
| ものの | ||||
| に対する | ||||
| ほとんど | ||||
| と共に | ||||
| といった | ||||
| です | ||||
| とも | ||||
| ところ | ||||
| ここ | ||||
| ##### End of file | ||||
							
								
								
									
										172
									
								
								archiver/solr-config-dir/lang/stopwords_lv.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										172
									
								
								archiver/solr-config-dir/lang/stopwords_lv.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,172 @@ | |||
| # Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins | ||||
| # the original list of over 800 forms was refined:  | ||||
| #   pronouns, adverbs, interjections were removed | ||||
| #  | ||||
| # prepositions | ||||
| aiz | ||||
| ap | ||||
| ar | ||||
| apakš | ||||
| ārpus | ||||
| augšpus | ||||
| bez | ||||
| caur | ||||
| dēļ | ||||
| gar | ||||
| iekš | ||||
| iz | ||||
| kopš | ||||
| labad | ||||
| lejpus | ||||
| līdz | ||||
| no | ||||
| otrpus | ||||
| pa | ||||
| par | ||||
| pār | ||||
| pēc | ||||
| pie | ||||
| pirms | ||||
| pret | ||||
| priekš | ||||
| starp | ||||
| šaipus | ||||
| uz | ||||
| viņpus | ||||
| virs | ||||
| virspus | ||||
| zem | ||||
| apakšpus | ||||
| # Conjunctions | ||||
| un | ||||
| bet | ||||
| jo | ||||
| ja | ||||
| ka | ||||
| lai | ||||
| tomēr | ||||
| tikko | ||||
| turpretī | ||||
| arī | ||||
| kaut | ||||
| gan | ||||
| tādēļ | ||||
| tā | ||||
| ne | ||||
| tikvien | ||||
| vien | ||||
| kā | ||||
| ir | ||||
| te | ||||
| vai | ||||
| kamēr | ||||
| # Particles | ||||
| ar | ||||
| diezin | ||||
| droši | ||||
| diemžēl | ||||
| nebūt | ||||
| ik | ||||
| it | ||||
| taču | ||||
| nu | ||||
| pat | ||||
| tiklab | ||||
| iekšpus | ||||
| nedz | ||||
| tik | ||||
| nevis | ||||
| turpretim | ||||
| jeb | ||||
| iekam | ||||
| iekām | ||||
| iekāms | ||||
| kolīdz | ||||
| līdzko | ||||
| tiklīdz | ||||
| jebšu | ||||
| tālab | ||||
| tāpēc | ||||
| nekā | ||||
| itin | ||||
| jā | ||||
| jau | ||||
| jel | ||||
| nē | ||||
| nezin | ||||
| tad | ||||
| tikai | ||||
| vis | ||||
| tak | ||||
| iekams | ||||
| vien | ||||
| # modal verbs | ||||
| būt   | ||||
| biju  | ||||
| biji | ||||
| bija | ||||
| bijām | ||||
| bijāt | ||||
| esmu | ||||
| esi | ||||
| esam | ||||
| esat  | ||||
| būšu      | ||||
| būsi | ||||
| būs | ||||
| būsim | ||||
| būsiet | ||||
| tikt | ||||
| tiku | ||||
| tiki | ||||
| tika | ||||
| tikām | ||||
| tikāt | ||||
| tieku | ||||
| tiec | ||||
| tiek | ||||
| tiekam | ||||
| tiekat | ||||
| tikšu | ||||
| tiks | ||||
| tiksim | ||||
| tiksiet | ||||
| tapt | ||||
| tapi | ||||
| tapāt | ||||
| topat | ||||
| tapšu | ||||
| tapsi | ||||
| taps | ||||
| tapsim | ||||
| tapsiet | ||||
| kļūt | ||||
| kļuvu | ||||
| kļuvi | ||||
| kļuva | ||||
| kļuvām | ||||
| kļuvāt | ||||
| kļūstu | ||||
| kļūsti | ||||
| kļūst | ||||
| kļūstam | ||||
| kļūstat | ||||
| kļūšu | ||||
| kļūsi | ||||
| kļūs | ||||
| kļūsim | ||||
| kļūsiet | ||||
| # verbs | ||||
| varēt | ||||
| varēju | ||||
| varējām | ||||
| varēšu | ||||
| varēsim | ||||
| var | ||||
| varēji | ||||
| varējāt | ||||
| varēsi | ||||
| varēsiet | ||||
| varat | ||||
| varēja | ||||
| varēs | ||||
							
								
								
									
										119
									
								
								archiver/solr-config-dir/lang/stopwords_nl.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								archiver/solr-config-dir/lang/stopwords_nl.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,119 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | A Dutch stop word list. Comments begin with vertical bar. Each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
|  | This is a ranked list (commonest to rarest) of stopwords derived from | ||||
|  | a large sample of Dutch text. | ||||
| 
 | ||||
|  | Dutch stop words frequently exhibit homonym clashes. These are indicated | ||||
|  | clearly below. | ||||
| 
 | ||||
| de             |  the | ||||
| en             |  and | ||||
| van            |  of, from | ||||
| ik             |  I, the ego | ||||
| te             |  (1) chez, at etc, (2) to, (3) too | ||||
| dat            |  that, which | ||||
| die            |  that, those, who, which | ||||
| in             |  in, inside | ||||
| een            |  a, an, one | ||||
| hij            |  he | ||||
| het            |  the, it | ||||
| niet           |  not, nothing, naught | ||||
| zijn           |  (1) to be, being, (2) his, one's, its | ||||
| is             |  is | ||||
| was            |  (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river | ||||
| op             |  on, upon, at, in, up, used up | ||||
| aan            |  on, upon, to (as dative) | ||||
| met            |  with, by | ||||
| als            |  like, such as, when | ||||
| voor           |  (1) before, in front of, (2) furrow | ||||
| had            |  had, past tense all persons sing. of 'hebben' (have) | ||||
| er             |  there | ||||
| maar           |  but, only | ||||
| om             |  round, about, for etc | ||||
| hem            |  him | ||||
| dan            |  then | ||||
| zou            |  should/would, past tense all persons sing. of 'zullen' | ||||
| of             |  or, whether, if | ||||
| wat            |  what, something, anything | ||||
| mijn           |  possessive and noun 'mine' | ||||
| men            |  people, 'one' | ||||
| dit            |  this | ||||
| zo             |  so, thus, in this way | ||||
| door           |  through, by | ||||
| over           |  over, across | ||||
| ze             |  she, her, they, them | ||||
| zich           |  oneself | ||||
| bij            |  (1) a bee, (2) by, near, at | ||||
| ook            |  also, too | ||||
| tot            |  till, until | ||||
| je             |  you | ||||
| mij            |  me | ||||
| uit            |  out of, from | ||||
| der            |  Old Dutch form of 'van der' still found in surnames | ||||
| daar           |  (1) there, (2) because | ||||
| haar           |  (1) her, their, them, (2) hair | ||||
| naar           |  (1) unpleasant, unwell etc, (2) towards, (3) as | ||||
| heb            |  present first person sing. of 'to have' | ||||
| hoe            |  how, why | ||||
| heeft          |  present third person sing. of 'to have' | ||||
| hebben         |  'to have' and various parts thereof | ||||
| deze           |  this | ||||
| u              |  you | ||||
| want           |  (1) for, (2) mitten, (3) rigging | ||||
| nog            |  yet, still | ||||
| zal            |  'shall', first and third person sing. of verb 'zullen' (will) | ||||
| me             |  me | ||||
| zij            |  she, they | ||||
| nu             |  now | ||||
| ge             |  'thou', still used in Belgium and south Netherlands | ||||
| geen           |  none | ||||
| omdat          |  because | ||||
| iets           |  something, somewhat | ||||
| worden         |  to become, grow, get | ||||
| toch           |  yet, still | ||||
| al             |  all, every, each | ||||
| waren          |  (1) 'were', (2) to wander, (3) wares | ||||
| veel           |  much, many | ||||
| meer           |  (1) more, (2) lake | ||||
| doen           |  to do, to make | ||||
| toen           |  then, when | ||||
| moet           |  noun 'spot/mote' and present form of 'to must' | ||||
| ben            |  (1) am, (2) 'are' in interrogative second person singular of 'to be' | ||||
| zonder         |  without | ||||
| kan            |  noun 'can' and present form of 'to be able' | ||||
| hun            |  their, them | ||||
| dus            |  so, consequently | ||||
| alles          |  all, everything, anything | ||||
| onder          |  under, beneath | ||||
| ja             |  yes, of course | ||||
| eens           |  once, one day | ||||
| hier           |  here | ||||
| wie            |  who | ||||
| werd           |  imperfect third person sing. of 'become' | ||||
| altijd         |  always | ||||
| doch           |  yet, but etc | ||||
| wordt          |  present third person sing. of 'become' | ||||
| wezen          |  (1) to be, (2) 'been' as in 'been fishing', (3) orphans | ||||
| kunnen         |  to be able | ||||
| ons            |  us/our | ||||
| zelf           |  self | ||||
| tegen          |  against, towards, at | ||||
| na             |  after, near | ||||
| reeds          |  already | ||||
| wil            |  (1) present tense of 'want', (2) 'will', noun, (3) fender | ||||
| kon            |  could; past tense of 'to be able' | ||||
| niets          |  nothing | ||||
| uw             |  your | ||||
| iemand         |  somebody | ||||
| geweest        |  been; past participle of 'be' | ||||
| andere         |  other | ||||
							
								
								
									
										194
									
								
								archiver/solr-config-dir/lang/stopwords_no.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										194
									
								
								archiver/solr-config-dir/lang/stopwords_no.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,194 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | A Norwegian stop word list. Comments begin with vertical bar. Each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
|  | This stop word list is for the dominant bokmål dialect. Words unique | ||||
|  | to nynorsk are marked *. | ||||
| 
 | ||||
|  | Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005 | ||||
| 
 | ||||
| og             | and | ||||
| i              | in | ||||
| jeg            | I | ||||
| det            | it/this/that | ||||
| at             | to (w. inf.) | ||||
| en             | a/an | ||||
| et             | a/an | ||||
| den            | it/this/that | ||||
| til            | to | ||||
| er             | is/am/are | ||||
| som            | who/that | ||||
| på             | on | ||||
| de             | they / you(formal) | ||||
| med            | with | ||||
| han            | he | ||||
| av             | of | ||||
| ikke           | not | ||||
| ikkje          | not * | ||||
| der            | there | ||||
| så             | so | ||||
| var            | was/were | ||||
| meg            | me | ||||
| seg            | oneself (reflexive) | ||||
| men            | but | ||||
| ett            | one | ||||
| har            | have | ||||
| om             | about | ||||
| vi             | we | ||||
| min            | my | ||||
| mitt           | my | ||||
| ha             | have | ||||
| hadde          | had | ||||
| hun            | she | ||||
| nå             | now | ||||
| over           | over | ||||
| da             | when/as | ||||
| ved            | by/know | ||||
| fra            | from | ||||
| du             | you | ||||
| ut             | out | ||||
| sin            | hers/his | ||||
| dem            | them | ||||
| oss            | us | ||||
| opp            | up | ||||
| man            | you/one | ||||
| kan            | can | ||||
| hans           | his | ||||
| hvor           | where | ||||
| eller          | or | ||||
| hva            | what | ||||
| skal           | shall/must | ||||
| selv           | self (reflective) | ||||
| sjøl           | self (reflective) | ||||
| her            | here | ||||
| alle           | all | ||||
| vil            | will | ||||
| bli            | become | ||||
| ble            | became | ||||
| blei           | became * | ||||
| blitt          | have become | ||||
| kunne          | could | ||||
| inn            | in | ||||
| når            | when | ||||
| være           | be | ||||
| kom            | come | ||||
| noen           | some | ||||
| noe            | some | ||||
| ville          | would | ||||
| dere           | you | ||||
| som            | who/which/that | ||||
| deres          | their/theirs | ||||
| kun            | only/just | ||||
| ja             | yes | ||||
| etter          | after | ||||
| ned            | down | ||||
| skulle         | should | ||||
| denne          | this | ||||
| for            | for/because | ||||
| deg            | you | ||||
| si             | hers/his | ||||
| sine           | hers/his | ||||
| sitt           | hers/his | ||||
| mot            | against | ||||
| å              | to | ||||
| meget          | much | ||||
| hvorfor        | why | ||||
| dette          | this | ||||
| disse          | these/those | ||||
| uten           | without | ||||
| hvordan        | how | ||||
| ingen          | none | ||||
| din            | your | ||||
| ditt           | your | ||||
| blir           | become | ||||
| samme          | same | ||||
| hvilken        | which | ||||
| hvilke         | which (plural) | ||||
| sånn           | such a | ||||
| inni           | inside/within | ||||
| mellom         | between | ||||
| vår            | our | ||||
| hver           | each | ||||
| hvem           | who | ||||
| vors           | us/ours | ||||
| hvis           | whose | ||||
| både           | both | ||||
| bare           | only/just | ||||
| enn            | than | ||||
| fordi          | as/because | ||||
| før            | before | ||||
| mange          | many | ||||
| også           | also | ||||
| slik           | just | ||||
| vært           | been | ||||
| være           | to be | ||||
| båe            | both * | ||||
| begge          | both | ||||
| siden          | since | ||||
| dykk           | your * | ||||
| dykkar         | yours * | ||||
| dei            | they * | ||||
| deira          | them * | ||||
| deires         | theirs * | ||||
| deim           | them * | ||||
| di             | your (fem.) * | ||||
| då             | as/when * | ||||
| eg             | I * | ||||
| ein            | a/an * | ||||
| eit            | a/an * | ||||
| eitt           | a/an * | ||||
| elles          | or * | ||||
| honom          | him * | ||||
| hjå            | at * | ||||
| ho             | she * | ||||
| hoe            | she * | ||||
| henne          | her | ||||
| hennar         | her/hers | ||||
| hennes         | hers | ||||
| hoss           | how * | ||||
| hossen         | how * | ||||
| ikkje          | not * | ||||
| ingi           | noone * | ||||
| inkje          | noone * | ||||
| korleis        | how * | ||||
| korso          | how * | ||||
| kva            | what/which * | ||||
| kvar           | where * | ||||
| kvarhelst      | where * | ||||
| kven           | who/whom * | ||||
| kvi            | why * | ||||
| kvifor         | why * | ||||
| me             | we * | ||||
| medan          | while * | ||||
| mi             | my * | ||||
| mine           | my * | ||||
| mykje          | much * | ||||
| no             | now * | ||||
| nokon          | some (masc./neut.) * | ||||
| noka           | some (fem.) * | ||||
| nokor          | some * | ||||
| noko           | some * | ||||
| nokre          | some * | ||||
| si             | his/hers * | ||||
| sia            | since * | ||||
| sidan          | since * | ||||
| so             | so * | ||||
| somt           | some * | ||||
| somme          | some * | ||||
| um             | about * | ||||
| upp            | up * | ||||
| vere           | be * | ||||
| vore           | was * | ||||
| verte          | become * | ||||
| vort           | become * | ||||
| varte          | became * | ||||
| vart           | became * | ||||
| 
 | ||||
							
								
								
									
										253
									
								
								archiver/solr-config-dir/lang/stopwords_pt.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										253
									
								
								archiver/solr-config-dir/lang/stopwords_pt.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,253 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | A Portuguese stop word list. Comments begin with vertical bar. Each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
| 
 | ||||
|  | The following is a ranked list (commonest to rarest) of stopwords | ||||
|  | deriving from a large sample of text. | ||||
| 
 | ||||
|  | Extra words have been added at the end. | ||||
| 
 | ||||
| de             |  of, from | ||||
| a              |  the; to, at; her | ||||
| o              |  the; him | ||||
| que            |  who, that | ||||
| e              |  and | ||||
| do             |  de + o | ||||
| da             |  de + a | ||||
| em             |  in | ||||
| um             |  a | ||||
| para           |  for | ||||
|   | é          from SER | ||||
| com            |  with | ||||
| não            |  not, no | ||||
| uma            |  a | ||||
| os             |  the; them | ||||
| no             |  em + o | ||||
| se             |  himself etc | ||||
| na             |  em + a | ||||
| por            |  for | ||||
| mais           |  more | ||||
| as             |  the; them | ||||
| dos            |  de + os | ||||
| como           |  as, like | ||||
| mas            |  but | ||||
|   | foi        from SER | ||||
| ao             |  a + o | ||||
| ele            |  he | ||||
| das            |  de + as | ||||
|   | tem        from TER | ||||
| à              |  a + a | ||||
| seu            |  his | ||||
| sua            |  her | ||||
| ou             |  or | ||||
|   | ser        from SER | ||||
| quando         |  when | ||||
| muito          |  much | ||||
|   | há         from HAV | ||||
| nos            |  em + os; us | ||||
| já             |  already, now | ||||
|   | está       from EST | ||||
| eu             |  I | ||||
| também         |  also | ||||
| só             |  only, just | ||||
| pelo           |  per + o | ||||
| pela           |  per + a | ||||
| até            |  up to | ||||
| isso           |  that | ||||
| ela            |  she | ||||
| entre          |  between | ||||
|   | era        from SER | ||||
| depois         |  after | ||||
| sem            |  without | ||||
| mesmo          |  same | ||||
| aos            |  a + os | ||||
|   | ter        from TER | ||||
| seus           |  his | ||||
| quem           |  whom | ||||
| nas            |  em + as | ||||
| me             |  me | ||||
| esse           |  that | ||||
| eles           |  they | ||||
|   | estão      from EST | ||||
| você           |  you | ||||
|   | tinha      from TER | ||||
|   | foram      from SER | ||||
| essa           |  that | ||||
| num            |  em + um | ||||
| nem            |  nor | ||||
| suas           |  her | ||||
| meu            |  my | ||||
| às             |  a + as | ||||
| minha          |  my | ||||
|   | têm        from TER | ||||
| numa           |  em + uma | ||||
| pelos          |  per + os | ||||
| elas           |  they | ||||
|   | havia      from HAV | ||||
|   | seja       from SER | ||||
| qual           |  which | ||||
|   | será       from SER | ||||
| nós            |  we | ||||
|   | tenho      from TER | ||||
| lhe            |  to him, her | ||||
| deles          |  of them | ||||
| essas          |  those | ||||
| esses          |  those | ||||
| pelas          |  per + as | ||||
| este           |  this | ||||
|   | fosse      from SER | ||||
| dele           |  of him | ||||
| 
 | ||||
|  | other words. There are many contractions such as naquele = em+aquele, | ||||
|  | mo = me+o, but they are rare. | ||||
|  | Indefinite article plural forms are also rare. | ||||
| 
 | ||||
| tu             |  thou | ||||
| te             |  thee | ||||
| vocês          |  you (plural) | ||||
| vos            |  you | ||||
| lhes           |  to them | ||||
| meus           |  my | ||||
| minhas | ||||
| teu            |  thy | ||||
| tua | ||||
| teus | ||||
| tuas | ||||
| nosso          | our | ||||
| nossa | ||||
| nossos | ||||
| nossas | ||||
| 
 | ||||
| dela           |  of her | ||||
| delas          |  of them | ||||
| 
 | ||||
| esta           |  this | ||||
| estes          |  these | ||||
| estas          |  these | ||||
| aquele         |  that | ||||
| aquela         |  that | ||||
| aqueles        |  those | ||||
| aquelas        |  those | ||||
| isto           |  this | ||||
| aquilo         |  that | ||||
| 
 | ||||
|                | forms of estar, to be (not including the infinitive): | ||||
| estou | ||||
| está | ||||
| estamos | ||||
| estão | ||||
| estive | ||||
| esteve | ||||
| estivemos | ||||
| estiveram | ||||
| estava | ||||
| estávamos | ||||
| estavam | ||||
| estivera | ||||
| estivéramos | ||||
| esteja | ||||
| estejamos | ||||
| estejam | ||||
| estivesse | ||||
| estivéssemos | ||||
| estivessem | ||||
| estiver | ||||
| estivermos | ||||
| estiverem | ||||
| 
 | ||||
|                | forms of haver, to have (not including the infinitive): | ||||
| hei | ||||
| há | ||||
| havemos | ||||
| hão | ||||
| houve | ||||
| houvemos | ||||
| houveram | ||||
| houvera | ||||
| houvéramos | ||||
| haja | ||||
| hajamos | ||||
| hajam | ||||
| houvesse | ||||
| houvéssemos | ||||
| houvessem | ||||
| houver | ||||
| houvermos | ||||
| houverem | ||||
| houverei | ||||
| houverá | ||||
| houveremos | ||||
| houverão | ||||
| houveria | ||||
| houveríamos | ||||
| houveriam | ||||
| 
 | ||||
|                | forms of ser, to be (not including the infinitive): | ||||
| sou | ||||
| somos | ||||
| são | ||||
| era | ||||
| éramos | ||||
| eram | ||||
| fui | ||||
| foi | ||||
| fomos | ||||
| foram | ||||
| fora | ||||
| fôramos | ||||
| seja | ||||
| sejamos | ||||
| sejam | ||||
| fosse | ||||
| fôssemos | ||||
| fossem | ||||
| for | ||||
| formos | ||||
| forem | ||||
| serei | ||||
| será | ||||
| seremos | ||||
| serão | ||||
| seria | ||||
| seríamos | ||||
| seriam | ||||
| 
 | ||||
|                | forms of ter, to have (not including the infinitive): | ||||
| tenho | ||||
| tem | ||||
| temos | ||||
| têm | ||||
| tinha | ||||
| tínhamos | ||||
| tinham | ||||
| tive | ||||
| teve | ||||
| tivemos | ||||
| tiveram | ||||
| tivera | ||||
| tivéramos | ||||
| tenha | ||||
| tenhamos | ||||
| tenham | ||||
| tivesse | ||||
| tivéssemos | ||||
| tivessem | ||||
| tiver | ||||
| tivermos | ||||
| tiverem | ||||
| terei | ||||
| terá | ||||
| teremos | ||||
| terão | ||||
| teria | ||||
| teríamos | ||||
| teriam | ||||
							
								
								
									
										233
									
								
								archiver/solr-config-dir/lang/stopwords_ro.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										233
									
								
								archiver/solr-config-dir/lang/stopwords_ro.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,233 @@ | |||
| # This file was created by Jacques Savoy and is distributed under the BSD license. | ||||
| # See http://members.unine.ch/jacques.savoy/clef/index.html. | ||||
| # Also see http://www.opensource.org/licenses/bsd-license.html | ||||
| acea | ||||
| aceasta | ||||
| această | ||||
| aceea | ||||
| acei | ||||
| aceia | ||||
| acel | ||||
| acela | ||||
| acele | ||||
| acelea | ||||
| acest | ||||
| acesta | ||||
| aceste | ||||
| acestea | ||||
| aceşti | ||||
| aceştia | ||||
| acolo | ||||
| acum | ||||
| ai | ||||
| aia | ||||
| aibă | ||||
| aici | ||||
| al | ||||
| ăla | ||||
| ale | ||||
| alea | ||||
| ălea | ||||
| altceva | ||||
| altcineva | ||||
| am | ||||
| ar | ||||
| are | ||||
| aş | ||||
| aşadar | ||||
| asemenea | ||||
| asta | ||||
| ăsta | ||||
| astăzi | ||||
| astea | ||||
| ăstea | ||||
| ăştia | ||||
| asupra | ||||
| aţi | ||||
| au | ||||
| avea | ||||
| avem | ||||
| aveţi | ||||
| azi | ||||
| bine | ||||
| bucur | ||||
| bună | ||||
| ca | ||||
| că | ||||
| căci | ||||
| când | ||||
| care | ||||
| cărei | ||||
| căror | ||||
| cărui | ||||
| cât | ||||
| câte | ||||
| câţi | ||||
| către | ||||
| câtva | ||||
| ce | ||||
| cel | ||||
| ceva | ||||
| chiar | ||||
| cînd | ||||
| cine | ||||
| cineva | ||||
| cît | ||||
| cîte | ||||
| cîţi | ||||
| cîtva | ||||
| contra | ||||
| cu | ||||
| cum | ||||
| cumva | ||||
| curând | ||||
| curînd | ||||
| da | ||||
| dă | ||||
| dacă | ||||
| dar | ||||
| datorită | ||||
| de | ||||
| deci | ||||
| deja | ||||
| deoarece | ||||
| departe | ||||
| deşi | ||||
| din | ||||
| dinaintea | ||||
| dintr | ||||
| dintre | ||||
| drept | ||||
| după | ||||
| ea | ||||
| ei | ||||
| el | ||||
| ele | ||||
| eram | ||||
| este | ||||
| eşti | ||||
| eu | ||||
| face | ||||
| fără | ||||
| fi | ||||
| fie | ||||
| fiecare | ||||
| fii | ||||
| fim | ||||
| fiţi | ||||
| iar | ||||
| ieri | ||||
| îi | ||||
| îl | ||||
| îmi | ||||
| împotriva | ||||
| în  | ||||
| înainte | ||||
| înaintea | ||||
| încât | ||||
| încît | ||||
| încotro | ||||
| între | ||||
| întrucât | ||||
| întrucît | ||||
| îţi | ||||
| la | ||||
| lângă | ||||
| le | ||||
| li | ||||
| lîngă | ||||
| lor | ||||
| lui | ||||
| mă | ||||
| mâine | ||||
| mea | ||||
| mei | ||||
| mele | ||||
| mereu | ||||
| meu | ||||
| mi | ||||
| mine | ||||
| mult | ||||
| multă | ||||
| mulţi | ||||
| ne | ||||
| nicăieri | ||||
| nici | ||||
| nimeni | ||||
| nişte | ||||
| noastră | ||||
| noastre | ||||
| noi | ||||
| noştri | ||||
| nostru | ||||
| nu | ||||
| ori | ||||
| oricând | ||||
| oricare | ||||
| oricât | ||||
| orice | ||||
| oricînd | ||||
| oricine | ||||
| oricît | ||||
| oricum | ||||
| oriunde | ||||
| până | ||||
| pe | ||||
| pentru | ||||
| peste | ||||
| pînă | ||||
| poate | ||||
| pot | ||||
| prea | ||||
| prima | ||||
| primul | ||||
| prin | ||||
| printr | ||||
| sa | ||||
| să | ||||
| săi | ||||
| sale | ||||
| sau | ||||
| său | ||||
| se | ||||
| şi | ||||
| sînt | ||||
| sîntem | ||||
| sînteţi | ||||
| spre | ||||
| sub | ||||
| sunt | ||||
| suntem | ||||
| sunteţi | ||||
| ta | ||||
| tăi | ||||
| tale | ||||
| tău | ||||
| te | ||||
| ţi | ||||
| ţie | ||||
| tine | ||||
| toată | ||||
| toate | ||||
| tot | ||||
| toţi | ||||
| totuşi | ||||
| tu | ||||
| un | ||||
| una | ||||
| unde | ||||
| undeva | ||||
| unei | ||||
| unele | ||||
| uneori | ||||
| unor | ||||
| vă | ||||
| vi | ||||
| voastră | ||||
| voastre | ||||
| voi | ||||
| voştri | ||||
| vostru | ||||
| vouă | ||||
| vreo | ||||
| vreun | ||||
							
								
								
									
										243
									
								
								archiver/solr-config-dir/lang/stopwords_ru.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										243
									
								
								archiver/solr-config-dir/lang/stopwords_ru.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,243 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | a russian stop word list. comments begin with vertical bar. each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
|  | this is a ranked list (commonest to rarest) of stopwords derived from | ||||
|  | a large text sample. | ||||
| 
 | ||||
|  | letter `ё' is translated to `е'. | ||||
| 
 | ||||
| и              | and | ||||
| в              | in/into | ||||
| во             | alternative form | ||||
| не             | not | ||||
| что            | what/that | ||||
| он             | he | ||||
| на             | on/onto | ||||
| я              | i | ||||
| с              | from | ||||
| со             | alternative form | ||||
| как            | how | ||||
| а              | milder form of `no' (but) | ||||
| то             | conjunction and form of `that' | ||||
| все            | all | ||||
| она            | she | ||||
| так            | so, thus | ||||
| его            | him | ||||
| но             | but | ||||
| да             | yes/and | ||||
| ты             | thou | ||||
| к              | towards, by | ||||
| у              | around, chez | ||||
| же             | intensifier particle | ||||
| вы             | you | ||||
| за             | beyond, behind | ||||
| бы             | conditional/subj. particle | ||||
| по             | up to, along | ||||
| только         | only | ||||
| ее             | her | ||||
| мне            | to me | ||||
| было           | it was | ||||
| вот            | here is/are, particle | ||||
| от             | away from | ||||
| меня           | me | ||||
| еще            | still, yet, more | ||||
| нет            | no, there isn't/aren't | ||||
| о              | about | ||||
| из             | out of | ||||
| ему            | to him | ||||
| теперь         | now | ||||
| когда          | when | ||||
| даже           | even | ||||
| ну             | so, well | ||||
| вдруг          | suddenly | ||||
| ли             | interrogative particle | ||||
| если           | if | ||||
| уже            | already, but homonym of `narrower' | ||||
| или            | or | ||||
| ни             | neither | ||||
| быть           | to be | ||||
| был            | he was | ||||
| него           | prepositional form of его | ||||
| до             | up to | ||||
| вас            | you accusative | ||||
| нибудь         | indef. suffix preceded by hyphen | ||||
| опять          | again | ||||
| уж             | already, but homonym of `adder' | ||||
| вам            | to you | ||||
| сказал         | he said | ||||
| ведь           | particle `after all' | ||||
| там            | there | ||||
| потом          | then | ||||
| себя           | oneself | ||||
| ничего         | nothing | ||||
| ей             | to her | ||||
| может          | usually with `быть' as `maybe' | ||||
| они            | they | ||||
| тут            | here | ||||
| где            | where | ||||
| есть           | there is/are | ||||
| надо           | got to, must | ||||
| ней            | prepositional form of  ей | ||||
| для            | for | ||||
| мы             | we | ||||
| тебя           | thee | ||||
| их             | them, their | ||||
| чем            | than | ||||
| была           | she was | ||||
| сам            | self | ||||
| чтоб           | in order to | ||||
| без            | without | ||||
| будто          | as if | ||||
| человек        | man, person, one | ||||
| чего           | genitive form of `what' | ||||
| раз            | once | ||||
| тоже           | also | ||||
| себе           | to oneself | ||||
| под            | beneath | ||||
| жизнь          | life | ||||
| будет          | will be | ||||
| ж              | short form of intensifier particle `же' | ||||
| тогда          | then | ||||
| кто            | who | ||||
| этот           | this | ||||
| говорил        | was saying | ||||
| того           | genitive form of `that' | ||||
| потому         | for that reason | ||||
| этого          | genitive form of `this' | ||||
| какой          | which | ||||
| совсем         | altogether | ||||
| ним            | prepositional form of `его', `они' | ||||
| здесь          | here | ||||
| этом           | prepositional form of `этот' | ||||
| один           | one | ||||
| почти          | almost | ||||
| мой            | my | ||||
| тем            | instrumental/dative plural of `тот', `то' | ||||
| чтобы          | full form of `in order that' | ||||
| нее            | her (acc.) | ||||
| кажется        | it seems | ||||
| сейчас         | now | ||||
| были           | they were | ||||
| куда           | where to | ||||
| зачем          | why | ||||
| сказать        | to say | ||||
| всех           | all (acc., gen. preposn. plural) | ||||
| никогда        | never | ||||
| сегодня        | today | ||||
| можно          | possible, one can | ||||
| при            | by | ||||
| наконец        | finally | ||||
| два            | two | ||||
| об             | alternative form of `о', about | ||||
| другой         | another | ||||
| хоть           | even | ||||
| после          | after | ||||
| над            | above | ||||
| больше         | more | ||||
| тот            | that one (masc.) | ||||
| через          | across, in | ||||
| эти            | these | ||||
| нас            | us | ||||
| про            | about | ||||
| всего          | in all, only, of all | ||||
| них            | prepositional form of `они' (they) | ||||
| какая          | which, feminine | ||||
| много          | lots | ||||
| разве          | interrogative particle | ||||
| сказала        | she said | ||||
| три            | three | ||||
| эту            | this, acc. fem. sing. | ||||
| моя            | my, feminine | ||||
| впрочем        | moreover, besides | ||||
| хорошо         | good | ||||
| свою           | one's own, acc. fem. sing. | ||||
| этой           | oblique form of `эта', fem. `this' | ||||
| перед          | in front of | ||||
| иногда         | sometimes | ||||
| лучше          | better | ||||
| чуть           | a little | ||||
| том            | preposn. form of `that one' | ||||
| нельзя         | one must not | ||||
| такой          | such a one | ||||
| им             | to them | ||||
| более          | more | ||||
| всегда         | always | ||||
| конечно        | of course | ||||
| всю            | acc. fem. sing of `all' | ||||
| между          | between | ||||
| 
 | ||||
| 
 | ||||
|   | b: some paradigms | ||||
|   | | ||||
|   | personal pronouns | ||||
|   | | ||||
|   | я  меня  мне  мной  [мною] | ||||
|   | ты  тебя  тебе  тобой  [тобою] | ||||
|   | он  его  ему  им  [него, нему, ним] | ||||
|   | она  ее  ей  ею  [нее, ней, нею] | ||||
|   | оно  его  ему  им  [него, нему, ним] | ||||
|   | | ||||
|   | мы  нас  нам  нами | ||||
|   | вы  вас  вам  вами | ||||
|   | они  их  им  ими  [них, ним, ними] | ||||
|   | | ||||
|   |   себя  себе  собой   [собою] | ||||
|   | | ||||
|   | demonstrative pronouns: этот (this), тот (that) | ||||
|   | | ||||
|   | этот  эта  это  эти | ||||
|   | этого  эту  это  эти | ||||
|   | этого  этой  этого  этих | ||||
|   | этому  этой  этому  этим | ||||
|   | этим  этой  этим  [этою]  этими | ||||
|   | этом  этой  этом  этих | ||||
|   | | ||||
|   | тот  та  то  те | ||||
|   | того  ту  то  те | ||||
|   | того  той  того  тех | ||||
|   | тому  той  тому  тем | ||||
|   | тем  той  тем  [тою]  теми | ||||
|   | том  той  том  тех | ||||
|   | | ||||
|   | determinative pronouns | ||||
|   | | ||||
|   | (a) весь (all) | ||||
|   | | ||||
|   | весь  вся  все  все | ||||
|   | всего  всю  все  все | ||||
|   | всего  всей  всего  всех | ||||
|   | всему  всей  всему  всем | ||||
|   | всем  всей  всем  [всею]  всеми | ||||
|   | всем  всей  всем  всех | ||||
|   | | ||||
|   | (b) сам (himself etc) | ||||
|   | | ||||
|   | сам  сама  само  сами | ||||
|   | самого саму  само  самих | ||||
|   | самого самой самого  самих | ||||
|   | самому самой самому  самим | ||||
|   | самим  самой  самим  [самою]  самими | ||||
|   | самом самой самом  самих | ||||
|   | | ||||
|   | stems of verbs `to be', `to have', `to do' and modal | ||||
|   | | ||||
|   | быть  бы  буд  быв  есть  суть | ||||
|   | име | ||||
|   | дел | ||||
|   | мог   мож  мочь | ||||
|   | уме | ||||
|   | хоч  хот | ||||
|   | долж | ||||
|   | можн | ||||
|   | нужн | ||||
|   | нельзя | ||||
| 
 | ||||
							
								
								
									
										133
									
								
								archiver/solr-config-dir/lang/stopwords_sv.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								archiver/solr-config-dir/lang/stopwords_sv.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,133 @@ | |||
|  | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt | ||||
|  | This file is distributed under the BSD License. | ||||
|  | See http://snowball.tartarus.org/license.php | ||||
|  | Also see http://www.opensource.org/licenses/bsd-license.html | ||||
|  |  - Encoding was converted to UTF-8. | ||||
|  |  - This notice was added. | ||||
|  | | ||||
|  | NOTE: To use this file with StopFilterFactory, you must specify format="snowball" | ||||
| 
 | ||||
|  | A Swedish stop word list. Comments begin with vertical bar. Each stop | ||||
|  | word is at the start of a line. | ||||
| 
 | ||||
|  | This is a ranked list (commonest to rarest) of stopwords derived from | ||||
|  | a large text sample. | ||||
| 
 | ||||
|  | Swedish stop words occasionally exhibit homonym clashes. For example | ||||
|  |  så = so, but also seed. These are indicated clearly below. | ||||
| 
 | ||||
| och            | and | ||||
| det            | it, this/that | ||||
| att            | to (with infinitive) | ||||
| i              | in, at | ||||
| en             | a | ||||
| jag            | I | ||||
| hon            | she | ||||
| som            | who, that | ||||
| han            | he | ||||
| på             | on | ||||
| den            | it, this/that | ||||
| med            | with | ||||
| var            | where, each | ||||
| sig            | him(self) etc | ||||
| för            | for | ||||
| så             | so (also: seed) | ||||
| till           | to | ||||
| är             | is | ||||
| men            | but | ||||
| ett            | a | ||||
| om             | if; around, about | ||||
| hade           | had | ||||
| de             | they, these/those | ||||
| av             | of | ||||
| icke           | not, no | ||||
| mig            | me | ||||
| du             | you | ||||
| henne          | her | ||||
| då             | then, when | ||||
| sin            | his | ||||
| nu             | now | ||||
| har            | have | ||||
| inte           | inte någon = no one | ||||
| hans           | his | ||||
| honom          | him | ||||
| skulle         | should, would | ||||
| hennes         | her | ||||
| där            | there | ||||
| min            | my | ||||
| man            | one (pronoun) | ||||
| ej             | nor | ||||
| vid            | at, by, on (also: vast) | ||||
| kunde          | could | ||||
| något          | some etc | ||||
| från           | from, off | ||||
| ut             | out | ||||
| när            | when | ||||
| efter          | after, behind | ||||
| upp            | up | ||||
| vi             | we | ||||
| dem            | them | ||||
| vara           | be | ||||
| vad            | what | ||||
| över           | over | ||||
| än             | than | ||||
| dig            | you | ||||
| kan            | can | ||||
| sina           | his | ||||
| här            | here | ||||
| ha             | have | ||||
| mot            | towards | ||||
| alla           | all | ||||
| under          | under (also: wonder) | ||||
| någon          | some etc | ||||
| eller          | or (else) | ||||
| allt           | all | ||||
| mycket         | much | ||||
| sedan          | since | ||||
| ju             | why | ||||
| denna          | this/that | ||||
| själv          | myself, yourself etc | ||||
| detta          | this/that | ||||
| åt             | to | ||||
| utan           | without | ||||
| varit          | was | ||||
| hur            | how | ||||
| ingen          | no | ||||
| mitt           | my | ||||
| ni             | you | ||||
| bli            | to be, become | ||||
| blev           | from bli | ||||
| oss            | us | ||||
| din            | thy | ||||
| dessa          | these/those | ||||
| några          | some etc | ||||
| deras          | their | ||||
| blir           | from bli | ||||
| mina           | my | ||||
| samma          | (the) same | ||||
| vilken         | who, that | ||||
| er             | you, your | ||||
| sådan          | such a | ||||
| vår            | our | ||||
| blivit         | from bli | ||||
| dess           | its | ||||
| inom           | within | ||||
| mellan         | between | ||||
| sådant         | such a | ||||
| varför         | why | ||||
| varje          | each | ||||
| vilka          | who, that | ||||
| ditt           | thy | ||||
| vem            | who | ||||
| vilket         | who, that | ||||
| sitt           | his | ||||
| sådana         | such a | ||||
| vart           | each | ||||
| dina           | thy | ||||
| vars           | whose | ||||
| vårt           | our | ||||
| våra           | our | ||||
| ert            | your | ||||
| era            | your | ||||
| vilkas         | whose | ||||
| 
 | ||||
							
								
								
									
										119
									
								
								archiver/solr-config-dir/lang/stopwords_th.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								archiver/solr-config-dir/lang/stopwords_th.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,119 @@ | |||
| # Thai stopwords from: | ||||
| # "Opinion Detection in Thai Political News Columns | ||||
| # Based on Subjectivity Analysis" | ||||
| # Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak | ||||
| ไว้ | ||||
| ไม่ | ||||
| ไป | ||||
| ได้ | ||||
| ให้ | ||||
| ใน | ||||
| โดย | ||||
| แห่ง | ||||
| แล้ว | ||||
| และ | ||||
| แรก | ||||
| แบบ | ||||
| แต่ | ||||
| เอง | ||||
| เห็น | ||||
| เลย | ||||
| เริ่ม | ||||
| เรา | ||||
| เมื่อ | ||||
| เพื่อ | ||||
| เพราะ | ||||
| เป็นการ | ||||
| เป็น | ||||
| เปิดเผย | ||||
| เปิด | ||||
| เนื่องจาก | ||||
| เดียวกัน | ||||
| เดียว | ||||
| เช่น | ||||
| เฉพาะ | ||||
| เคย | ||||
| เข้า | ||||
| เขา | ||||
| อีก | ||||
| อาจ | ||||
| อะไร | ||||
| ออก | ||||
| อย่าง | ||||
| อยู่ | ||||
| อยาก | ||||
| หาก | ||||
| หลาย | ||||
| หลังจาก | ||||
| หลัง | ||||
| หรือ | ||||
| หนึ่ง | ||||
| ส่วน | ||||
| ส่ง | ||||
| สุด | ||||
| สําหรับ | ||||
| ว่า | ||||
| วัน | ||||
| ลง | ||||
| ร่วม | ||||
| ราย | ||||
| รับ | ||||
| ระหว่าง | ||||
| รวม | ||||
| ยัง | ||||
| มี | ||||
| มาก | ||||
| มา | ||||
| พร้อม | ||||
| พบ | ||||
| ผ่าน | ||||
| ผล | ||||
| บาง | ||||
| น่า | ||||
| นี้ | ||||
| นํา | ||||
| นั้น | ||||
| นัก | ||||
| นอกจาก | ||||
| ทุก | ||||
| ที่สุด | ||||
| ที่ | ||||
| ทําให้ | ||||
| ทํา | ||||
| ทาง | ||||
| ทั้งนี้ | ||||
| ทั้ง | ||||
| ถ้า | ||||
| ถูก | ||||
| ถึง | ||||
| ต้อง | ||||
| ต่างๆ | ||||
| ต่าง | ||||
| ต่อ | ||||
| ตาม | ||||
| ตั้งแต่ | ||||
| ตั้ง | ||||
| ด้าน | ||||
| ด้วย | ||||
| ดัง | ||||
| ซึ่ง | ||||
| ช่วง | ||||
| จึง | ||||
| จาก | ||||
| จัด | ||||
| จะ | ||||
| คือ | ||||
| ความ | ||||
| ครั้ง | ||||
| คง | ||||
| ขึ้น | ||||
| ของ | ||||
| ขอ | ||||
| ขณะ | ||||
| ก่อน | ||||
| ก็ | ||||
| การ | ||||
| กับ | ||||
| กัน | ||||
| กว่า | ||||
| กล่าว | ||||
							
								
								
									
										212
									
								
								archiver/solr-config-dir/lang/stopwords_tr.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										212
									
								
								archiver/solr-config-dir/lang/stopwords_tr.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,212 @@ | |||
| # Turkish stopwords from LUCENE-559 | ||||
| # merged with the list from "Information Retrieval on Turkish Texts" | ||||
| #   (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf) | ||||
| acaba | ||||
| altmış | ||||
| altı | ||||
| ama | ||||
| ancak | ||||
| arada | ||||
| aslında | ||||
| ayrıca | ||||
| bana | ||||
| bazı | ||||
| belki | ||||
| ben | ||||
| benden | ||||
| beni | ||||
| benim | ||||
| beri | ||||
| beş | ||||
| bile | ||||
| bin | ||||
| bir | ||||
| birçok | ||||
| biri | ||||
| birkaç | ||||
| birkez | ||||
| birşey | ||||
| birşeyi | ||||
| biz | ||||
| bize | ||||
| bizden | ||||
| bizi | ||||
| bizim | ||||
| böyle | ||||
| böylece | ||||
| bu | ||||
| buna | ||||
| bunda | ||||
| bundan | ||||
| bunlar | ||||
| bunları | ||||
| bunların | ||||
| bunu | ||||
| bunun | ||||
| burada | ||||
| çok | ||||
| çünkü | ||||
| da | ||||
| daha | ||||
| dahi | ||||
| de | ||||
| defa | ||||
| değil | ||||
| diğer | ||||
| diye | ||||
| doksan | ||||
| dokuz | ||||
| dolayı | ||||
| dolayısıyla | ||||
| dört | ||||
| edecek | ||||
| eden | ||||
| ederek | ||||
| edilecek | ||||
| ediliyor | ||||
| edilmesi | ||||
| ediyor | ||||
| eğer | ||||
| elli | ||||
| en | ||||
| etmesi | ||||
| etti | ||||
| ettiği | ||||
| ettiğini | ||||
| gibi | ||||
| göre | ||||
| halen | ||||
| hangi | ||||
| hatta | ||||
| hem | ||||
| henüz | ||||
| hep | ||||
| hepsi | ||||
| her | ||||
| herhangi | ||||
| herkesin | ||||
| hiç | ||||
| hiçbir | ||||
| için | ||||
| iki | ||||
| ile | ||||
| ilgili | ||||
| ise | ||||
| işte | ||||
| itibaren | ||||
| itibariyle | ||||
| kadar | ||||
| karşın | ||||
| katrilyon | ||||
| kendi | ||||
| kendilerine | ||||
| kendini | ||||
| kendisi | ||||
| kendisine | ||||
| kendisini | ||||
| kez | ||||
| ki | ||||
| kim | ||||
| kimden | ||||
| kime | ||||
| kimi | ||||
| kimse | ||||
| kırk | ||||
| milyar | ||||
| milyon | ||||
| mu | ||||
| mü | ||||
| mı | ||||
| nasıl | ||||
| ne | ||||
| neden | ||||
| nedenle | ||||
| nerde | ||||
| nerede | ||||
| nereye | ||||
| niye | ||||
| niçin | ||||
| o | ||||
| olan | ||||
| olarak | ||||
| oldu | ||||
| olduğu | ||||
| olduğunu | ||||
| olduklarını | ||||
| olmadı | ||||
| olmadığı | ||||
| olmak | ||||
| olması | ||||
| olmayan | ||||
| olmaz | ||||
| olsa | ||||
| olsun | ||||
| olup | ||||
| olur | ||||
| olursa | ||||
| oluyor | ||||
| on | ||||
| ona | ||||
| ondan | ||||
| onlar | ||||
| onlardan | ||||
| onları | ||||
| onların | ||||
| onu | ||||
| onun | ||||
| otuz | ||||
| oysa | ||||
| öyle | ||||
| pek | ||||
| rağmen | ||||
| sadece | ||||
| sanki | ||||
| sekiz | ||||
| seksen | ||||
| sen | ||||
| senden | ||||
| seni | ||||
| senin | ||||
| siz | ||||
| sizden | ||||
| sizi | ||||
| sizin | ||||
| şey | ||||
| şeyden | ||||
| şeyi | ||||
| şeyler | ||||
| şöyle | ||||
| şu | ||||
| şuna | ||||
| şunda | ||||
| şundan | ||||
| şunları | ||||
| şunu | ||||
| tarafından | ||||
| trilyon | ||||
| tüm | ||||
| üç | ||||
| üzere | ||||
| var | ||||
| vardı | ||||
| ve | ||||
| veya | ||||
| ya | ||||
| yani | ||||
| yapacak | ||||
| yapılan | ||||
| yapılması | ||||
| yapıyor | ||||
| yapmak | ||||
| yaptı | ||||
| yaptığı | ||||
| yaptığını | ||||
| yaptıkları | ||||
| yedi | ||||
| yerine | ||||
| yetmiş | ||||
| yine | ||||
| yirmi | ||||
| yoksa | ||||
| yüz | ||||
| zaten | ||||
							
								
								
									
										29
									
								
								archiver/solr-config-dir/lang/userdict_ja.txt
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								archiver/solr-config-dir/lang/userdict_ja.txt
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,29 @@ | |||
| # | ||||
| # This is a sample user dictionary for Kuromoji (JapaneseTokenizer) | ||||
| # | ||||
| # Add entries to this file in order to override the statistical model in terms | ||||
| # of segmentation, readings and part-of-speech tags.  Notice that entries do | ||||
| # not have weights since they are always used when found.  This is by-design | ||||
| # in order to maximize ease-of-use. | ||||
| # | ||||
| # Entries are defined using the following CSV format: | ||||
| #  <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> | ||||
| # | ||||
| # Notice that a single half-width space separates tokens and readings, and | ||||
| # that the number of tokens and readings must match exactly. | ||||
| # | ||||
| # Also notice that the behavior of multiple entries with the same <text> is undefined. | ||||
| # | ||||
| # Whitespace only lines are ignored.  Comments are not allowed on entry lines. | ||||
| # | ||||
| 
 | ||||
| # Custom segmentation for kanji compounds | ||||
| 日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞 | ||||
| 関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞 | ||||
| 
 | ||||
| # Custom segmentation for compound katakana | ||||
| トートバッグ,トート バッグ,トート バッグ,かずカナ名詞 | ||||
| ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞 | ||||
| 
 | ||||
| # Custom reading for former sumo wrestler | ||||
| 朝青龍,朝青龍,アサショウリュウ,カスタム人名 | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue