breezewiki/archiver/solr-config-dir/schema.xml

162 lines
7.4 KiB
XML
Raw Permalink Normal View History

<?xml version="1.0" encoding="UTF-8" ?>
<schema name="example" version="1.6">
<!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />
<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
<!--
Numeric field types that index values using KD-trees.
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
-->
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>
<!-- since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright. -->
<fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
is a more restricted form of the canonical representation of dateTime
http://www.w3.org/TR/xmlschema-2/#dateTime
The trailing "Z" designates UTC time and is mandatory.
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
All other components are mandatory.
Expressions can also be used to denote calculations that should be
performed relative to "NOW" to determine the value, ie...
NOW/HOUR
... Round to the start of the current hour
NOW-1DAY
... Exactly 1 day prior to now
NOW/DAY+6MONTHS+3DAYS
... 6 months and 3 days in the future from the start of
the current day
-->
<!-- KD-tree versions of date fields -->
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
<fieldType name="binary" class="solr.BinaryField"/>
<!--
RankFields can be used to store scoring factors to improve document ranking. They should be used
in combination with RankQParserPlugin.
(experimental)
-->
<fieldType name="rank" class="solr.RankField"/>
<!-- solr.TextField allows the specification of custom text analyzers
specified as a tokenizer and a list of token filters. Different
analyzers may be specified for indexing and querying.
The optional positionIncrementGap puts space between multiple fields of
this type on the same document, with the purpose of preventing false phrase
matching across fields.
For more info on customizing your analyzer chain, please see
https://solr.apache.org/guide/solr/latest/indexing-guide/document-analysis.html#using-analyzers-tokenizers-and-filters
-->
<!-- One can also specify an existing Analyzer class that has a
default constructor via the class attribute on the analyzer element.
Example:
<fieldType name="text_greek" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
</fieldType>
-->
<fieldType name="text_prefix" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
2023-12-04 20:58:07 +00:00
<tokenizer name="standard"/>
<filter name="lowercase"/>
<filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="20" />
</analyzer>
<analyzer type="query">
2023-12-04 20:58:07 +00:00
<tokenizer name="standard"/>
<filter name="lowercase"/>
</analyzer>
</fieldType>
2023-12-04 21:12:59 +00:00
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer name="standard"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt" />
<filter name="lowercase"/>
</analyzer>
<analyzer type="query">
<tokenizer name="standard"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt" />
<filter name="lowercase"/>
</analyzer>
</fieldType>
2023-12-06 03:35:07 +00:00
<fieldType name="text_en_number_splitting" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
2023-12-04 20:58:07 +00:00
<tokenizer name="whitespace"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter name="lowercase"/>
2023-12-04 20:58:07 +00:00
<filter name="porterStem"/>
<filter name="flattenGraph" />
</analyzer>
<analyzer type="query">
2023-12-04 20:58:07 +00:00
<tokenizer name="whitespace"/>
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter name="lowercase"/>
2023-12-04 20:58:07 +00:00
<filter name="porterStem"/>
</analyzer>
</fieldType>
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer name="whitespace"/>
2023-12-04 20:58:07 +00:00
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter name="lowercase"/>
<filter class="solr.PatternReplaceFilterFactory" pattern="(\d{2,})" replacement="" replace="all" />
<filter name="porterStem"/>
<filter name="flattenGraph" />
</analyzer>
<analyzer type="query">
<tokenizer name="whitespace"/>
2023-12-04 20:58:07 +00:00
<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter name="lowercase"/>
<filter name="porterStem"/>
</analyzer>
</fieldType>
<!-- Fields -->
<field name="id" type="string" indexed="true" required="true" />
2023-12-06 03:35:07 +00:00
<field name="title" type="text_en_number_splitting" indexed="true" stored="true" required="true" />
<field name="body" type="text_en_splitting" indexed="true" stored="true" required="true" />
<field name="table" type="text_en_splitting" indexed="true" stored="false" required="true" />
<field name="len" type="pint" indexed="false" stored="true" required="true" />
<!-- Copy title to title_prefix for matching -->
<field name="title_prefix" type="text_prefix" indexed="true" stored="false" />
<copyField source="title" dest="title_prefix" />
<field name="_version_" type="plong" indexed="true" stored="true" multiValued="false"/>
<!-- Unique Key -->
<uniqueKey>id</uniqueKey>
</schema>