<?xml version="1.0" encoding="UTF-8" ?>
<schema name="example" version="1.6">
|
|
|
|
|
|
|
|
<!-- Verbatim string types: solr.StrField values are not analyzed; they are indexed/stored exactly as supplied. -->
<fieldType class="solr.StrField"
           name="string"
           docValues="true"
           sortMissingLast="true"/>
<fieldType class="solr.StrField"
           name="strings"
           docValues="true"
           multiValued="true"
           sortMissingLast="true"/>
|
|
|
|
|
|
|
|
<!-- Boolean types: accepted values are "true" or "false". -->
<fieldType class="solr.BoolField"
           name="boolean"
           sortMissingLast="true"/>
<fieldType class="solr.BoolField"
           name="booleans"
           multiValued="true"
           sortMissingLast="true"/>
|
|
|
|
|
|
|
|
<!--
  Point-based numeric types; values are indexed using KD-trees.
  FieldCache is not supported for point fields, so docValues="true" is required
  wherever sorting, faceting, functions, etc. are needed.
-->
<!-- Single-valued variants -->
<fieldType class="solr.IntPointField"    name="pint"    docValues="true"/>
<fieldType class="solr.FloatPointField"  name="pfloat"  docValues="true"/>
<fieldType class="solr.LongPointField"   name="plong"   docValues="true"/>
<fieldType class="solr.DoublePointField" name="pdouble" docValues="true"/>

<!-- Multi-valued variants -->
<fieldType class="solr.IntPointField"    name="pints"    multiValued="true" docValues="true"/>
<fieldType class="solr.FloatPointField"  name="pfloats"  multiValued="true" docValues="true"/>
<fieldType class="solr.LongPointField"   name="plongs"   multiValued="true" docValues="true"/>
<fieldType class="solr.DoublePointField" name="pdoubles" multiValued="true" docValues="true"/>
|
|
|
|
<!-- Type backed by solr.RandomSortField. No field declared in this file references it. -->
<fieldType class="solr.RandomSortField"
           name="random"
           indexed="true"/>
|
|
|
|
|
|
|
|
<!-- Fields of this type are neither stored nor indexed by default,
     so any data added to them is ignored outright. -->
<fieldType class="solr.StrField"
           name="ignored"
           indexed="false"
           stored="false"
           multiValued="true"/>
|
|
|
|
|
|
|
|
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
|
|
|
|
is a more restricted form of the canonical representation of dateTime
|
|
|
|
http://www.w3.org/TR/xmlschema-2/#dateTime
|
|
|
|
The trailing "Z" designates UTC time and is mandatory.
|
|
|
|
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
|
|
|
|
All other components are mandatory.
|
|
|
|
|
|
|
|
Expressions can also be used to denote calculations that should be
|
|
|
|
performed relative to "NOW" to determine the value, e.g.:
|
|
|
|
|
|
|
|
NOW/HOUR
|
|
|
|
... Round to the start of the current hour
|
|
|
|
NOW-1DAY
|
|
|
|
... Exactly 1 day prior to now
|
|
|
|
NOW/DAY+6MONTHS+3DAYS
|
|
|
|
... 6 months and 3 days in the future from the start of
|
|
|
|
the current day
|
|
|
|
|
|
|
|
-->
|
|
|
|
<!-- Date types backed by point (KD-tree) fields: single-valued and multi-valued. -->
<fieldType class="solr.DatePointField"
           name="pdate"
           docValues="true"/>
<fieldType class="solr.DatePointField"
           name="pdates"
           multiValued="true"
           docValues="true"/>
|
|
|
|
|
|
|
|
<!-- Binary data type. Values are sent and retrieved as Base64-encoded strings. -->
<fieldType class="solr.BinaryField"
           name="binary"/>
|
|
|
|
|
|
|
|
<!--
  Rank fields (experimental): store scoring factors that can improve document ranking.
  Intended to be used together with RankQParserPlugin.
-->
<fieldType class="solr.RankField"
           name="rank"/>
|
|
|
|
|
|
|
|
<!-- solr.TextField allows the specification of custom text analyzers
|
|
|
|
specified as a tokenizer and a list of token filters. Different
|
|
|
|
analyzers may be specified for indexing and querying.
|
|
|
|
|
|
|
|
The optional positionIncrementGap puts space between multiple fields of
|
|
|
|
this type on the same document, with the purpose of preventing false phrase
|
|
|
|
matching across fields.
|
|
|
|
|
|
|
|
For more info on customizing your analyzer chain, please see
|
|
|
|
https://solr.apache.org/guide/solr/latest/indexing-guide/document-analysis.html#using-analyzers-tokenizers-and-filters
|
|
|
|
-->
|
|
|
|
|
|
|
|
<!-- One can also specify an existing Analyzer class that has a
|
|
|
|
default constructor via the class attribute on the analyzer element.
|
|
|
|
Example:
|
|
|
|
<fieldType name="text_greek" class="solr.TextField">
|
|
|
|
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
|
|
|
|
</fieldType>
|
|
|
|
-->
|
|
|
|
|
|
|
|
<!--
  text_prefix: text type for prefix matching.
    Index time: standard tokenizer, lowercase, then edge n-grams of 1 to 20 characters per token.
    Query time: standard tokenizer and lowercase only; the query analyzer has no n-gram filter.
  The edge n-gram filter is referenced by its SPI name ("edgeNGram"), matching the name-based
  tokenizer and filter references used by the rest of this type; it resolves to the same
  factory as class="solr.EdgeNGramFilterFactory".
-->
<fieldType name="text_prefix" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer name="standard"/>
    <filter name="lowercase"/>
    <filter name="edgeNGram" minGramSize="1" maxGramSize="20"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer name="standard"/>
    <filter name="lowercase"/>
  </analyzer>
</fieldType>
|
|
|
|
|
2023-12-04 21:12:59 +00:00
|
|
|
<!--
  text_general: multi-valued general text type.
  The index and query analyzers apply the same chain: standard tokenizer,
  case-insensitive stop-word removal (lang/stopwords_en.txt), then lowercasing.
-->
<fieldType class="solr.TextField"
           name="text_general"
           multiValued="true"
           positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer name="standard"/>
    <filter name="stop"
            words="lang/stopwords_en.txt"
            ignoreCase="true"/>
    <filter name="lowercase"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer name="standard"/>
    <filter name="stop"
            words="lang/stopwords_en.txt"
            ignoreCase="true"/>
    <filter name="lowercase"/>
  </analyzer>
</fieldType>
|
|
|
|
|
2023-12-04 20:58:07 +00:00
|
|
|
<!--
  text_en_splitting_numbers: multi-valued English text type.
  Both analyzers: whitespace tokenizer, case-insensitive stop words (lang/stopwords_en.txt),
  word-delimiter splitting, lowercase, Porter stemming.
  Index time additionally catenates word parts and number parts (catenateWords/catenateNumbers = 1)
  and ends with flattenGraph; query time does neither.
-->
<fieldType class="solr.TextField"
           name="text_en_splitting_numbers"
           multiValued="true"
           positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer name="whitespace"/>
    <filter name="stop"
            words="lang/stopwords_en.txt"
            ignoreCase="true"/>
    <filter name="wordDelimiterGraph"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter name="lowercase"/>
    <filter name="porterStem"/>
    <filter name="flattenGraph"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer name="whitespace"/>
    <filter name="stop"
            words="lang/stopwords_en.txt"
            ignoreCase="true"/>
    <filter name="wordDelimiterGraph"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"/>
    <filter name="lowercase"/>
    <filter name="porterStem"/>
  </analyzer>
</fieldType>
|
|
|
|
|
|
|
|
<!--
  text_en_splitting: English text type with autoGeneratePhraseQueries enabled.
  Both analyzers: whitespace tokenizer, case-insensitive stop words (lang/stopwords_en.txt),
  word-delimiter splitting, lowercase, Porter stemming.
  Index time additionally:
    * catenates word parts and number parts (catenateWords/catenateNumbers = 1);
    * removes every run of two or more digits from each token via the patternReplace filter
      (a token made up only of such digits is left as an empty string);
    * ends with flattenGraph.
  The pattern-replace filter is referenced by its SPI name ("patternReplace"), matching the
  name-based references used by every other tokenizer and filter in this type; it resolves to
  the same factory as class="solr.PatternReplaceFilterFactory".
  NOTE(review): the query analyzer has no digit-stripping step, so numeric query terms are kept
  while the indexed tokens have their digits removed. Confirm this asymmetry is intended.
-->
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
  <analyzer type="index">
    <tokenizer name="whitespace"/>
    <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
    <filter name="lowercase"/>
    <!-- Index-time only: strip runs of two or more digits. -->
    <filter name="patternReplace" pattern="(\d{2,})" replacement="" replace="all"/>
    <filter name="porterStem"/>
    <filter name="flattenGraph"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer name="whitespace"/>
    <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
    <filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
    <filter name="lowercase"/>
    <filter name="porterStem"/>
  </analyzer>
</fieldType>
|
|
|
|
|
|
|
|
<!-- Document fields. Every field in this group is declared required="true". -->
<field name="id"    type="string"                    required="true" indexed="true"/>
<field name="title" type="text_en_splitting_numbers" required="true" indexed="true"  stored="true"/>
<field name="body"  type="text_en_splitting"         required="true" indexed="true"  stored="true"/>
<!-- table is searchable but not stored. -->
<field name="table" type="text_en_splitting"         required="true" indexed="true"  stored="false"/>
<!-- len is stored but not indexed. -->
<field name="len"   type="pint"                      required="true" indexed="false" stored="true"/>
|
|
|
|
|
|
|
|
<!-- title_prefix receives a copy of title (see copyField) for prefix matching; it is indexed but not stored. -->
<field name="title_prefix"
       type="text_prefix"
       stored="false"
       indexed="true"/>
<copyField dest="title_prefix"
           source="title"/>
|
|
|
|
|
|
|
|
<!-- _version_: single-valued long field, indexed and stored. -->
<field name="_version_"
       type="plong"
       multiValued="false"
       stored="true"
       indexed="true"/>

<!-- The id field serves as the unique key of each document. -->
<uniqueKey>id</uniqueKey>
|
|
|
|
</schema>
|