breezewiki/archiver/solr-config-dir/schema.xml

<?xml version="1.0" encoding="UTF-8" ?>
<schema name="example" version="1.6">

  <!--
      Verbatim string types. solr.StrField values are not analyzed; they are
      indexed/stored exactly as supplied. "strings" is the same definition
      with multiValued="true" added.
  -->
  <fieldType name="string"
             class="solr.StrField"
             docValues="true"
             sortMissingLast="true"/>
  <fieldType name="strings"
             class="solr.StrField"
             docValues="true"
             multiValued="true"
             sortMissingLast="true"/>

  <!--
      Boolean types; accepted values are "true" or "false".
      "booleans" is the multi-valued variant of "boolean".
  -->
  <fieldType name="boolean"
             class="solr.BoolField"
             sortMissingLast="true"/>
  <fieldType name="booleans"
             class="solr.BoolField"
             multiValued="true"
             sortMissingLast="true"/>

  <!--
      Numeric point types (values are indexed using KD-trees).
      Point fields do not support FieldCache, so docValues="true" is required
      wherever sorting, faceting, functions, etc. are needed; every type
      below enables it.
  -->

  <!-- Single-valued numeric types -->
  <fieldType name="pint"    class="solr.IntPointField"    docValues="true"/>
  <fieldType name="pfloat"  class="solr.FloatPointField"  docValues="true"/>
  <fieldType name="plong"   class="solr.LongPointField"   docValues="true"/>
  <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>

  <!-- Multi-valued counterparts of the types above -->
  <fieldType name="pints"    class="solr.IntPointField"    multiValued="true" docValues="true"/>
  <fieldType name="pfloats"  class="solr.FloatPointField"  multiValued="true" docValues="true"/>
  <fieldType name="plongs"   class="solr.LongPointField"   multiValued="true" docValues="true"/>
  <fieldType name="pdoubles" class="solr.DoublePointField" multiValued="true" docValues="true"/>

  <!-- Type backed by solr.RandomSortField (see the Solr field type reference for its sort semantics) -->
  <fieldType name="random" class="solr.RandomSortField" indexed="true"/>

  <!--
      Sink type: because it is declared with stored="false" and
      indexed="false", any data added to a field of this type is ignored
      outright.
  -->
  <fieldType name="ignored"
             class="solr.StrField"
             indexed="false"
             stored="false"
             multiValued="true"/>

  <!--
      Date types (KD-tree versions of date fields).

      Values take the form 1995-12-31T23:59:59Z, which is a more restricted
      form of the canonical representation of dateTime:
      http://www.w3.org/TR/xmlschema-2/#dateTime
      The trailing "Z" designates UTC time and is mandatory.
      Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
      All other components are mandatory.

      Expressions can also be used to denote calculations that should be
      performed relative to "NOW" to determine the value, e.g.:

        NOW/HOUR
            Round to the start of the current hour
        NOW-1DAY
            Exactly 1 day prior to now
        NOW/DAY+6MONTHS+3DAYS
            6 months and 3 days in the future from the start of
            the current day
  -->
  <fieldType name="pdate"
             class="solr.DatePointField"
             docValues="true"/>
  <fieldType name="pdates"
             class="solr.DatePointField"
             multiValued="true"
             docValues="true"/>

  <!-- Binary data type. The data should be sent/retrieved as Base64-encoded strings. -->
  <fieldType class="solr.BinaryField" name="binary"/>

  <!--
      Rank type (experimental). RankFields can be used to store scoring
      factors that improve document ranking; they should be used in
      combination with RankQParserPlugin.
  -->
  <fieldType class="solr.RankField" name="rank"/>

  <!-- solr.TextField allows the specification of custom text analyzers
       specified as a tokenizer and a list of token filters. Different
       analyzers may be specified for indexing and querying.

The optional positionIncrementGap puts space between multiple fields of
this type on the same document, with the purpose of preventing false phrase
matching across fields.

For more info on customizing your analyzer chain, please see
https://solr.apache.org/guide/solr/latest/indexing-guide/document-analysis.html#using-analyzers-tokenizers-and-filters
  -->

  <!-- One can also specify an existing Analyzer class that has a
       default constructor via the class attribute on the analyzer element.
       Example:
       <fieldType name="text_greek" class="solr.TextField">
       <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
       </fieldType>
  -->

  <!--
      Prefix-matching text type.

      Index time: standard tokenizer, lowercase, then edge n-grams of 1 to 20
      characters per token, so every leading prefix of a token becomes an
      indexed term.
      Query time: standard tokenizer and lowercase only. The query text is
      deliberately NOT n-grammed, so what the user has typed so far is matched
      as-is against the indexed prefixes.

      All components are referenced by their SPI name ("name" attribute), the
      same form used by the other analyzers in this schema.

      NOTE(review): preserveOriginal is not set on the edgeNGram filter, so a
      token longer than 20 characters is only indexed through its first 20
      characters. Confirm that is acceptable for the data being indexed.
  -->
  <fieldType name="text_prefix" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer name="standard"/>
      <filter name="lowercase"/>
      <filter name="edgeNGram" minGramSize="1" maxGramSize="20"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer name="standard"/>
      <filter name="lowercase"/>
    </analyzer>
  </fieldType>

  <!--
      General-purpose text type (declared multiValued at the type level).
      The index and query chains are identical: standard tokenizer, English
      stop-word removal (case-insensitive, list in lang/stopwords_en.txt),
      then lowercasing.

      NOTE(review): no <field> in this schema references this type directly;
      presumably it is kept because other Solr configuration refers to it by
      name. Confirm before removing.
  -->
  <fieldType name="text_general"
             class="solr.TextField"
             multiValued="true"
             positionIncrementGap="100">
    <!-- Applied to document text when it is indexed -->
    <analyzer type="index">
      <tokenizer name="standard"/>
      <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
      <filter name="lowercase"/>
    </analyzer>
    <!-- Applied to query text; mirrors the index chain -->
    <analyzer type="query">
      <tokenizer name="standard"/>
      <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
      <filter name="lowercase"/>
    </analyzer>
  </fieldType>

  <!--
      English text type that keeps numbers in the token stream.

      Both chains: whitespace tokenizer, English stop words, word-delimiter
      splitting (word parts, number parts, case changes), lowercase, Porter
      stemming.
      Index chain only: runs of word parts and runs of number parts are also
      catenated (catenateWords/catenateNumbers = 1), and the resulting token
      graph is flattened as the last step.
      Query chain: no catenation and no flattening.
  -->
  <fieldType name="text_en_number_splitting"
             class="solr.TextField"
             positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer name="whitespace"/>
      <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
      <filter name="wordDelimiterGraph"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <filter name="lowercase"/>
      <filter name="porterStem"/>
      <filter name="flattenGraph"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer name="whitespace"/>
      <filter name="stop" words="lang/stopwords_en.txt" ignoreCase="true"/>
      <filter name="wordDelimiterGraph"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <filter name="lowercase"/>
      <filter name="porterStem"/>
    </analyzer>
  </fieldType>

  <!--
      English text type that drops multi-digit numbers at index time and
      auto-generates phrase queries (autoGeneratePhraseQueries="true").

      Both chains: whitespace tokenizer, English stop words, word-delimiter
      splitting (word parts, number parts, case changes), lowercase, Porter
      stemming.
      Index chain only:
        * word/number runs are also catenated (catenateWords/catenateNumbers = 1);
        * after lowercasing, every run of two or more digits inside a token is
          replaced with the empty string (patternReplace);
        * the token graph is flattened as the last step.
      Query chain: no catenation, no digit stripping, no flattening.

      All components are referenced by their SPI name ("name" attribute), the
      same form used by the other analyzers in this schema.

      NOTE(review): because digits are stripped only on the index side, a
      purely numeric query term of two or more digits has no counterpart in
      the index for fields of this type. This looks intentional (numbers are
      excluded from this type); confirm before changing either chain.
  -->
  <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
    <analyzer type="index">
      <tokenizer name="whitespace"/>
      <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
      <filter name="wordDelimiterGraph"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="1"
              catenateNumbers="1"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <filter name="lowercase"/>
      <filter name="patternReplace" pattern="(\d{2,})" replacement="" replace="all"/>
      <filter name="porterStem"/>
      <filter name="flattenGraph"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer name="whitespace"/>
      <filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>
      <filter name="wordDelimiterGraph"
              generateWordParts="1"
              generateNumberParts="1"
              catenateWords="0"
              catenateNumbers="0"
              catenateAll="0"
              splitOnCaseChange="1"/>
      <filter name="lowercase"/>
      <filter name="porterStem"/>
    </analyzer>
  </fieldType>

  <!-- Fields -->

  <!-- Document identifier; declared as the uniqueKey at the end of this section. -->
  <field name="id" type="string" required="true" indexed="true"/>

  <!-- Title: searchable and returned in results; uses the number-preserving text type. -->
  <field name="title" type="text_en_number_splitting" required="true" indexed="true" stored="true"/>

  <!-- Body: searchable and returned in results. -->
  <field name="body" type="text_en_splitting" required="true" indexed="true" stored="true"/>

  <!-- Table text: searchable only (stored="false"), so it is not returned in results. -->
  <field name="table" type="text_en_splitting" required="true" indexed="true" stored="false"/>

  <!--
      Integer stored with each document but not indexed for searching.
      NOTE(review): presumably a length measure of the document; confirm
      against the indexer that populates it.
  -->
  <field name="len" type="pint" required="true" indexed="false" stored="true"/>

  <!--
      Prefix-matching copy of the title: every value written to "title" is
      also copied into "title_prefix", which is indexed with the text_prefix
      type and not stored.
  -->
  <field name="title_prefix" type="text_prefix" indexed="true" stored="false"/>
  <copyField source="title" dest="title_prefix"/>

  <!-- Single-valued long "_version_" field. -->
  <field name="_version_" type="plong" multiValued="false" indexed="true" stored="true"/>

  <!-- Unique Key -->
  <uniqueKey>id</uniqueKey>
</schema>
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`<?xml version="1.0" encoding="UTF-8" ?>`
			`<schema name="example" version="1.6">`

			`<!-- The StrField type is not analyzed, but indexed/stored verbatim. -->`
			`<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />`
			`<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />`

			`<!-- boolean type: "true" or "false" -->`
			`<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>`
			`<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>`

			`<!--`
			`Numeric field types that index values using KD-trees.`
			`Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.`
			`-->`
			`<fieldType name="pint" class="solr.IntPointField" docValues="true"/>`
			`<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>`
			`<fieldType name="plong" class="solr.LongPointField" docValues="true"/>`
			`<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>`

			`<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>`
			`<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>`
			`<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>`
			`<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>`
			`<fieldType name="random" class="solr.RandomSortField" indexed="true"/>`

			`<!-- since fields of this type are by default not stored or indexed,`
			`any data added to them will be ignored outright. -->`
			`<fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />`

			`<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and`
			`is a more restricted form of the canonical representation of dateTime`
			`http://www.w3.org/TR/xmlschema-2/#dateTime`
			`The trailing "Z" designates UTC time and is mandatory.`
			`Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z`
			`All other components are mandatory.`

			`Expressions can also be used to denote calculations that should be`
			`performed relative to "NOW" to determine the value, ie...`

			`NOW/HOUR`
			`... Round to the start of the current hour`
			`NOW-1DAY`
			`... Exactly 1 day prior to now`
			`NOW/DAY+6MONTHS+3DAYS`
			`... 6 months and 3 days in the future from the start of`
			`the current day`

			`-->`
			`<!-- KD-tree versions of date fields -->`
			`<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>`
			`<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>`

			`<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->`
			`<fieldType name="binary" class="solr.BinaryField"/>`

			`<!--`
			`RankFields can be used to store scoring factors to improve document ranking. They should be used`
			`in combination with RankQParserPlugin.`
			`(experimental)`
			`-->`
			`<fieldType name="rank" class="solr.RankField"/>`

			`<!-- solr.TextField allows the specification of custom text analyzers`
			`specified as a tokenizer and a list of token filters. Different`
			`analyzers may be specified for indexing and querying.`

			`The optional positionIncrementGap puts space between multiple fields of`
			`this type on the same document, with the purpose of preventing false phrase`
			`matching across fields.`

			`For more info on customizing your analyzer chain, please see`
			`https://solr.apache.org/guide/solr/latest/indexing-guide/document-analysis.html#using-analyzers-tokenizers-and-filters`
			`-->`

			`<!-- One can also specify an existing Analyzer class that has a`
			`default constructor via the class attribute on the analyzer element.`
			`Example:`
			`<fieldType name="text_greek" class="solr.TextField">`
			`<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>`
			`</fieldType>`
			`-->`

			`<fieldType name="text_prefix" class="solr.TextField" positionIncrementGap="100">`
			`<analyzer type="index">`
Solr schema title can store numbers 2023-12-04 20:58:07 +00:00			`<tokenizer name="standard"/>`
			`<filter name="lowercase"/>`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`<filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="20" />`
			`</analyzer>`
			`<analyzer type="query">`
Solr schema title can store numbers 2023-12-04 20:58:07 +00:00			`<tokenizer name="standard"/>`
			`<filter name="lowercase"/>`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`</analyzer>`
			`</fieldType>`

Solr schema add back text_general 2023-12-04 21:12:59 +00:00			`<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">`
			`<analyzer type="index">`
			`<tokenizer name="standard"/>`
			`<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt" />`
			`<filter name="lowercase"/>`
			`</analyzer>`
			`<analyzer type="query">`
			`<tokenizer name="standard"/>`
			`<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt" />`
			`<filter name="lowercase"/>`
			`</analyzer>`
			`</fieldType>`

Solr fix multivalued in schema 2023-12-06 03:35:07 +00:00			`<fieldType name="text_en_number_splitting" class="solr.TextField" positionIncrementGap="100">`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`<analyzer type="index">`
Solr schema title can store numbers 2023-12-04 20:58:07 +00:00			`<tokenizer name="whitespace"/>`
			`<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>`
			`<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`<filter name="lowercase"/>`
Solr schema title can store numbers 2023-12-04 20:58:07 +00:00			`<filter name="porterStem"/>`
			`<filter name="flattenGraph" />`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`</analyzer>`
			`<analyzer type="query">`
Solr schema title can store numbers 2023-12-04 20:58:07 +00:00			`<tokenizer name="whitespace"/>`
			`<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>`
			`<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`<filter name="lowercase"/>`
Solr schema title can store numbers 2023-12-04 20:58:07 +00:00			`<filter name="porterStem"/>`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`</analyzer>`
			`</fieldType>`

			`<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">`
			`<analyzer type="index">`
			`<tokenizer name="whitespace"/>`
Solr schema title can store numbers 2023-12-04 20:58:07 +00:00			`<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>`
			`<filter name="lowercase"/>`
			`<filter class="solr.PatternReplaceFilterFactory" pattern="(\d{2,})" replacement="" replace="all" />`
			`<filter name="porterStem"/>`
			`<filter name="flattenGraph" />`
			`</analyzer>`
			`<analyzer type="query">`
			`<tokenizer name="whitespace"/>`
Solr schema title can store numbers 2023-12-04 20:58:07 +00:00			`<filter name="stop" ignoreCase="true" words="lang/stopwords_en.txt"/>`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`<filter name="wordDelimiterGraph" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>`
			`<filter name="lowercase"/>`
			`<filter name="porterStem"/>`
			`</analyzer>`
			`</fieldType>`

			`<!-- Fields -->`
			`<field name="id" type="string" indexed="true" required="true" />`
Solr fix multivalued in schema 2023-12-06 03:35:07 +00:00			`<field name="title" type="text_en_number_splitting" indexed="true" stored="true" required="true" />`
Add more indexers and Solr configuration 2023-11-29 03:57:50 +00:00			`<field name="body" type="text_en_splitting" indexed="true" stored="true" required="true" />`
			`<field name="table" type="text_en_splitting" indexed="true" stored="false" required="true" />`
			`<field name="len" type="pint" indexed="false" stored="true" required="true" />`

			`<!-- Copy title to title_prefix for matching -->`
			`<field name="title_prefix" type="text_prefix" indexed="true" stored="false" />`
			`<copyField source="title" dest="title_prefix" />`

			`<field name="_version_" type="plong" indexed="true" stored="true" multiValued="false"/>`

			`<!-- Unique Key -->`
			`<uniqueKey>id</uniqueKey>`
			`</schema>`