Handle diacritics in PagedSearchController. NIHVIVO-3277

2011-11-03 16:49:15 +00:00 · 2011-11-03 16:49:15 +00:00 · 98ded1f8bb
commit 98ded1f8bb
parent 7b6a22f447
6 changed files with 141 additions and 415 deletions
--- a/solr/homeDirectoryTemplate/conf/schema.xml
+++ b/solr/homeDirectoryTemplate/conf/schema.xml
@ -1,33 +1,6 @@
 <?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-->

 <!--  
- This is the Solr schema file. This file should be named "schema.xml" and
- should be in the conf directory under the solr home
- (i.e. ./solr/conf/schema.xml by default) 
- or located where the classloader for the Solr webapp can find it.
-
- This example schema is the recommended starting point for users.
- It should be kept correct and concise, usable out-of-the-box.
-
- For more information, on how to customize this file, please see
- http://wiki.apache.org/solr/SchemaXml
-
 PERFORMANCE NOTE: this schema includes many optional features and should not
 be used for benchmarking.  To improve performance one could
  - set stored="false" for all fields possible (esp large fields) when you
@ -94,22 +67,7 @@
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
-    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
-
-    <!--
-     Numeric field types that index each value at various levels of precision
-     to accelerate range queries when the number of values between the range
-     endpoints is large. See the javadoc for NumericRangeQuery for internal
-     implementation details.
-
-     Smaller precisionStep values (specified in bits) will lead to more tokens
-     indexed per value, slightly larger index size, and faster range queries.
-     A precisionStep of 0 disables indexing at different precision levels.
-    -->
-    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
-    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
-    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
-    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> 

    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
         is a more restricted form of the canonical representation of dateTime
@ -138,39 +96,6 @@
    <!-- A Trie based date field for faster date range queries and date faceting. -->
    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

-
-    <!--
-      Note:
-      These should only be used for compatibility with existing indexes (created with older Solr versions)
-      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
-
-      Plain numeric field types that store and index the text
-      value verbatim (and hence don't support range queries, since the
-      lexicographic ordering isn't equal to the numeric ordering)
-    -->
-    <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
-    <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
-    <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
-    <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
-    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
-
-
-    <!--
-      Note:
-      These should only be used for compatibility with existing indexes (created with older Solr versions)
-      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.
-
-      Numeric field types that manipulate the value into
-      a string value that isn't human-readable in its internal form,
-      but with a lexicographic ordering the same as the numeric ordering,
-      so that range queries work correctly.
-    -->
-    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
-    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
-    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
-    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
-
-
    <!-- The "RandomSortField" is not used to store or search any
         data.  You can declare fields of this type it in your schema
         to generate pseudo-random orderings of your docs for sorting 
@ -194,21 +119,7 @@

         For more info on customizing your analyzer chain, please see
         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
-     -->
-
-    <!-- One can also specify an existing Analyzer class that has a
-         default constructor via the class attribute on the analyzer element
-    <fieldType name="text_greek" class="solr.TextField">
-      <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
-    </fieldType>
-    -->
-
-    <!-- A text field that only splits on whitespace for exact matching of words -->
-    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
-      <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-      </analyzer>
-    </fieldType>
+     -->    

    <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
        words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
@ -218,26 +129,45 @@
        form phrase queries. For example, WordDelimiterFilter splitting text:pdp-11 will cause the parser
        to generate text:"pdp 11" rather than (text:PDP OR text:11).
        NOTE: autoGeneratePhraseQueries="true" tends to not work well for non whitespace delimited languages.
-        -->
+    -->
+    <!--
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <!-- in this example, we will only use synonyms at query time -->
-       <!-- <filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="true"/> -->
-        
-        <!-- Case insensitive stop word removal.
-          add enablePositionIncrements=true in both the index and query
-          analyzers to leave a 'gap' for more accurate phrase queries.
-        -->
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>       
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />      
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" 
          catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>        
        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>        
      </analyzer>
+    </fieldType>-->
+  
+    <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+      <analyzer type="index"> <!-- index-time chain: no synonyms; word and number parts are also catenated -->
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/> <!-- fold diacritics to ASCII; same filter text_stemmed uses -->
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"
+                catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+      </analyzer>
+      <analyzer type="query"> <!-- query-time chain: adds synonym expansion; no catenation of split parts -->
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.ASCIIFoldingFilterFactory"/> <!-- must match the index-time folding so accented and plain spellings meet -->
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"
+                catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
+      </analyzer>
    </fieldType>
-
+    
    <!-- Like text, but without synonyms and stemming. Good for autocomplete where we want to remove
      stop words but not stem. -->
    <fieldType name="text_unstemmed" class="solr.TextField" positionIncrementGap="100">
@ -256,36 +186,20 @@
    <!-- Like text, but without synonyms. Good for autocomplete matching of book/grant titles, etc., where we want to remove
      stop words and stem. -->
    <fieldType name="text_stemmed" class="solr.TextField" positionIncrementGap="100">
-      <analyzer type="index">
+      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true"
          words="stopwords-name.txt"  enablePositionIncrements="true" />               
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"                 
          generateNumberParts="1" catenateWords="0"                 
          catenateNumbers="0" catenateAll="0"                
-          splitOnCaseChange="1" />                                               
-        <filter class="solr.LowerCaseFilterFactory"/> 
+          splitOnCaseChange="1" />
+        <filter class="solr.ASCIIFoldingFilterFactory"/>        
+        <filter class="solr.LowerCaseFilterFactory"/>        
        <filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>  
      </analyzer>
    </fieldType>
-    
-    <!-- Less flexible matching, but less false matches.  Probably not ideal for product names,
-         but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
-    <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
-      <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
-        <filter class="solr.EnglishMinimalStemFilterFactory"/>
-        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
-             possible with WordDelimiterFilter in conjuncton with stemming. -->
-        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
-      </analyzer>
-    </fieldType>
-
+        

    <!-- A general unstemmed text field - good if one does not know the language of the field -->
    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
@ -308,74 +222,6 @@
      </analyzer>
    </fieldType>

-
-    <!-- A general unstemmed text field that indexes tokens normally and also
-         reversed (via ReversedWildcardFilterFactory), to enable more efficient 
-	 leading wildcard queries. -->
-    <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
-      <analyzer type="index">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
-           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
-      </analyzer>
-      <analyzer type="query">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="stopwords.txt"
-                enablePositionIncrements="true"
-                />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-      </analyzer>
-    </fieldType>
-
-    <!-- charFilter + WhitespaceTokenizer  -->
-    <!--
-    <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
-      <analyzer>
-        <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-      </analyzer>
-    </fieldType>
-    -->
-
-    <!-- This is an example of using the KeywordTokenizer along
-         With various TokenFilterFactories to produce a sortable field
-         that does not include some properties of the source text
-      -->
-    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
-      <analyzer>
-        <!-- KeywordTokenizer does no actual tokenizing, so the entire
-             input string is preserved as a single token
-          -->
-        <tokenizer class="solr.KeywordTokenizerFactory"/>
-        <!-- The LowerCase TokenFilter does what you expect, which can be
-             when you want your sorting to be case insensitive
-          -->
-        <filter class="solr.LowerCaseFilterFactory" />
-        <!-- The TrimFilter removes any leading or trailing whitespace -->
-        <filter class="solr.TrimFilterFactory" />
-        <!-- The PatternReplaceFilter gives you the flexibility to use
-             Java Regular expression to replace any sequence of characters
-             matching a pattern with an arbitrary replacement string, 
-             which may include back references to portions of the original
-             string matched by the pattern.
-             
-             See the Java Regular Expression documentation for more
-             information on pattern and replacement string syntax.
-             
-             http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
-          -->
-        <filter class="solr.PatternReplaceFilterFactory"
-                pattern="([^a-z])" replacement="" replace="all"
-        />
-      </analyzer>
-    </fieldType>
    
    <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
      <analyzer>
@ -384,24 +230,6 @@
      </analyzer>
    </fieldtype>

-    <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
-      <analyzer>
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <!--
-        The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
-        a token of "foo|1.4"  would be indexed as "foo" with a payload of 1.4f
-        Attributes of the DelimitedPayloadTokenFilterFactory : 
-         "delimiter" - a one character delimiter. Default is | (pipe)
-	 "encoder" - how to encode the following value into a playload
-	    float -> org.apache.lucene.analysis.payloads.FloatEncoder,
-	    integer -> o.a.l.a.p.IntegerEncoder
-	    identity -> o.a.l.a.p.IdentityEncoder
-            Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
-         -->
-        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
-      </analyzer>
-    </fieldtype>
-
    <!-- lowercases the entire field value, keeping it as a single token.  -->
    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
@ -410,38 +238,10 @@
      </analyzer>
    </fieldType>

-    <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
-      <analyzer>
-        <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
-      </analyzer>
-    </fieldType>
-
    <!-- since fields of this type are by default not stored or indexed,
         any data added to them will be ignored outright.  --> 
    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
-
-    <!-- This point type indexes the coordinates as separate fields (subFields)
-      If subFieldType is defined, it references a type, and a dynamic field
-      definition is created matching *___<typename>.  Alternately, if 
-      subFieldSuffix is defined, that is used to create the subFields.
-      Example: if subFieldType="double", then the coordinates would be
-        indexed in fields myloc_0___double,myloc_1___double.
-      Example: if subFieldSuffix="_d" then the coordinates would be indexed
-        in fields myloc_0_d,myloc_1_d
-      The subFields are an implementation detail of the fieldType, and end
-      users normally should not need to know about them.
-     -->
-    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
-
-    <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
-    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
-
-   <!--
-    A Geohash is a compact representation of a latitude longitude pair in a single field.
-    See http://wiki.apache.org/solr/SpatialSearch
-   -->
-    <fieldtype name="geohash" class="solr.GeoHashField"/>   
-
+    
    <fieldtype name="edgengram_untokenized" class="solr.TextField">
      <analyzer type="index">
        <tokenizer class="solr.KeywordTokenizerFactory"/>       
@ -480,7 +280,7 @@
      </analyzer>      
    </fieldtype>
 
-    <!-- Commenting this fieldtype out for now because we have no use case for 
+    <!-- RY: Commenting this fieldtype out for now because we have no use case for 
      a tokenized, unstemmed autocomplete field. Identical to edgengram_stemmed but without
      the stemming.
    <fieldtype name="edgengram_unstemmed" class="solr.TextField">
@ -535,125 +335,75 @@
       when adding a document.
   -->

-<!-- ****************************  Vitro Fields *************************** -->
+  <!-- ****************************  Vitro Fields *************************** -->
+  
+  <field name="DocId" type="string" indexed="true" stored="true" required="true" omitNorms="true"/> 
+  
+  <field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
+  
+  <field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
+  <field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
+  
+  <field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
+  <field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" omitNorms="true" multiValued="true"/>
+  <field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
+  
+  <field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
+  <!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
+  <field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
+  <!-- A sortable version of nameLowercase -->
+  <field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" /> 
+  <field name="nameUnstemmed" type="text_unstemmed" indexed="true" stored="false" multiValued="true"/>
+  <field name="nameStemmed" type="text_stemmed" indexed="true" stored="false" multiValued="true"/>  
+     
+  <!-- Autocomplete search fields -->
+  <field name="acNameUntokenized" type="edgengram_untokenized" indexed="true" stored="false" multiValued="true" />
+  <!-- <field name="acNameUnstemmed" type="edgengram_unstemmed" indexed="true" stored="false" multiValued="true" /> -->
+  <field name="acNameStemmed" type="edgengram_stemmed" indexed="true" stored="false" multiValued="true" />
+  
+  <field name="indexedTime" type="long" indexed="true" stored="true"/>
+  <field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/>
+  
+  <field name="ALLTEXT" type="text" indexed="true" stored="true" multiValued="true"/>
+  <field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
+  <field name="ALLTEXT_PHONETIC" type="phonetic" indexed="true" stored="false" multiValued="true"/>
+  
+  <field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
+  
+  <!-- <field name="targetInfo" type="text" indexed="true" stored="false" multiValued="true"/> -->
+  <field name="BETA" type="float" indexed="true" stored="true" multiValued="false"/>
+  <!-- <field name="PHI" type="float" indexed="true" stored="true" multiValued="false"/>
+  <field name="ADJACENT_NODES" type="string" indexed="true" stored="true" multiValued="true"/> -->
+  
+  <field name="modType" type="ignored"/>
+  <field name="JCLASS" type="ignored"/>
+  
+  <!-- field for storing locations of thumbnails -->
+  <field name="THUMBNAIL_URL" type="string" indexed="false" stored="true"/>
+  
+  <!-- field for source institution's url -->
+  <field name="siteURL" type="string" indexed="true" stored="true"/>
+  
+  <!--  field for source institution's name -->
+  <field name="siteName" type="string" indexed="true" stored="true"/>
+  
+  <!--  field for preferred title -->
+  <field name="PREFERRED_TITLE" type="string" indexed="true" stored="true"/>
+  
+  <!-- Populate the analyzed name fields from nameRaw, and copy nameLowercase to its sortable single-valued field. -->
+  <copyField source="nameRaw" dest="nameStemmed" />
+  <copyField source="nameRaw" dest="nameUnstemmed" />
+  <copyField source="nameRaw" dest="nameLowercase" />
+  <copyField source="nameLowercase" dest="nameLowercaseSingleValued" />
+     
+  <!-- ****************************  End Vitro Fields *************************** -->

-<field name="DocId" type="string" indexed="true" stored="true" required="true" omitNorms="true"/> 

-<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
-
-<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
-<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
-
-<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
-<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" omitNorms="true" multiValued="true"/>
-<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
-
-<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
-<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
-<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
-<!-- A sortable version of nameLowercase -->
-<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" /> 
-<field name="nameUnstemmed" type="text_unstemmed" indexed="true" stored="false" multiValued="true"/>
-<field name="nameStemmed" type="text_stemmed" indexed="true" stored="false" multiValued="true"/>  
   
-<!-- Autocomplete search fields -->
-<field name="acNameUntokenized" type="edgengram_untokenized" indexed="true" stored="false" multiValued="true" />
-<!-- <field name="acNameUnstemmed" type="edgengram_unstemmed" indexed="true" stored="false" multiValued="true" /> -->
-<field name="acNameStemmed" type="edgengram_stemmed" indexed="true" stored="false" multiValued="true" />
-
-<field name="indexedTime" type="long" indexed="true" stored="true"/>
-<field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/>
-
-<field name="ALLTEXT" type="text" indexed="true" stored="true" multiValued="true"/>
-<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
-<field name="ALLTEXT_PHONETIC" type="phonetic" indexed="true" stored="false" multiValued="true"/>
-
-<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
-
-<!-- <field name="targetInfo" type="text" indexed="true" stored="false" multiValued="true"/> -->
-<field name="BETA" type="float" indexed="true" stored="true" multiValued="false"/>
-<!-- <field name="PHI" type="float" indexed="true" stored="true" multiValued="false"/>
-<field name="ADJACENT_NODES" type="string" indexed="true" stored="true" multiValued="true"/> -->
-
-<field name="modType" type="ignored"/>
-<field name="JCLASS" type="ignored"/>
-
-<!-- field for storing locations of thumbnails -->
-<field name="THUMBNAIL_URL" type="string" indexed="false" stored="true"/>
-
-<!-- field for source institution's url -->
-<field name="siteURL" type="string" indexed="true" stored="true"/>
-
-<!--  field for source institution's name -->
-<field name="siteName" type="string" indexed="true" stored="true"/>
-
-<!--  field for preferred title -->
-<field name="PREFERRED_TITLE" type="string" indexed="true" stored="true"/>
-
-<!-- Copy nameLowercase to sortable field. -->
-<copyField source="nameLowercase" dest="nameLowercaseSingleValued" />
-   
-<!-- ****************************  End Vitro Fields *************************** -->
-
-
-   <!-- catchall field, containing all other searchable text fields (implemented
-        via copyField further on in this schema  -->
-   <!-- Same as ALLTEXT
-        <field name="text" type="text" indexed="true" stored="false" multiValued="true"/> 
-     -->
-
-   <!-- catchall text field that indexes tokens both normally and in reverse for efficient
-        leading wildcard queries. -->
-   <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
-
   <!-- Uncommenting the following will create a "timestamp" field using
        a default value of "NOW" to indicate when each document was indexed.
     -->
-   <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
-   
-
-   <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
-        will be used if the name matches any of the patterns.
-        RESTRICTION: the glob-like pattern in the name attribute must have
-        a "*" only at the start or the end.
-        EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i)
-        Longer patterns will be matched first.  if equal size patterns
-        both match, the first appearing in the schema will be used.  -->
-   <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
-   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
-   <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
-   <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
-   <dynamicField name="*_txt" type="text"    indexed="true"  stored="true" multiValued="true"/>
-   <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
-   <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
-   <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
-
-   <!-- Type used to index the lat and lon components for the "location" FieldType -->
-   <dynamicField name="*_coordinate"  type="tdouble" indexed="true"  stored="false"/>
-
-   <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
-   <dynamicField name="*_p"  type="location" indexed="true" stored="true"/>
-
-   <!-- some trie-coded dynamic fields for faster range queries -->
-   <dynamicField name="*_ti" type="tint"    indexed="true"  stored="true"/>
-   <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="true"/>
-   <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="true"/>
-   <dynamicField name="*_td" type="tdouble" indexed="true"  stored="true"/>
-   <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="true"/>
-
-   <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/>
-
-   <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
-   <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
-
-   <dynamicField name="random_*" type="random" />
-
-   <!-- uncomment the following to ignore any fields that don't already match an existing 
-        field name or dynamic field, rather than reporting them as an error. 
-        alternately, change the type="ignored" to some other type e.g. "text" if you want 
-        unknown fields indexed and/or stored by default --> 
-   <!--dynamicField name="*" type="ignored" multiValued="true" /-->
-   
+   <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>     
 </fields>

 <!-- Field to use to determine and enforce document uniqueness.