Move to solr 4 plus corrections to SparqlQueryExecutorTest with newlines and white space characters
This commit is contained in:
parent
b642f30391
commit
120be1e6b8
122 changed files with 17422 additions and 5288 deletions
283
solr/homeDirectoryTemplate/conf/schema-old.xml
Normal file
283
solr/homeDirectoryTemplate/conf/schema-old.xml
Normal file
|
@ -0,0 +1,283 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<schema name="example" version="1.3">
|
||||
|
||||
<types>
|
||||
<!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
|
||||
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
<!-- boolean type: "true" or "false" -->
|
||||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
|
||||
<fieldtype name="binary" class="solr.BinaryField"/>
|
||||
|
||||
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<!-- A Trie based int field -->
|
||||
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0" sortMissingLast="true"/>
|
||||
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
|
||||
<!-- A Trie based date field for faster date range queries and date faceting. -->
|
||||
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
|
||||
|
||||
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
||||
|
||||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"
|
||||
catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"
|
||||
catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<fieldType name="text_unstemmed" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||
|
||||
<!-- This field intentionally sets solr.WordDelimiterFilterFactory splitOnCaseChange to 0/false because of issue NIHVIVO-3332 -->
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" />
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!-- Like text, but without synonyms. Good for autocomplete matching of book/grant titles, etc.,
|
||||
where we want to remove stop words and stem. -->
|
||||
<fieldType name="text_stemmed" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords-name.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory"
|
||||
generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1" />
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!-- A general unstemmed text field - good if one does not know the language of the field -->
|
||||
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.PhoneticFilterFactory" encoder="Metaphone" inject="false"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- lowercases the entire field value, keeping it as a single token. -->
|
||||
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- since fields of this type are by default not stored or indexed,
|
||||
any data added to them will be ignored outright. -->
|
||||
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
||||
|
||||
<fieldtype name="edgengram_untokenized" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25" side="front"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="edgengram_stemmed" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords-name.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"
|
||||
splitOnCaseChange="1" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
<filter class="solr.EdgeNGramFilterFactory" minGramSize="1" maxGramSize="25" side="front"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords-name.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"
|
||||
splitOnCaseChange="1" />
|
||||
<filter class="solr.LowerCaseFilterFactory" />
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
</types>
|
||||
|
||||
|
||||
<fields>
|
||||
<!-- Valid attributes for fields:
|
||||
name: mandatory - the name for the field
|
||||
type: mandatory - the name of a previously defined type from the
|
||||
<types> section
|
||||
indexed: true if this field should be indexed (searchable or sortable)
|
||||
stored: true if this field should be retrievable
|
||||
multiValued: true if this field may contain multiple values per document
|
||||
omitNorms: (expert) set to true to omit the norms associated with
|
||||
this field (this disables length normalization and index-time
|
||||
boosting for the field, and saves some memory). Only full-text
|
||||
fields or fields that need an index-time boost need norms.
|
||||
termVectors: [false] set to true to store the term vector for a
|
||||
given field.
|
||||
When using MoreLikeThis, fields used for similarity should be
|
||||
stored for best performance.
|
||||
termPositions: Store position information with the term vector.
|
||||
This will increase storage costs.
|
||||
termOffsets: Store offset information with the term vector. This
|
||||
will increase storage costs.
|
||||
default: a value that should be used if no value is specified
|
||||
when adding a document.
|
||||
-->
|
||||
|
||||
<!-- **************************** Vitro Fields *************************** -->
|
||||
|
||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" omitNorms="true"/>
|
||||
|
||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||
|
||||
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<field name="mostSpecificTypeURIs" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||
|
||||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" omitNorms="true" multiValued="true"/>
|
||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||
|
||||
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
|
||||
|
||||
<!-- nameText added for NIHVIVO-3701 -->
|
||||
<field name="nameText" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="nameLowercase" type="lowercase" indexed="true" stored="false" multiValued="true"/>
|
||||
<!-- A sortable version of nameLowercase -->
|
||||
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" omitNorms="true" />
|
||||
<field name="nameUnstemmed" type="text_unstemmed" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="nameStemmed" type="text_stemmed" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<!-- Autocomplete search fields -->
|
||||
<field name="acNameUntokenized" type="edgengram_untokenized" indexed="true" stored="false" multiValued="true" />
|
||||
<field name="acNameStemmed" type="edgengram_stemmed" indexed="true" stored="false" multiValued="true" />
|
||||
|
||||
<field name="indexedTime" type="long" indexed="true" stored="true"/>
|
||||
<field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="ALLTEXT" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
||||
|
||||
<field name="BETA" type="float" indexed="true" stored="true" multiValued="false"/>
|
||||
|
||||
<!-- field for storing locations of thumbnails -->
|
||||
<field name="THUMBNAIL_URL" type="string" indexed="false" stored="true"/>
|
||||
|
||||
<!-- field for source institution's url -->
|
||||
<field name="siteURL" type="string" indexed="true" stored="true"/>
|
||||
|
||||
<!-- field for source institution's name -->
|
||||
<field name="siteName" type="string" indexed="true" stored="true"/>
|
||||
|
||||
<!-- field for preferred title -->
|
||||
<field name="PREFERRED_TITLE" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<!-- Copy nameRaw into several other fields -->
|
||||
<copyField source="nameRaw" dest="nameStemmed" />
|
||||
<copyField source="nameRaw" dest="nameUnstemmed" />
|
||||
<copyField source="nameRaw" dest="nameLowercase" />
|
||||
<copyField source="nameRaw" dest="NAME_PHONETIC" />
|
||||
<copyField source="nameRaw" dest="acNameUntokenized" />
|
||||
<copyField source="nameRaw" dest="acNameStemmed" />
|
||||
<copyField source="nameRaw" dest="nameText" />
|
||||
<!-- nameLowercaseSingleValued is not copied from nameRaw becasue nameRaw might have multiple values -->
|
||||
|
||||
<!-- field for hash signature, used for comparing to versions from external caches -->
|
||||
<field name="etag" type="string" stored="true" indexed="false" multiValued="false" />
|
||||
|
||||
|
||||
|
||||
<!-- **************************** End Vitro Fields *************************** -->
|
||||
<!-- **************************** Dynamic Fields *************************** -->
|
||||
<dynamicField name="*_string" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<dynamicField name="*_text" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<dynamicField name="*_tdate" type="tdate" indexed="true" stored="true" multiValued="true"/>
|
||||
<dynamicField name="*_tint" type="tint" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<!-- **************************** End Dynamic Fields *************************** -->
|
||||
|
||||
|
||||
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
||||
</fields>
|
||||
|
||||
<!-- Field to use to determine and enforce document uniqueness.
|
||||
Unless this field is marked with required="false", it will be a required field -->
|
||||
<uniqueKey>DocId</uniqueKey>
|
||||
|
||||
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
|
||||
<!-- This is now set in solrconfig.xml -->
|
||||
<!-- <defaultSearchField>ALLTEXT</defaultSearchField> -->
|
||||
|
||||
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
|
||||
<solrQueryParser defaultOperator="AND"/>
|
||||
|
||||
<!-- Similarity is the scoring routine for each document vs. a query.
|
||||
A custom similarity may be specified here, but the default is fine
|
||||
for most applications. -->
|
||||
<similarity class="org.apache.lucene.search.DefaultSimilarity"/>
|
||||
|
||||
</schema>
|
Loading…
Add table
Add a link
Reference in a new issue