NIHVIVO-2437 Upgrade to Solr 3.1
This commit is contained in:
parent
ad98e7723c
commit
265a86019c
27 changed files with 7060 additions and 2693 deletions
Binary file not shown.
BIN
solr/apache-solr-3.1.0.war
Normal file
BIN
solr/apache-solr-3.1.0.war
Normal file
Binary file not shown.
|
@ -12,7 +12,7 @@
|
|||
<property name="solr.build.dir" location="."/>
|
||||
<property name="solr.example.dir" location="${solr.build.dir}/exampleSolr" />
|
||||
<property name="solr.context.config.example" location="${solr.build.dir}/exampleSolrContext.xml"/>
|
||||
<property name="solr.war" location="${solr.build.dir}/apache-solr-1.4.1.war"/>
|
||||
<property name="solr.war" location="${solr.build.dir}/apache-solr-3.1.0.war"/>
|
||||
|
||||
<!-- =================================
|
||||
target: describe
|
||||
|
|
3813
solr/exampleSolr/conf/mapping-FoldToASCII.txt
Normal file
3813
solr/exampleSolr/conf/mapping-FoldToASCII.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -45,7 +45,16 @@
|
|||
that avoids logging every request
|
||||
-->
|
||||
|
||||
<schema name="example" version="1.2">
|
||||
<schema name="example" version="1.3">
|
||||
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
||||
Applications should change this to reflect the nature of the search collection.
|
||||
version="1.3" is Solr's version number for the schema syntax and semantics. It should
|
||||
not normally be changed by applications.
|
||||
1.0: multiValued attribute did not exist, all fields are multiValued by nature
|
||||
1.1: multiValued attribute introduced, false by default
|
||||
1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
|
||||
1.3: removed optional field compress feature
|
||||
-->
|
||||
|
||||
<types>
|
||||
<!-- field type definitions. The "name" attribute is
|
||||
|
@ -56,15 +65,12 @@
|
|||
org.apache.solr.analysis package.
|
||||
-->
|
||||
|
||||
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
|
||||
- StrField and TextField support an optional compressThreshold which
|
||||
limits compression (if enabled in the derived fields) to values which
|
||||
exceed a certain size (in characters).
|
||||
-->
|
||||
<!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
|
||||
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
<!-- boolean type: "true" or "false" -->
|
||||
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
<!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
|
||||
<fieldtype name="binary" class="solr.BinaryField"/>
|
||||
|
||||
|
@ -208,8 +214,12 @@
|
|||
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||
The attribute autoGeneratePhraseQueries="true" (the default) causes words that get split to
|
||||
form phrase queries. For example, WordDelimiterFilter splitting text:pdp-11 will cause the parser
|
||||
to generate text:"pdp 11" rather than (text:PDP OR text:11).
|
||||
NOTE: autoGeneratePhraseQueries="true" tends to not work well for non whitespace delimited languages.
|
||||
-->
|
||||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
||||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- in this example, we will only use synonyms at query time
|
||||
|
@ -224,8 +234,10 @@
|
|||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<!-- <filter class="solr.PorterStemFilterFactory"/> -->
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
|
@ -238,6 +250,7 @@
|
|||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
@ -266,7 +279,8 @@
|
|||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
|
||||
possible with WordDelimiterFilter in conjunction with stemming. -->
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
|
@ -397,11 +411,38 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.PathHierarchyTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- since fields of this type are by default not stored or indexed,
|
||||
any data added to them will be ignored outright. -->
|
||||
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
|
||||
|
||||
<!-- This point type indexes the coordinates as separate fields (subFields)
|
||||
If subFieldType is defined, it references a type, and a dynamic field
|
||||
definition is created matching *___<typename>. Alternately, if
|
||||
subFieldSuffix is defined, that is used to create the subFields.
|
||||
Example: if subFieldType="double", then the coordinates would be
|
||||
indexed in fields myloc_0___double,myloc_1___double.
|
||||
Example: if subFieldSuffix="_d" then the coordinates would be indexed
|
||||
in fields myloc_0_d,myloc_1_d
|
||||
The subFields are an implementation detail of the fieldType, and end
|
||||
users normally should not need to know about them.
|
||||
-->
|
||||
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
||||
|
||||
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
||||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
||||
|
||||
<!--
|
||||
A Geohash is a compact representation of a latitude longitude pair in a single field.
|
||||
See http://wiki.apache.org/solr/SpatialSearch
|
||||
-->
|
||||
<fieldtype name="geohash" class="solr.GeoHashField"/>
|
||||
|
||||
</types>
|
||||
|
||||
|
||||
|
@ -412,9 +453,6 @@
|
|||
<types> section
|
||||
indexed: true if this field should be indexed (searchable or sortable)
|
||||
stored: true if this field should be retrievable
|
||||
compressed: [false] if this field should be stored using gzip compression
|
||||
(this will only apply if the field type is compressable; among
|
||||
the standard field types, only TextField and StrField are)
|
||||
multiValued: true if this field may contain multiple values per document
|
||||
omitNorms: (expert) set to true to omit the norms associated with
|
||||
this field (this disables length normalization and index-time
|
||||
|
@ -432,9 +470,6 @@
|
|||
when adding a document.
|
||||
-->
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- **************************** Vitro Fields *************************** -->
|
||||
|
||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
||||
|
@ -460,20 +495,25 @@
|
|||
<field name="modType" type="ignored"/>
|
||||
<field name="JCLASS" type="ignored"/>
|
||||
|
||||
|
||||
|
||||
<!-- **************************** End Vitro Fields *************************** -->
|
||||
|
||||
|
||||
<!-- catchall field, containing all other searchable text fields (implemented
|
||||
via copyField further on in this schema) -->
|
||||
<!-- Same as ALLTEXT
|
||||
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
-->
|
||||
|
||||
<!-- catchall text field that indexes tokens both normally and in reverse for efficient
|
||||
leading wildcard queries. -->
|
||||
<field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<!-- The following definition creates a "timestamp" field using
|
||||
a default value of "NOW" to indicate when each document was indexed.
|
||||
-->
|
||||
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
|
||||
|
||||
|
||||
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
|
||||
will be used if the name matches any of the patterns.
|
||||
RESTRICTION: the glob-like pattern in the name attribute must have
|
||||
|
@ -485,10 +525,16 @@
|
|||
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_txt" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
|
||||
|
||||
<!-- Type used to index the lat and lon components for the "location" FieldType -->
|
||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
||||
|
||||
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_p" type="location" indexed="true" stored="true"/>
|
||||
|
||||
<!-- some trie-coded dynamic fields for faster range queries -->
|
||||
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
|
||||
|
@ -527,8 +573,6 @@
|
|||
is added to the index. It's used either to index the same field differently,
|
||||
or to add multiple fields to the same field for easier/faster searching. -->
|
||||
|
||||
<!-- <copyField source="name" dest="text"/> -->
|
||||
|
||||
<!-- Above, multiple source fields are copied to the [text] field.
|
||||
Another way to map multiple source fields to the same
|
||||
destination field is to use the dynamic field syntax.
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -12,11 +12,9 @@
|
|||
|
||||
#-----------------------------------------------------------------------
|
||||
#some test synonym mappings unlikely to appear in real input text
|
||||
aaa => aaaa
|
||||
bbb => bbbb1 bbbb2
|
||||
ccc => cccc1,cccc2
|
||||
a\=>a => b\=>b
|
||||
a\,a => b\,b
|
||||
aaafoo => aaabar
|
||||
bbbfoo => bbbfoo bbbbar
|
||||
cccfoo => cccbar cccbaz
|
||||
fooaaa,baraaa,bazaaa
|
||||
|
||||
# Some synonym groups specific to this example
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
xmlns:xsl='http://www.w3.org/1999/XSL/Transform'
|
||||
>
|
||||
|
||||
<xsl:output media-type="text/html; charset=UTF-8" encoding="UTF-8"/>
|
||||
<xsl:output media-type="text/html" encoding="UTF-8"/>
|
||||
|
||||
<xsl:variable name="title" select="concat('Solr search results (',response/result/@numFound,' documents)')"/>
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
<xsl:output
|
||||
method="xml"
|
||||
encoding="utf-8"
|
||||
media-type="text/xml; charset=UTF-8"
|
||||
media-type="application/xml"
|
||||
/>
|
||||
|
||||
<xsl:template match='/'>
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
<xsl:output
|
||||
method="xml"
|
||||
encoding="utf-8"
|
||||
media-type="text/xml; charset=UTF-8"
|
||||
media-type="application/xml"
|
||||
/>
|
||||
<xsl:template match='/'>
|
||||
<rss version="2.0">
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
<xsl:output
|
||||
method="html"
|
||||
encoding="UTF-8"
|
||||
media-type="text/html; charset=UTF-8"
|
||||
media-type="text/html"
|
||||
doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
|
||||
/>
|
||||
|
|
34
solr/exampleSolr/solr.xml
Normal file
34
solr/exampleSolr/solr.xml
Normal file
|
@ -0,0 +1,34 @@
|
|||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!--
|
||||
All (relative) paths are relative to the installation path
|
||||
|
||||
persistent: Save changes made via the API to this file
|
||||
sharedLib: path to a lib directory that will be shared across all cores
|
||||
-->
|
||||
<solr persistent="false">
|
||||
|
||||
<!--
|
||||
adminPath: RequestHandler path to manage cores.
|
||||
If 'null' (or absent), cores will not be manageable via request handler
|
||||
-->
|
||||
<cores adminPath="/admin/cores" defaultCoreName="collection1">
|
||||
<core name="collection1" instanceDir="." />
|
||||
</cores>
|
||||
</solr>
|
BIN
webapp/lib/apache-solr-analysis-extras-3.1.0.jar
Normal file
BIN
webapp/lib/apache-solr-analysis-extras-3.1.0.jar
Normal file
Binary file not shown.
Binary file not shown.
BIN
webapp/lib/apache-solr-core-3.1.0.jar
Normal file
BIN
webapp/lib/apache-solr-core-3.1.0.jar
Normal file
Binary file not shown.
Binary file not shown.
BIN
webapp/lib/apache-solr-solrj-3.1.0.jar
Normal file
BIN
webapp/lib/apache-solr-solrj-3.1.0.jar
Normal file
Binary file not shown.
|
@ -14,7 +14,6 @@ import net.sf.jga.fn.UnaryFunctor;
|
|||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.solr.analysis.HTMLStripReader;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
|
@ -141,23 +140,27 @@ public abstract class VitroHighlighter extends UnaryFunctor<String,String> {
|
|||
|
||||
private final String stripHtml(String in){
|
||||
/* make a string with html stripped out */
|
||||
Reader stripIn =new HTMLStripReader( new StringReader( in ) );
|
||||
StringWriter stripOut = new StringWriter(in.length());
|
||||
|
||||
char bytes[] = new char[5000];
|
||||
int bytesRead = 0;
|
||||
try {
|
||||
//this is a mess, there must be a better way to do this.
|
||||
while ( true ){
|
||||
bytesRead = stripIn.read( bytes );
|
||||
if( bytesRead == -1 ) break;
|
||||
stripOut.write(bytes, 0, bytesRead );
|
||||
}
|
||||
} catch (IOException e1) {
|
||||
log.error("LuceneHighlighter.getHighlightFragments()" +
|
||||
" - unable to strip html" + e1);
|
||||
}
|
||||
return stripOut.toString();
|
||||
// ryounes 5/16/2011 Broken with upgrade to Solr 3.1: HTMLStripReader has been removed.
|
||||
// According to change list, should use HTMLStripCharFilter, but it's not immediately clear how
|
||||
// to migrate this code. Will enter Jira issue.
|
||||
// Reader stripIn = new HTMLStripReader( new StringReader( in ) );
|
||||
// StringWriter stripOut = new StringWriter(in.length());
|
||||
//
|
||||
// char bytes[] = new char[5000];
|
||||
// int bytesRead = 0;
|
||||
// try {
|
||||
// //this is a mess, there must be a better way to do this.
|
||||
// while ( true ){
|
||||
// bytesRead = stripIn.read( bytes );
|
||||
// if( bytesRead == -1 ) break;
|
||||
// stripOut.write(bytes, 0, bytesRead );
|
||||
// }
|
||||
// } catch (IOException e1) {
|
||||
// log.error("LuceneHighlighter.getHighlightFragments()" +
|
||||
// " - unable to strip html" + e1);
|
||||
// }
|
||||
// return stripOut.toString();
|
||||
return in;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -141,7 +141,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
//There may be other non-html formats in the future
|
||||
Format format = getFormat(vreq);
|
||||
boolean wasXmlRequested = Format.XML == format;
|
||||
log.debug("xml was the requested format");
|
||||
log.debug("Requested format was " + (wasXmlRequested ? "xml" : "html"));
|
||||
boolean wasHtmlRequested = ! wasXmlRequested;
|
||||
|
||||
try {
|
||||
|
|
|
@ -128,7 +128,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
|||
//There may be other non-html formats in the future
|
||||
Format format = getFormat(vreq);
|
||||
boolean wasXmlRequested = Format.XML == format;
|
||||
log.debug("xml was the requested format");
|
||||
log.debug("Requested format was " + (wasXmlRequested ? "xml" : "html"));
|
||||
boolean wasHtmlRequested = ! wasXmlRequested;
|
||||
|
||||
try {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue