Handle diacritics in PagedSearchController. NIHVIVO-3277
parent 7b6a22f447
commit 98ded1f8bb
6 changed files with 141 additions and 415 deletions
@@ -1,33 +1,6 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<!--
This is the Solr schema file. This file should be named "schema.xml" and
should be in the conf directory under the solr home
(i.e. ./solr/conf/schema.xml by default)
or located where the classloader for the Solr webapp can find it.

This example schema is the recommended starting point for users.
It should be kept correct and concise, usable out-of-the-box.

For more information, on how to customize this file, please see
http://wiki.apache.org/solr/SchemaXml

PERFORMANCE NOTE: this schema includes many optional features and should not
be used for benchmarking. To improve performance one could
- set stored="false" for all fields possible (esp large fields) when you

@@ -96,21 +69,6 @@
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

<!--
Numeric field types that index each value at various levels of precision
to accelerate range queries when the number of values between the range
endpoints is large. See the javadoc for NumericRangeQuery for internal
implementation details.

Smaller precisionStep values (specified in bits) will lead to more tokens
indexed per value, slightly larger index size, and faster range queries.
A precisionStep of 0 disables indexing at different precision levels.
-->
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>

<!-- The format for this date field is of the form 1995-12-31T23:59:59Z, and
is a more restricted form of the canonical representation of dateTime
http://www.w3.org/TR/xmlschema-2/#dateTime
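Aside: the trie types above exist to make range queries over numbers and dates cheap. A minimal SolrJ sketch of such a query follows; the SolrServer wiring and the "modified_tdt" field name are assumptions for illustration only, not part of this commit.

    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrServer;
    import org.apache.solr.client.solrj.response.QueryResponse;

    public class TrieRangeQuerySketch {
        // Counts documents modified in the last year, assuming a tdate dynamic
        // field named "modified_tdt" (it would match the "*_tdt" pattern defined
        // further down in this schema).
        public static long countRecentlyModified(SolrServer solr) throws Exception {
            SolrQuery query = new SolrQuery("*:*");
            query.addFilterQuery("modified_tdt:[NOW-1YEAR TO NOW]");
            query.setRows(0); // only the hit count is needed
            QueryResponse rsp = solr.query(query);
            return rsp.getResults().getNumFound();
        }
    }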
@@ -138,39 +96,6 @@
<!-- A Trie based date field for faster date range queries and date faceting. -->
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>


<!--
Note:
These should only be used for compatibility with existing indexes (created with older Solr versions)
or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.

Plain numeric field types that store and index the text
value verbatim (and hence don't support range queries, since the
lexicographic ordering isn't equal to the numeric ordering)
-->
<fieldType name="pint" class="solr.IntField" omitNorms="true"/>
<fieldType name="plong" class="solr.LongField" omitNorms="true"/>
<fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
<fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
<fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>


<!--
Note:
These should only be used for compatibility with existing indexes (created with older Solr versions)
or if "sortMissingFirst" or "sortMissingLast" functionality is needed. Use Trie based fields instead.

Numeric field types that manipulate the value into
a string value that isn't human-readable in its internal form,
but with a lexicographic ordering the same as the numeric ordering,
so that range queries work correctly.
-->
<fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>


<!-- The "RandomSortField" is not used to store or search any
data. You can declare fields of this type in your schema
to generate pseudo-random orderings of your docs for sorting

@@ -196,20 +121,6 @@
http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
-->

<!-- One can also specify an existing Analyzer class that has a
default constructor via the class attribute on the analyzer element
<fieldType name="text_greek" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
</fieldType>
-->

<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>

<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".

@@ -219,19 +130,38 @@
to generate text:"pdp 11" rather than (text:PDP OR text:11).
NOTE: autoGeneratePhraseQueries="true" tends to not work well for non whitespace delimited languages.
-->
<!--
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time -->
<!-- <filter class="solr.SynonymFilterFactory" synonyms="syn.txt" ignoreCase="true" expand="true"/> -->

<!-- Case insensitive stop word removal.
add enablePositionIncrements=true in both the index and query
analyzers to leave a 'gap' for more accurate phrase queries.
-->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"
catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
</analyzer>
</fieldType>-->

<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ISOLatin1AccentFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"
catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ISOLatin1AccentFilterFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1"
catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
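The analyzer above is the schema half of the diacritics fix (NIHVIVO-3277): solr.ISOLatin1AccentFilterFactory now runs in both the index and query chains, so "José" and "Jose" reduce to the same token. For readers who want to see what accent folding does outside Solr, here is a rough JDK-only sketch of the same normalization; it illustrates the idea and is not the filter's actual implementation.

    import java.text.Normalizer;

    public class AccentFoldingSketch {
        /** Decompose accented characters and drop the combining marks, e.g. "José" -> "Jose". */
        public static String foldAccents(String input) {
            if (input == null) {
                return null;
            }
            String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
            return decomposed.replaceAll("\\p{M}", "");
        }

        public static void main(String[] args) {
            System.out.println(foldAccents("José Müller")); // prints "Jose Muller"
        }
    }

Because the filter is applied symmetrically at index and query time, a search for either form matches documents containing either form.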
@@ -256,7 +186,7 @@
<!-- Like text, but without synonyms. Good for autocomplete matching of book/grant titles, etc., where we want to remove
stop words and stem. -->
<fieldType name="text_stemmed" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords-name.txt" enablePositionIncrements="true" />

@@ -264,28 +194,12 @@
generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0"
splitOnCaseChange="1" />
<filter class="solr.ASCIIFoldingFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
</analyzer>
</fieldType>

<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterFilter in conjunction with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>


<!-- A general unstemmed text field - good if one does not know the language of the field -->
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">

@@ -309,74 +223,6 @@
</fieldType>


<!-- A general unstemmed text field that indexes tokens normally and also
reversed (via ReversedWildcardFilterFactory), to enable more efficient
leading wildcard queries. -->
<fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<!-- charFilter + WhitespaceTokenizer -->
<!--
<fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
<analyzer>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>
-->

<!-- This is an example of using the KeywordTokenizer along
with various TokenFilterFactories to produce a sortable field
that does not include some properties of the source text
-->
<fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
<analyzer>
<!-- KeywordTokenizer does no actual tokenizing, so the entire
input string is preserved as a single token
-->
<tokenizer class="solr.KeywordTokenizerFactory"/>
<!-- The LowerCase TokenFilter does what you expect, which can be useful
when you want your sorting to be case insensitive
-->
<filter class="solr.LowerCaseFilterFactory" />
<!-- The TrimFilter removes any leading or trailing whitespace -->
<filter class="solr.TrimFilterFactory" />
<!-- The PatternReplaceFilter gives you the flexibility to use
Java Regular expressions to replace any sequence of characters
matching a pattern with an arbitrary replacement string,
which may include back references to portions of the original
string matched by the pattern.

See the Java Regular Expression documentation for more
information on pattern and replacement string syntax.

http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/package-summary.html
-->
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-z])" replacement="" replace="all"
/>
</analyzer>
</fieldType>

<fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
@@ -384,24 +230,6 @@
</analyzer>
</fieldtype>

<fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<!--
The DelimitedPayloadTokenFilter can put payloads on tokens... for example,
a token of "foo|1.4" would be indexed as "foo" with a payload of 1.4f
Attributes of the DelimitedPayloadTokenFilterFactory :
"delimiter" - a one character delimiter. Default is | (pipe)
"encoder" - how to encode the following value into a payload
float -> org.apache.lucene.analysis.payloads.FloatEncoder,
integer -> o.a.l.a.p.IntegerEncoder
identity -> o.a.l.a.p.IdentityEncoder
Fully Qualified class name implementing PayloadEncoder, Encoder must have a no arg constructor.
-->
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
</analyzer>
</fieldtype>

<!-- lowercases the entire field value, keeping it as a single token. -->
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer>

@@ -410,38 +238,10 @@
</analyzer>
</fieldType>

<fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.PathHierarchyTokenizerFactory"/>
</analyzer>
</fieldType>

<!-- since fields of this type are by default not stored or indexed,
any data added to them will be ignored outright. -->
<fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />

<!-- This point type indexes the coordinates as separate fields (subFields)
If subFieldType is defined, it references a type, and a dynamic field
definition is created matching *___<typename>. Alternately, if
subFieldSuffix is defined, that is used to create the subFields.
Example: if subFieldType="double", then the coordinates would be
indexed in fields myloc_0___double,myloc_1___double.
Example: if subFieldSuffix="_d" then the coordinates would be indexed
in fields myloc_0_d,myloc_1_d
The subFields are an implementation detail of the fieldType, and end
users normally should not need to know about them.
-->
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>

<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>

<!--
A Geohash is a compact representation of a latitude longitude pair in a single field.
See http://wiki.apache.org/solr/SpatialSearch
-->
<fieldtype name="geohash" class="solr.GeoHashField"/>

<fieldtype name="edgengram_untokenized" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/>

@@ -480,7 +280,7 @@
</analyzer>
</fieldtype>

<!-- Commenting this fieldtype out for now because we have no use case for
<!-- RY: Commenting this fieldtype out for now because we have no use case for
a tokenized, unstemmed autocomplete field. Identical to edgengram_stemmed but without
the stemming.
<fieldtype name="edgengram_unstemmed" class="solr.TextField">

@@ -591,69 +391,19 @@
<field name="PREFERRED_TITLE" type="string" indexed="true" stored="true"/>

<!-- Copy nameLowercase to sortable field. -->
<copyField source="nameRaw" dest="nameStemmed" />
<copyField source="nameRaw" dest="nameUnstemmed" />
<copyField source="nameRaw" dest="nameLowercase" />
<copyField source="nameLowercase" dest="nameLowercaseSingleValued" />

<!-- **************************** End Vitro Fields *************************** -->


<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema -->
<!-- Same as ALLTEXT
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
-->

<!-- catchall text field that indexes tokens both normally and in reverse for efficient
leading wildcard queries. -->
<field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>

<!-- Uncommenting the following will create a "timestamp" field using
a default value of "NOW" to indicate when each document was indexed.
-->
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>


<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
a "*" only at the start or the end.
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
Longer patterns will be matched first. If equal size patterns
both match, the first appearing in the schema will be used. -->
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_txt" type="text" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>

<!-- Type used to index the lat and lon components for the "location" FieldType -->
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>

<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_p" type="location" indexed="true" stored="true"/>

<!-- some trie-coded dynamic fields for faster range queries -->
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>

<dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>

<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>

<dynamicField name="random_*" type="random" />

<!-- uncomment the following to ignore any fields that don't already match an existing
field name or dynamic field, rather than reporting them as an error.
alternately, change the type="ignored" to some other type e.g. "text" if you want
unknown fields indexed and/or stored by default -->
<!--dynamicField name="*" type="ignored" multiValued="true" /-->

</fields>

<!-- Field to use to determine and enforce document uniqueness.
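The dynamicField patterns above mean any field whose name ends in a known suffix is indexed with the matching type without being declared individually. A hypothetical SolrJ sketch follows; the field and unique-key names are invented for illustration and are not taken from this schema.

    import org.apache.solr.client.solrj.SolrServer;
    import org.apache.solr.common.SolrInputDocument;

    public class DynamicFieldSketch {
        // "note_txt" matches the "*_txt" pattern, so it picks up the multiValued
        // "text" type (and its accent-folding analyzer) without a schema change.
        public static void addNote(SolrServer solr, String id, String note) throws Exception {
            SolrInputDocument doc = new SolrInputDocument();
            doc.addField("DocId", id);      // assumed unique-key field name
            doc.addField("note_txt", note); // e.g. "búsqueda avanzada"
            solr.add(doc);
            solr.commit();
        }
    }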
@@ -10,7 +10,6 @@ import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

@@ -18,15 +17,14 @@ import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.FacetField.Count;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

@@ -100,12 +98,12 @@ public class PagedSearchController extends FreemarkerHttpServlet {
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws IOException, ServletException {
boolean wasXmlRequested = isRequestedFormatXml(request);
VitroRequest vreq = new VitroRequest(request);
boolean wasXmlRequested = isRequestedFormatXml(vreq);
if( ! wasXmlRequested ){
super.doGet(request,response);
super.doGet(vreq,response);
}else{
try {
VitroRequest vreq = new VitroRequest(request);
Configuration config = getConfig(vreq);
ResponseValues rvalues = processRequest(vreq);

@@ -121,8 +119,6 @@ public class PagedSearchController extends FreemarkerHttpServlet {
@Override
protected ResponseValues processRequest(VitroRequest vreq) {

log.debug("All parameters present in the request: "+ vreq.getParameterMap().toString());

//There may be other non-html formats in the future
Format format = getFormat(vreq);
boolean wasXmlRequested = Format.XML == format;

@@ -149,43 +145,42 @@ public class PagedSearchController extends FreemarkerHttpServlet {
int startIndex = getStartIndex(vreq);
int hitsPerPage = getHitsPerPage( vreq );

String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
//Clean text to prevent cross-scripting errors
qtxt = StringEscapeUtils.escapeHtml(qtxt);
log.debug("Query text is \""+ qtxt + "\"");
String queryText = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
log.debug("Query text is \""+ queryText + "\"");

String badQueryMsg = badQueryText( qtxt );

String badQueryMsg = badQueryText( queryText );
if( badQueryMsg != null ){
return doFailedSearch(badQueryMsg, qtxt, format);
return doFailedSearch(badQueryMsg, queryText, format);
}

SolrQuery query = getQuery(qtxt, hitsPerPage, startIndex, vreq);
SolrQuery query = getQuery(queryText, hitsPerPage, startIndex, vreq);
SolrServer solr = SolrSetup.getSolrServer(getServletContext());
QueryResponse response = null;

try {
response = solr.query(query);
} catch (Exception ex) {
String msg = makeBadSearchMessage(qtxt, ex.getMessage());
String msg = makeBadSearchMessage(queryText, ex.getMessage());
log.error("could not run Solr query",ex);
return doFailedSearch(msg, qtxt, format);
return doFailedSearch(msg, queryText, format);
}

if (response == null) {
log.error("Search response was null");
return doFailedSearch("The search request contained errors.", qtxt, format);
return doFailedSearch("The search request contained errors.", queryText, format);
}

SolrDocumentList docs = response.getResults();
if (docs == null) {
log.error("Document list for a search was null");
return doFailedSearch("The search request contained errors.", qtxt,format);
return doFailedSearch("The search request contained errors.", queryText,format);
}

long hitCount = docs.getNumFound();
log.debug("Number of hits = " + hitCount);
if ( hitCount < 1 ) {
return doNoHits(qtxt,format);
return doNoHits(queryText,format);
}

List<Individual> individuals = new ArrayList<Individual>(docs.size());
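This hunk is the controller half of the diacritics fix: the old code ran the raw query through StringEscapeUtils.escapeHtml() before handing it to Solr, which rewrites accented characters as HTML entities, so a query like "José" could never match what the analyzer indexed. A small commons-lang 2.x sketch of the behavior being removed (output shown in the comment):

    import org.apache.commons.lang.StringEscapeUtils;

    public class EscapeHtmlDemo {
        public static void main(String[] args) {
            String queryText = "José";
            // escapeHtml turns the accented character into an entity, so the
            // string sent to Solr no longer resembles what the user typed.
            String escaped = StringEscapeUtils.escapeHtml(queryText);
            System.out.println(escaped); // Jos&eacute;
        }
    }

The renamed queryText variable now reaches Solr untouched; any escaping needed for display can still happen at render time.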
@@ -205,7 +200,7 @@ public class PagedSearchController extends FreemarkerHttpServlet {
}

ParamMap pagingLinkParams = new ParamMap();
pagingLinkParams.put(PARAM_QUERY_TEXT, qtxt);
pagingLinkParams.put(PARAM_QUERY_TEXT, queryText);
pagingLinkParams.put(PARAM_HITS_PER_PAGE, String.valueOf(hitsPerPage));

if( wasXmlRequested ){

@@ -238,11 +233,11 @@ public class PagedSearchController extends FreemarkerHttpServlet {
if( wasHtmlRequested ){
if ( !classGroupFilterRequested && !typeFilterRequested ) {
// Search request includes no ClassGroup and no type, so add ClassGroup search refinement links.
body.put("classGroupLinks", getClassGroupsLinks(grpDao, docs, response, qtxt));
body.put("classGroupLinks", getClassGroupsLinks(grpDao, docs, response, queryText));
} else if ( classGroupFilterRequested && !typeFilterRequested ) {
// Search request is for a ClassGroup, so add rdf:type search refinement links
// but try to filter out classes that are subclasses
body.put("classLinks", getVClassLinks(vclassDao, docs, response, qtxt));
body.put("classLinks", getVClassLinks(vclassDao, docs, response, queryText));
pagingLinkParams.put(PARAM_CLASSGROUP, classGroupParam);

} else {

@@ -254,8 +249,8 @@ public class PagedSearchController extends FreemarkerHttpServlet {
body.put("individuals", IndividualSearchResult
.getIndividualTemplateModels(individuals, vreq));

body.put("querytext", qtxt);
body.put("title", qtxt + " - " + appBean.getApplicationName()
body.put("querytext", queryText);
body.put("title", queryText + " - " + appBean.getApplicationName()
+ " Search Results");

body.put("hitCount", hitCount);

@@ -357,24 +352,6 @@ public class PagedSearchController extends FreemarkerHttpServlet {
List<VClass> classes = new ArrayList<VClass>(typesInHits.size());
Map<String,Long> typeURItoCount = new HashMap<String,Long>();

// Iterator<String> it = typesInHits.iterator();
// while(it.hasNext()){
// String typeUri = it.next();
// try{
// if( VitroVocabulary.OWL_THING.equals(typeUri))
// continue;
// VClass type = vclassDao.getVClassByURI(typeUri);
// if( type != null &&
// ! type.isAnonymous() &&
// type.getName() != null && !"".equals(type.getName()) &&
// type.getGroupURI() != null ) //don't display classes that aren't in classgroups
// classes.add(type);
// }catch(Exception ex){
// if( log.isDebugEnabled() )
// log.debug("could not add type " + typeUri, ex);
// }
// }

List<FacetField> ffs = rsp.getFacetFields();
for(FacetField ff : ffs){
if(VitroSearchTermNames.RDFTYPE.equals(ff.getName())){

@@ -576,13 +553,6 @@ public class PagedSearchController extends FreemarkerHttpServlet {
return new ExceptionResponseValues(getTemplate(f,Result.ERROR), body, e);
}

// private TemplateResponseValues doBadQuery(ApplicationBean appBean, String query, Format f) {
// Map<String, Object> body = new HashMap<String, Object>();
// body.put("title", "Search " + appBean.getApplicationName());
// body.put("query", query);
// return new TemplateResponseValues(getTemplate(f,Result.BAD_QUERY), body);
// }

private TemplateResponseValues doFailedSearch(String message, String querytext, Format f) {
Map<String, Object> body = new HashMap<String, Object>();
body.put("title", "Search for '" + querytext + "'");

@@ -667,7 +637,7 @@ public class PagedSearchController extends FreemarkerHttpServlet {
throw new Error("PagedSearchController.search() is unimplemented");
}

protected boolean isRequestedFormatXml(HttpServletRequest req){
protected boolean isRequestedFormatXml(VitroRequest req){
if( req != null ){
String param = req.getParameter(PARAM_XML_REQUEST);
if( param != null && "1".equals(param)){

@@ -680,7 +650,7 @@ public class PagedSearchController extends FreemarkerHttpServlet {
}
}

protected Format getFormat(HttpServletRequest req){
protected Format getFormat(VitroRequest req){
if( req != null && req.getParameter("xml") != null && "1".equals(req.getParameter("xml")))
return Format.XML;
else

@@ -10,6 +10,7 @@ import java.util.List;
import javax.servlet.ServletContext;
import javax.servlet.ServletContextEvent;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServer;

@@ -61,14 +62,19 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
);
return;
}

//HttpClient httpClient = new HttpClient();

CommonsHttpSolrServer server;
boolean useMultiPartPost = true;
//It would be nice to use the default binary handler but there seem to be library problems
server = new CommonsHttpSolrServer(new URL( solrServerUrl ),null,new XMLResponseParser(),false);
server = new CommonsHttpSolrServer(new URL( solrServerUrl ),null,new XMLResponseParser(),useMultiPartPost);
server.setSoTimeout(10000); // socket read timeout
server.setConnectionTimeout(10000);
server.setDefaultMaxConnectionsPerHost(100);
server.setMaxTotalConnections(100);
server.setMaxRetries(1);

context.setAttribute(LOCAL_SOLR_SERVER, server);

/* set up the individual to solr doc translation */
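The SolrSetup change switches the SolrJ client to multipart POST (useMultiPartPost = true), presumably so that UTF-8 query and update content survives the HTTP round trip to Solr. A minimal sketch using the same constructor as above; the URL and the example query are placeholders, not values from the Vitro configuration:

    import java.net.URL;

    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
    import org.apache.solr.client.solrj.impl.XMLResponseParser;
    import org.apache.solr.client.solrj.response.QueryResponse;

    public class SolrClientSketch {
        public static void main(String[] args) throws Exception {
            String solrServerUrl = "http://localhost:8983/solr"; // placeholder
            // XML wire format plus multipart POST, mirroring the change above.
            CommonsHttpSolrServer server = new CommonsHttpSolrServer(
                    new URL(solrServerUrl), null, new XMLResponseParser(), true);
            server.setConnectionTimeout(10000);

            QueryResponse rsp = server.query(new SolrQuery("nameUnstemmed:José"));
            System.out.println("hits: " + rsp.getResults().getNumFound());
        }
    }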

@@ -40,7 +40,7 @@
<fieldset>
<legend>Search form</legend>

<form id="search-form" action="${urls.search}" name="search" role="search">
<form id="search-form" action="${urls.search}" name="search" role="search" accept-charset="UTF-8" method="POST">
<div id="search-field">
<input type="text" name="querytext" class="search-vitro" value="${querytext!}" autocapitalize="off" />
<input type="submit" value="Search" class="search">

@@ -31,7 +31,7 @@
<fieldset>
<legend>Search form</legend>

<form id="search-form" action="${urls.search}" name="search" role="search">
<form id="search-form" action="${urls.search}" name="search" role="search" accept-charset="UTF-8" method="POST">
<div id="search-field">
<input type="text" name="querytext" class="search-vitro" value="${querytext!}" autocapitalize="off" />
<input type="submit" value="Search" class="submit">

@@ -1,7 +1,7 @@
<#-- $This file is distributed under the terms of the license in /doc/license.txt$ -->

<div id="searchBlock">
<form id="searchForm" action="${urls.search}" >
<form id="searchForm" action="${urls.search}" accept-charset="UTF-8" method="POST">
<label for="search">Search </label>
<input type="text" name="querytext" id="search" class="search-form-item" value="${querytext!}" size="20" autocapitalize="off" />
<input class="search-form-submit" name="submit" type="submit" value="Search" />
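All three search forms now declare accept-charset="UTF-8" and submit via POST, so the browser encodes the query text as UTF-8 rather than whatever the page or platform default happens to be. A JDK-only illustration of why the charset used on the receiving side matters (the encoded value is what a browser would send for "José"):

    import java.net.URLDecoder;

    public class FormCharsetDemo {
        public static void main(String[] args) throws Exception {
            // "é" encoded as UTF-8 is the byte pair 0xC3 0xA9:
            String encoded = "Jos%C3%A9";
            System.out.println(URLDecoder.decode(encoded, "UTF-8"));      // José
            // Decoding the same bytes with the wrong charset garbles the diacritic:
            System.out.println(URLDecoder.decode(encoded, "ISO-8859-1")); // JosÃ©
        }
    }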