Adding classLocalName to the search index and removing norms on URIs

2011-03-22 20:38:59 +00:00 · 2011-03-22 20:38:59 +00:00 · a5440c500b
commit a5440c500b
parent ea87a8e579
3 changed files with 34 additions and 15 deletions
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java
@ -248,6 +248,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
            		log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME) + " score: " +scoreDoc.score);
            		log.debug("Scoring of the doc explained " + explanation.toString());
            		log.debug("Explanation's description "+ explanation.getDescription());
            	}
@ -705,7 +706,8 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
 //        map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
 //        qp.setStemmedToUnstemmed(map);
-    	MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{ "name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" }, analyzer);
+    	MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{ 
    				"name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java
@ -76,6 +76,10 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
        public static final String THUMBNAIL = "THUMBNAIL";        
        /** Should individual be included in full text search results? 1=yes 0=no */
        public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";
        /** local names of the classes of an individual, converted to lower case */
        public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
        /** local names of the classes of an individual */
        public static final String CLASSLOCALNAME = "classLocalName";        
    }
    private static final Log log = LogFactory.getLog(Entity2LuceneDoc.class.getName());
@ -152,18 +156,23 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
                if( clz.getSearchBoost() != null )
                    doc.setBoost( doc.getBoost() + clz.getSearchBoost() );
-                Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED);
+                Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
                //typeField.setBoost(2*FIELD_BOOST);
                doc.add( typeField);
                if(clz.getLocalName() != null){
                	Field classLocalName = new Field(term.CLASSLOCALNAME, clz.getLocalName(), Field.Store.YES, Field.Index.ANALYZED);
                	Field classLocalNameLowerCase = new Field(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase(), Field.Store.YES, Field.Index.ANALYZED);
                	doc.add(classLocalName);
                	doc.add(classLocalNameLowerCase);
                }
                if( clz.getName() != null )
                    classPublicNames = classPublicNames + " " + clz.getName();
                //Classgroup URI
                if( clz.getGroupURI() != null ){
                	Field classGroupField = new Field(term.CLASSGROUP_URI, clz.getGroupURI(), 
-                            Field.Store.YES, Field.Index.NOT_ANALYZED);
+                            Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
                //	classGroupField.setBoost(FIELD_BOOST);
                    doc.add(classGroupField);
                }
@ -174,13 +183,13 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
        /* lucene DOCID */
        doc.add( new Field(term.DOCID, entClassName + id,
-                            Field.Store.YES, Field.Index.NOT_ANALYZED));
+                            Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        //vitro Id        
-        doc.add(  new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED));        
+        doc.add(  new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));        
        //java class
-        doc.add( new  Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED));
+        doc.add( new  Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        //Entity Name        
        if( ent.getRdfsLabel() != null )
@ -225,14 +234,14 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
        } else {
            value=  (new DateTime()).toString(LuceneIndexer.MODTIME_DATE_FORMAT) ;
        }
-        doc.add(  new Field(term.MODTIME, value , Field.Store.YES, Field.Index.NOT_ANALYZED));
+        doc.add(  new Field(term.MODTIME, value , Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        /* timekey */
        try{
            value = null;
            if( ent.getTimekey() != null ){
                value = (new DateTime(ent.getTimekey().getTime())).toString(LuceneIndexer.DATE_FORMAT);
-                doc.add(new Field(term.TIMEKEY, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
+                doc.add(new Field(term.TIMEKEY, value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            }
        }catch(Exception ex){            
            log.error("could not save timekey " + ex);            
@ -242,9 +251,9 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
        try{
            value = null;
            if( ent.hasThumb() )
-                doc.add(new Field(term.THUMBNAIL, "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
+                doc.add(new Field(term.THUMBNAIL, "1", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            else
-                doc.add(new Field(term.THUMBNAIL, "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
+                doc.add(new Field(term.THUMBNAIL, "0", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        }catch(Exception ex){
            log.debug("could not index thumbnail: " + ex);
        }
@ -253,7 +262,7 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
        //time of index in millis past epoch
        Object anon[] =  { new Long((new DateTime() ).getMillis())  };
        doc.add(  new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
-                            Field.Store.YES, Field.Index.NOT_ANALYZED));                 
+                            Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));                 
        if( ! prohibited ){
            //ALLTEXT, all of the 'full text'
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java
@ -8,6 +8,9 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
 import java.io.File;
 import java.io.IOException;
@ -22,6 +25,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.util.Version;
@ -231,7 +235,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
     * @return
     */
    private Analyzer getAnalyzer() {
-        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.LUCENE_29));
+    	
    	PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.LUCENE_29));
    	analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
    	analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
        analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
@ -239,6 +245,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
        analyzer.addAnalyzer(NAME, new StandardAnalyzer(Version.LUCENE_29));
        analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
        analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
        analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
        analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
        return analyzer;
    }