Adding classLocalName to the search index and removing norms on URIs
This commit is contained in:
parent
ea87a8e579
commit
a5440c500b
3 changed files with 34 additions and 15 deletions
|
@ -248,6 +248,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
|
|
||||||
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME) + " score: " +scoreDoc.score);
|
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME) + " score: " +scoreDoc.score);
|
||||||
log.debug("Scoring of the doc explained " + explanation.toString());
|
log.debug("Scoring of the doc explained " + explanation.toString());
|
||||||
|
log.debug("Explanation's description "+ explanation.getDescription());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -705,7 +706,8 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
||||||
// qp.setStemmedToUnstemmed(map);
|
// qp.setStemmedToUnstemmed(map);
|
||||||
|
|
||||||
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{ "name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" }, analyzer);
|
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{
|
||||||
|
"name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer);
|
||||||
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -76,6 +76,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
public static final String THUMBNAIL = "THUMBNAIL";
|
public static final String THUMBNAIL = "THUMBNAIL";
|
||||||
/** Should individual be included in full text search results? 1=yes 0=no */
|
/** Should individual be included in full text search results? 1=yes 0=no */
|
||||||
public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";
|
public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";
|
||||||
|
/** lower-cased local names (clz.getLocalName().toLowerCase()) of the classes of an individual */
|
||||||
|
public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
|
||||||
|
/** local names (clz.getLocalName()) of the classes of an individual */
|
||||||
|
public static final String CLASSLOCALNAME = "classLocalName";
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(Entity2LuceneDoc.class.getName());
|
private static final Log log = LogFactory.getLog(Entity2LuceneDoc.class.getName());
|
||||||
|
@ -152,18 +156,23 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
if( clz.getSearchBoost() != null )
|
if( clz.getSearchBoost() != null )
|
||||||
doc.setBoost( doc.getBoost() + clz.getSearchBoost() );
|
doc.setBoost( doc.getBoost() + clz.getSearchBoost() );
|
||||||
|
|
||||||
Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED);
|
Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||||
//typeField.setBoost(2*FIELD_BOOST);
|
|
||||||
|
|
||||||
doc.add( typeField);
|
doc.add( typeField);
|
||||||
|
|
||||||
|
if(clz.getLocalName() != null){
|
||||||
|
Field classLocalName = new Field(term.CLASSLOCALNAME, clz.getLocalName(), Field.Store.YES, Field.Index.ANALYZED);
|
||||||
|
Field classLocalNameLowerCase = new Field(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase(), Field.Store.YES, Field.Index.ANALYZED);
|
||||||
|
doc.add(classLocalName);
|
||||||
|
doc.add(classLocalNameLowerCase);
|
||||||
|
}
|
||||||
|
|
||||||
if( clz.getName() != null )
|
if( clz.getName() != null )
|
||||||
classPublicNames = classPublicNames + " " + clz.getName();
|
classPublicNames = classPublicNames + " " + clz.getName();
|
||||||
|
|
||||||
//Classgroup URI
|
//Classgroup URI
|
||||||
if( clz.getGroupURI() != null ){
|
if( clz.getGroupURI() != null ){
|
||||||
Field classGroupField = new Field(term.CLASSGROUP_URI, clz.getGroupURI(),
|
Field classGroupField = new Field(term.CLASSGROUP_URI, clz.getGroupURI(),
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED);
|
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||||
// classGroupField.setBoost(FIELD_BOOST);
|
// classGroupField.setBoost(FIELD_BOOST);
|
||||||
doc.add(classGroupField);
|
doc.add(classGroupField);
|
||||||
}
|
}
|
||||||
|
@ -174,13 +183,13 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
|
|
||||||
/* lucene DOCID */
|
/* lucene DOCID */
|
||||||
doc.add( new Field(term.DOCID, entClassName + id,
|
doc.add( new Field(term.DOCID, entClassName + id,
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
|
|
||||||
//vitro Id
|
//vitro Id
|
||||||
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
|
|
||||||
//java class
|
//java class
|
||||||
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
|
|
||||||
//Entity Name
|
//Entity Name
|
||||||
if( ent.getRdfsLabel() != null )
|
if( ent.getRdfsLabel() != null )
|
||||||
|
@ -225,14 +234,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
} else {
|
} else {
|
||||||
value= (new DateTime()).toString(LuceneIndexer.MODTIME_DATE_FORMAT) ;
|
value= (new DateTime()).toString(LuceneIndexer.MODTIME_DATE_FORMAT) ;
|
||||||
}
|
}
|
||||||
doc.add( new Field(term.MODTIME, value , Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add( new Field(term.MODTIME, value , Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
|
|
||||||
/* timekey */
|
/* timekey */
|
||||||
try{
|
try{
|
||||||
value = null;
|
value = null;
|
||||||
if( ent.getTimekey() != null ){
|
if( ent.getTimekey() != null ){
|
||||||
value = (new DateTime(ent.getTimekey().getTime())).toString(LuceneIndexer.DATE_FORMAT);
|
value = (new DateTime(ent.getTimekey().getTime())).toString(LuceneIndexer.DATE_FORMAT);
|
||||||
doc.add(new Field(term.TIMEKEY, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add(new Field(term.TIMEKEY, value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
}
|
}
|
||||||
}catch(Exception ex){
|
}catch(Exception ex){
|
||||||
log.error("could not save timekey " + ex);
|
log.error("could not save timekey " + ex);
|
||||||
|
@ -242,9 +251,9 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
try{
|
try{
|
||||||
value = null;
|
value = null;
|
||||||
if( ent.hasThumb() )
|
if( ent.hasThumb() )
|
||||||
doc.add(new Field(term.THUMBNAIL, "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add(new Field(term.THUMBNAIL, "1", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
else
|
else
|
||||||
doc.add(new Field(term.THUMBNAIL, "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
|
doc.add(new Field(term.THUMBNAIL, "0", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
}catch(Exception ex){
|
}catch(Exception ex){
|
||||||
log.debug("could not index thumbnail: " + ex);
|
log.debug("could not index thumbnail: " + ex);
|
||||||
}
|
}
|
||||||
|
@ -253,7 +262,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
//time of index in millis past epoch
|
//time of index in millis past epoch
|
||||||
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
||||||
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||||
|
|
||||||
if( ! prohibited ){
|
if( ! prohibited ){
|
||||||
//ALLTEXT, all of the 'full text'
|
//ALLTEXT, all of the 'full text'
|
||||||
|
|
|
@ -8,6 +8,9 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
|
||||||
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -22,6 +25,7 @@ import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
|
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
|
||||||
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
@ -231,7 +235,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private Analyzer getAnalyzer() {
|
private Analyzer getAnalyzer() {
|
||||||
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.LUCENE_29));
|
|
||||||
|
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.LUCENE_29));
|
||||||
|
|
||||||
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
||||||
analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
|
analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
|
||||||
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||||
|
@ -239,6 +245,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
analyzer.addAnalyzer(NAME, new StandardAnalyzer(Version.LUCENE_29));
|
analyzer.addAnalyzer(NAME, new StandardAnalyzer(Version.LUCENE_29));
|
||||||
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
||||||
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
||||||
|
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
||||||
|
analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
|
||||||
|
|
||||||
return analyzer;
|
return analyzer;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue