Final changes to the search

This commit is contained in:
anupsawant 2011-05-31 14:21:16 +00:00
parent b6666dd039
commit 3c6a60818e
8 changed files with 357 additions and 146 deletions

View file

@ -237,7 +237,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<!-- <filter class="solr.PorterStemFilterFactory"/> -->
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
</analyzer>
<analyzer type="query">
@ -251,6 +251,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
</analyzer>
</fieldType>
@ -501,8 +502,10 @@
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
<field name="contextNode" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="targetInfo" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="BETA" type="float" indexed="true" stored="true" multiValued="false"/>
<field name="PHI" type="float" indexed="true" stored="true" multiValued="false"/>
<field name="ADJACENT_NODES" type="string" indexed="true" stored="true" multiValued="true"/> -->
<field name="moniker" type="ignored" />
<field name="modType" type="ignored"/>

View file

@ -708,9 +708,10 @@
-->
<lst name="defaults">
<str name="defType">dismax</str>
<str name="qf">nameRaw nameLowercase nameUnstemmed nameStemmed ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC contextNode</str>
<str name="qf">nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo</str>
<str name="pf">targetInfo</str>
<str name="echoParams">explicit</str>
<str name="bf">BETA</str>
<str name="ps">1</str>
<int name="rows">10</int>
</lst>
<!-- In addition to defaults, "appends" params can be specified

View file

@ -8883,6 +8883,8 @@ coordinate, align, ordinate, organise, organize
coordinated, interconnected, matching, unified
coordinating, coordinative
coordinative, coordinating
co-author, co-authors, coauthor, coauthors
coauthor, co-author, co-authors, coauthors
cop, apprehend, arrest, bull, collar, copper, fuzz, glom, hook, nab, nail, pig, snitch, thieve
copacetic, copasetic, copesetic, copesettic
copaline, copalite
@ -31512,7 +31514,7 @@ pubescence, puberty
pubescent, downy, puberulent, sericeous
public, populace, world
publically, publicly
publication, issue, publishing
publication, issue, publishing, publications
publicise, advertise, advertize, air, bare, publicize
publicised, publicized
publiciser, publicist, publicizer

View file

@ -29,8 +29,8 @@ public class VitroTermNames {
/** text for 'full text' search, this is unstemmed for
* use with wildcards and prefix queries */
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
/** class name for storing context nodes **/
public static final String CONTEXTNODE = "contextNode";
/** class name for storing targeted information **/
public static final String targetInfo = "targetInfo";
/** keywords */
public static final String KEYWORDS = "KEYWORDS";
/** Does the individual have a thumbnail image? 1=yes 0=no */
@ -57,6 +57,8 @@ public class VitroTermNames {
/** field for beta values of all documents **/
public static final String BETA = "BETA";
public static final String PHI = "PHI";
public static final String ADJACENT_NODES = "ADJACENT_NODES";
/** adding phonetic field **/
public static final String ALLTEXT_PHONETIC = "ALLTEXT_PHONETIC";

View file

@ -24,9 +24,8 @@ public class IndividualProhibitedFromSearch {
protected static Log log = LogFactory.getLog(IndividualProhibitedFromSearch.class);
/**
 * Creates a checker that works against the full model obtained from the
 * union ontology model selector stored in the servlet context.
 *
 * @param context servlet context handed to {@code ModelContext} to look up
 *                the union ontology model selector
 */
public IndividualProhibitedFromSearch( ServletContext context ){
    // Assigned exactly once; repeating the lookup only redoes the same work.
    this.fullModel = ModelContext.getUnionOntModelSelector(context).getFullModel();
}
public boolean isIndividualProhibited(String uri){

View file

@ -1,5 +1,9 @@
package edu.cornell.mannlib.vitro.webapp.search.beans;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.servlet.ServletContext;
import org.apache.commons.logging.Log;
@ -21,6 +25,9 @@ import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.vocabulary.RDF;
import java.util.HashSet;
import java.util.Set;
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
@ -30,6 +37,14 @@ public class SearchQueryHandler {
private OntModel fullModel;
private String contextNodeURI;
private int totalInd;
private static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
//private String query = "";
// private static final String queryForEducationalTraining = "SELECT ?query WHERE {" +
@ -124,20 +139,11 @@ public class SearchQueryHandler {
initialBinding.add("uri", uriResource);
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
String thisQuery = prefix +
"SELECT " +
"(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " +
" (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " +
" (str(?TitleOrRole) as ?titleOrRole) (str(?PositionLabel) as ?positionLabel) WHERE {"
" (str(?TitleOrRole) as ?titleOrRole) WHERE {" //(str(?PositionLabel) as ?positionLabel)
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Position . "
@ -147,7 +153,7 @@ public class SearchQueryHandler {
+ " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . "
+ " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
+ " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
+ " OPTIONAL { ?c rdfs:label ?PositionLabel . } "
//+ " OPTIONAL { ?c rdfs:label ?PositionLabel . } "
+ " } ORDER BY ?PositionLabel ";
@ -199,12 +205,12 @@ public class SearchQueryHandler {
log.debug("titleOrRole is null ");
}
RDFNode positionLabel = soln.get("positionLabel");
/*RDFNode positionLabel = soln.get("positionLabel");
if(positionLabel != null){
propertyValues.append(" " + positionLabel.toString());
}else{
log.debug("positionLabel is null ");
}
}*/
}
}catch(Throwable t){
@ -228,15 +234,6 @@ public class SearchQueryHandler {
initialBinding.add("uri", uriResource);
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
String thisQuery = prefix +
"SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " +
" (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {"
@ -305,7 +302,6 @@ public class SearchQueryHandler {
return propertyValues.toString();
}
public String getPropertiesAssociatedWithAwardReceipt(String uri){
StringBuffer propertyValues = new StringBuffer();
@ -315,18 +311,9 @@ public class SearchQueryHandler {
initialBinding.add("uri", uriResource);
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
String thisQuery = prefix +
"SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " +
" (str(?Description) as ?description) (str(?AwardReceiptLabel) as ?awardReceiptLabel) WHERE {"
" (str(?Description) as ?description) WHERE {" //(str(?AwardReceiptLabel) as ?awardReceiptLabel)
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:AwardReceipt . "
@ -334,7 +321,7 @@ public class SearchQueryHandler {
+ " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . "
+ " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ."
+ " OPTIONAL { ?c core:description ?Description . } . "
+ " OPTIONAL { ?c rdfs:label ?AwardReceiptLabel . } . "
//+ " OPTIONAL { ?c rdfs:label ?AwardReceiptLabel . } . "
+ " } ORDER BY ?AwardReceiptLabel";
@ -372,12 +359,12 @@ public class SearchQueryHandler {
log.debug("description is null ");
}
RDFNode awardReceiptLabel = soln.get("awardReceiptLabel");
/*RDFNode awardReceiptLabel = soln.get("awardReceiptLabel");
if(awardReceiptLabel != null){
propertyValues.append(" " + awardReceiptLabel.toString());
}else{
log.debug("awardReceiptLabel is null ");
}
}*/
}
}catch(Throwable t){
@ -401,15 +388,6 @@ public class SearchQueryHandler {
initialBinding.add("uri", uriResource);
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
String thisQuery = prefix +
"SELECT DISTINCT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
@ -450,8 +428,6 @@ public class SearchQueryHandler {
return propertyValues.toString();
}
public String getPropertiesAssociatedWithEducationalTraining(String uri){
StringBuffer propertyValues = new StringBuffer();
@ -461,15 +437,6 @@ public class SearchQueryHandler {
initialBinding.add("uri", uriResource);
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
String thisQuery = prefix +
"SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
+ "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " +
@ -555,15 +522,6 @@ public class SearchQueryHandler {
initialBinding.add("uri", uriResource);
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
String thisQuery = prefix +
"SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) "
+ "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " +
@ -594,7 +552,7 @@ public class SearchQueryHandler {
RDFNode linkedAuthor = soln.get("linkedAuthor");
if(linkedAuthor != null){
propertyValues.append(" " + linkedAuthor.toString());
propertyValues.append(" publications " + linkedAuthor.toString());
}else{
log.debug("linkedAuthor is null ");
}
@ -608,7 +566,7 @@ public class SearchQueryHandler {
RDFNode editor = soln.get("editor");
if(editor != null){
propertyValues.append(" " + editor.toString());
propertyValues.append(" " + editor.toString());
}else{
log.debug("editor is null ");
}
@ -629,7 +587,7 @@ public class SearchQueryHandler {
RDFNode features = soln.get("features");
if(features != null){
propertyValues.append(" " + features.toString());
propertyValues.append(" publications " + features.toString());
}else{
log.debug("features is null ");
}
@ -651,15 +609,135 @@ public class SearchQueryHandler {
float beta=0;
RDFNode node = (Resource) fullModel.getResource(uri);
StmtIterator stmtItr = fullModel.listStatements((Resource)null, (Property)null,node);
int Conn = 0;
while(stmtItr.hasNext()){
stmtItr.next();
Conn++;
}
beta = (float)Conn/totalInd;
beta *= 100;
beta += 1;
int Conn = stmtItr.toList().size();
beta = (float)Conn/totalInd;
beta *= 100;
beta += 1;
return beta;
}
/**
 * Collects the individuals adjacent to the given individual in the full model.
 *
 * Two SPARQL queries are run with {@code ?uri} pre-bound to {@code uri}:
 * <ul>
 * <li>(persons only) a Person - Relationship - InformationResource -
 *     Relationship - Person path query, which also yields the label of the
 *     person found at the far end as {@code ?coauthor};</li>
 * <li>a two-hop query returning Organization, Person, InformationResource or
 *     Location resources reachable from the individual.</li>
 * </ul>
 * A failure while executing one query is logged and the remaining queries still
 * run; any other failure is logged and whatever was stored in the result so far
 * (possibly two empty strings) is returned.
 *
 * @param uri      URI of the individual whose neighbourhood is collected
 * @param isPerson when {@code false} the co-author query is skipped
 * @return a two element array: index 0 holds the distinct adjacent URIs, each
 *         followed by a single space; index 1 holds the distinct co-author
 *         entries, each in the form {@code " co-authors <label>"}
 */
public String[] getAdjacentNodes(String uri,boolean isPerson){
    List<String> queryList = new ArrayList<String>();

    // The co-author query only makes sense for persons, so it is simply not
    // queued for other individuals (instead of being skipped while iterating).
    if(isPerson){
        queryList.add(prefix +
            " SELECT ?adjobj (str(?adjobjLabel) as ?coauthor) " +
            " WHERE { " +
            " ?uri rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
            " ?uri ?prop ?obj . " +
            " ?obj rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
            " ?obj ?prop2 ?obj2 . " +
            " ?obj2 rdf:type <http://vivoweb.org/ontology/core#InformationResource> . " +
            " ?obj2 ?prop3 ?obj3 . " +
            " ?obj3 rdf:type <http://vivoweb.org/ontology/core#Relationship> . " +
            " ?obj3 ?prop4 ?adjobj . " +
            " ?adjobj rdfs:label ?adjobjLabel . " +
            " ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . " +
            " FILTER (?prop !=rdf:type) . " +
            " FILTER (?prop2!=rdf:type) . " +
            " FILTER (?prop3!=rdf:type) . " +
            " FILTER (?prop4!=rdf:type) . " +
            " FILTER (?adjobj != ?uri) . " +
            "}");
    }

    queryList.add(prefix +
        " SELECT ?adjobj " +
        " WHERE{ " +
        " ?uri rdf:type foaf:Agent . " +
        " ?uri ?prop ?obj . " +
        " ?obj ?prop2 ?adjobj . " +
        " FILTER (?prop !=rdf:type) . " +
        " FILTER isURI(?obj) . " +
        " FILTER (?prop2!=rdf:type) . " +
        " FILTER (?adjobj != ?uri) . " +
        " FILTER isURI(?adjobj) . " +
        " { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Organization> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://xmlns.com/foaf/0.1/Person> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://vivoweb.org/ontology/core#InformationResource> . } " +
        " UNION " +
        " { ?adjobj rdf:type <http://vivoweb.org/ontology/core#Location> . } ." +
        "}");

    Set<String> adjacentNodes = new HashSet<String>();
    Set<String> coauthorNames = new HashSet<String>();
    String[] info = new String[]{"",""};

    QuerySolutionMap initialBinding = new QuerySolutionMap();
    initialBinding.add("uri", ResourceFactory.createResource(uri));

    fullModel.enterCriticalSection(Lock.READ);
    try{
        for(String queryString : queryList){
            Query query = QueryFactory.create(queryString,Syntax.syntaxARQ);
            QueryExecution qexec = QueryExecutionFactory.create(query,fullModel,initialBinding);
            try{
                ResultSet results = qexec.execSelect();
                while(results.hasNext()){
                    QuerySolution soln = results.nextSolution();

                    Resource adjacentIndividual = (Resource)soln.get("adjobj");
                    // getURI() is null for blank nodes; skipping them keeps a
                    // literal "null" token out of the space separated URI list.
                    if(adjacentIndividual != null && adjacentIndividual.getURI() != null){
                        adjacentNodes.add(adjacentIndividual.getURI());
                    }

                    RDFNode coauthor = soln.get("coauthor");
                    if(coauthor != null){
                        coauthorNames.add(" co-authors " + coauthor.toString());
                    }
                }
            }catch(Exception e){
                // Keep going with the next query, but do not lose the cause.
                log.error("Error found in getAdjacentNodes method of SearchQueryHandler", e);
            }finally{
                qexec.close();
            }
        }

        StringBuilder adjacentNodesConcat = new StringBuilder();
        for(String adjacentUri : adjacentNodes){
            adjacentNodesConcat.append(adjacentUri).append(" ");
        }
        info[0] = adjacentNodesConcat.toString();

        StringBuilder coauthorBuff = new StringBuilder();
        for(String coauthorEntry : coauthorNames){
            coauthorBuff.append(coauthorEntry);
        }
        info[1] = coauthorBuff.toString();
    }
    catch(Throwable t){
        // Best effort: indexing continues with whatever was gathered so far.
        log.error(t,t);
    }finally{
        fullModel.leaveCriticalSection();
    }
    return info;
}
}

View file

@ -2,6 +2,7 @@
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@ -12,12 +13,16 @@ import org.apache.lucene.document.Document;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.joda.time.DateTime;
import org.openrdf.model.vocabulary.RDF;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.vocabulary.OWL;
import java.util.Hashtable;
import java.util.Map;
import java.util.StringTokenizer;
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
@ -48,7 +53,9 @@ public class IndividualToSolrDocument implements Obj2DocIface {
private SearchQueryHandler searchQueryHandler;
public static Map<String,Float> betas = new Hashtable<String,Float>();
private static List<String> contextNodeClassNames = new ArrayList<String>();
public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch,
IndividualProhibitedFromSearch individualProhibitedFromSearch,
@ -56,8 +63,10 @@ public class IndividualToSolrDocument implements Obj2DocIface {
this.classesProhibitedFromSearch = classesProhibitedFromSearch;
this.individualProhibitedFromSearch = individualProhibitedFromSearch;
this.searchQueryHandler = searchQueryHandler;
fillContextNodes();
}
@SuppressWarnings("static-access")
@Override
public Object translate(Object obj) throws IndexingException{
@ -68,13 +77,10 @@ public class IndividualToSolrDocument implements Obj2DocIface {
Individual ent = (Individual)obj;
String value;
String classPublicNames = "";
StringBuffer classPublicNames = new StringBuffer();
classPublicNames.append("");
SolrInputDocument doc = new SolrInputDocument();
float beta = searchQueryHandler.calculateBeta(ent.getURI());
doc.addField(term.BETA,beta);
//float beta =1;
//DocId
String id = ent.getURI();
log.debug("translating " + id);
@ -100,8 +106,12 @@ public class IndividualToSolrDocument implements Obj2DocIface {
// Types and classgroups
boolean prohibited = false;
List<VClass> vclasses = ent.getVClasses(false);
ArrayList<String> superClassNames = new ArrayList<String>();
String superLclName = null;
long tClassgroup = System.currentTimeMillis();
for(VClass clz : vclasses){
superLclName = clz.getLocalName();
superClassNames.add(superLclName);
if(clz.getURI() == null){
continue;
}else if(OWL.Thing.getURI().equals(clz.getURI())){
@ -110,7 +120,10 @@ public class IndividualToSolrDocument implements Obj2DocIface {
} else if(clz.getURI().startsWith(OWL.NS)){
log.debug("not indexing " + id + " because of type " + clz.getURI());
return null;
} else {
} else if(contextNodeClassNames.contains(superLclName)) { // check to see if context node is being indexed.
return null;
}
else {
if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI()))
prohibited = true;
if( clz.getSearchBoost() != null)
@ -123,8 +136,10 @@ public class IndividualToSolrDocument implements Obj2DocIface {
doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase());
}
if(clz.getName() != null)
classPublicNames += clz.getName();
if(clz.getName() != null){
classPublicNames.append(" ");
classPublicNames.append(clz.getName());
}
//Classgroup URI
if(clz.getGroupURI() != null){
@ -134,6 +149,10 @@ public class IndividualToSolrDocument implements Obj2DocIface {
}
}
if(superClassNames.isEmpty()){
return null;
}
log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup));
@ -156,38 +175,95 @@ public class IndividualToSolrDocument implements Obj2DocIface {
value = ent.getLocalName();
}
doc.addField(term.NAME_RAW, value, NAME_BOOST+beta);
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST+beta);
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST+beta);
doc.addField(term.NAME_STEMMED, value, NAME_BOOST+beta);
// collecting object property statements
String uri = ent.getURI();
StringBuffer objectNames = new StringBuffer();
objectNames.append("");
String t=null;
StringBuffer addUri = new StringBuffer();
addUri.append("");
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
if (objectPropertyStatements != null) {
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
while (objectPropertyStmtIter.hasNext()) {
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
continue;
try {
objectNames.append(" ");
objectNames.append(((t=objectPropertyStmt.getObject().getName()) == null)?"":t);
addUri.append(" ");
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
} catch (Exception e) {
log.debug("could not index name of related object: " + e.getMessage());
}
}
}
// adding beta value
float beta = 0;
if(betas.containsKey(uri)){
beta = betas.get(uri);
}else{
beta = searchQueryHandler.calculateBeta(uri); // or calculate & put in map
betas.put(uri, beta);
}
//doc.addField(term.BETA,beta);
// adding PHI value
boolean isPerson = (superClassNames.contains("Person")) ? true : false ;
String adjInfo[] = searchQueryHandler.getAdjacentNodes(uri,isPerson);
StringBuffer info = new StringBuffer();
info.append(adjInfo[0]);
info.append(addUri.toString());
//doc.addField(term.ADJACENT_NODES,info.toString()); // adding adjacent nodes
float phi = calculatePHI(info);
//doc.addField(term.PHI, phi); // adding phi value
doc.addField(term.NAME_RAW, value, NAME_BOOST+beta+phi);
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST+beta+phi);
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST+beta+phi);
doc.addField(term.NAME_STEMMED, value, NAME_BOOST+beta+phi);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
long tContextNodes = System.currentTimeMillis();
String contextNodePropertyValues = "";
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI());
// collecting context node information
StringBuffer targetInfo = new StringBuffer();
targetInfo.append("");
if(superClassNames.contains("Agent")){
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI()));
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI()));
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI()));
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI()));
objectNames.append(" ");
objectNames.append(searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI()));
}
if(superClassNames.contains("InformationResource")){
targetInfo.append(" ");
targetInfo.append(searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()));
}
doc.addField(term.CONTEXTNODE, contextNodePropertyValues);
doc.addField(term.targetInfo, targetInfo.toString() + adjInfo[1]);
log.debug("time to fire contextnode queries and include them in the index: " + Long.toString(System.currentTimeMillis() - tContextNodes));
long tMoniker = System.currentTimeMillis();
//Moniker
if(ent.getMoniker() != null){
doc.addField(term.MONIKER, ent.getMoniker());
}
//boost for entity
if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0)
doc.setDocumentBoost(ent.getSearchBoost());
// if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0)
// doc.setDocumentBoost(ent.getSearchBoost());
//thumbnail
try{
@ -205,56 +281,76 @@ public class IndividualToSolrDocument implements Obj2DocIface {
Object anon[] = { new Long((new DateTime() ).getMillis()) };
doc.addField(term.INDEXEDTIME, String.format("%019d", anon));
log.debug("time to include moniker , thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker));
log.debug("time to include thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker));
long tPropertyStatements = System.currentTimeMillis();
//collecting data property statements
if(!prohibited){
//ALLTEXT, all of the 'full text'
String t=null;
value ="";
value+= " "+( ((t=ent.getName()) == null)?"":t );
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
value+= " " + classPublicNames;
StringBuffer allTextValue = new StringBuffer();
allTextValue.append("");
allTextValue.append(" ");
allTextValue.append(((t=ent.getName()) == null)?"":t);
allTextValue.append(" ");
allTextValue.append(((t=ent.getAnchor()) == null)?"":t);
allTextValue.append(" ");
allTextValue.append(classPublicNames.toString());
List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
if (dataPropertyStatements != null) {
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
while (dataPropertyStmtIter.hasNext()) {
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t );
allTextValue.append(" ");
allTextValue.append(((t=dataPropertyStmt.getData()) == null)?"":t);
}
}
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
if (objectPropertyStatements != null) {
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
while (objectPropertyStmtIter.hasNext()) {
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
continue;
try {
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
} catch (Exception e) {
log.debug("could not index name of related object: " + e.getMessage());
}
}
}
allTextValue.append(objectNames.toString());
log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements));
doc.addField(term.ALLTEXT, value, 4*beta);
doc.addField(term.ALLTEXTUNSTEMMED, value, 4*beta);
doc.addField(term.ALLTEXT_PHONETIC, value, PHONETIC_BOOST);
doc.setDocumentBoost(2*beta);
String alltext = allTextValue.toString();
doc.addField(term.ALLTEXT, alltext, 2.5F*beta*phi);
doc.addField(term.ALLTEXTUNSTEMMED, alltext, 2.5F*beta*phi);
doc.addField(term.ALLTEXT_PHONETIC, alltext, PHONETIC_BOOST);
doc.setDocumentBoost(2.5F*beta*phi);
}
return doc;
}
/**
 * Calculates the PHI value of a document from its adjacent nodes.
 *
 * The buffer is split on spaces and every token is treated as a URI. The beta
 * of each URI is read from the shared {@code betas} map or, when it is not
 * there yet, obtained from the search query handler and stored in the map.
 *
 * @param adjNodes space separated URIs of the adjacent nodes
 * @return {@code (0.1 + sum of the betas) / number of tokens}, or {@code 1}
 *         when the buffer contains no tokens
 */
public float calculatePHI(StringBuffer adjNodes){
    StringTokenizer tokenizer = new StringTokenizer(adjNodes.toString()," ");
    float total = 0.1F;
    int count = 0;

    while(tokenizer.hasMoreTokens()){
        String nodeUri = tokenizer.nextToken();
        count++;

        Float known = betas.get(nodeUri);
        if(known != null){
            // already calculated for this URI
            total += known;
        }else{
            // not calculated yet: query it and remember the result
            float computed = searchQueryHandler.calculateBeta(nodeUri);
            betas.put(nodeUri, computed);
            total += computed;
        }
    }

    if(count == 0){
        return 1;
    }
    return total / count;
}
// public IndividualToSolrDocument(Entity2LuceneDoc e2d){
//// entityToLucene = e2d;
// luceneToSolr = new LuceneDocToSolrDoc();
@ -292,6 +388,32 @@ public class IndividualToSolrDocument implements Obj2DocIface {
return ent;
}
/**
 * Registers the local names of the context node classes in the shared
 * {@code contextNodeClassNames} list, which {@code translate} consults to
 * avoid indexing individuals of those classes.
 *
 * The list is static while this method runs once per constructed instance, so
 * a name is only added when it is not present yet; otherwise every new
 * instance would append another 22 duplicates.
 */
private void fillContextNodes(){
    final String[] names = {
        "Role",
        "AttendeeRole",
        "ClinicalRole",
        "LeaderRole",
        "MemberRole",
        "OutreachProviderRole",
        "PresenterRole",
        "ResearcherRole",
        "InvestigatorRole",
        "CoPrincipalInvestigatorRole",
        "PrincipalInvestigatorRole",
        "ServiceProviderRole",
        "TeacherRole",

        "Position",
        "FacultyAdministrativePosition",
        "FacultyPosition",
        "LibrarianPosition",
        "Non-AcademicPosition",
        "Non-FacultyAcademicPosition",
        "PostdoctoralPosition",

        "AdvisingRelationship",
        "Authorship"
    };

    for(String name : names){
        if(!contextNodeClassNames.contains(name)){
            contextNodeClassNames.add(name);
        }
    }
}
public static float NAME_BOOST = 2.0F;
public static float PHONETIC_BOOST = 0.1F;

View file

@ -19,6 +19,7 @@ import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
public class SolrIndexer implements IndexerIface {
@ -134,6 +135,9 @@ public class SolrIndexer implements IndexerIface {
server.commit();
} catch (Exception e) {
log.error("Could not commit to solr server", e);
}finally{
IndividualToSolrDocument.betas.clear();
IndividualToSolrDocument.betas = null;
}
try {