From 7905b80df41f5ded688df572ed6ee0f697ecb84b Mon Sep 17 00:00:00 2001 From: Brian Caruso Date: Mon, 15 Jul 2013 14:14:15 -0400 Subject: [PATCH] Adding initial search web service for updating uris. VIVO-98 --- .../controller/SearchServiceController.java | 92 ++++++++++ .../search/controller/UpdateUrisInIndex.java | 172 ++++++++++++++++++ .../webapp/search/indexing/IndexBuilder.java | 18 +- .../controller/UpdateUrisInIndexTest.java | 66 +++++++ webapp/web/WEB-INF/web.xml | 9 + .../body/search/searchService-help.ftl | 18 ++ 6 files changed, 372 insertions(+), 3 deletions(-) create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SearchServiceController.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/UpdateUrisInIndex.java create mode 100644 webapp/test/edu/cornell/mannlib/vitro/webapp/search/controller/UpdateUrisInIndexTest.java create mode 100644 webapp/web/templates/freemarker/body/search/searchService-help.ftl diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SearchServiceController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SearchServiceController.java new file mode 100644 index 000000000..22116f3e6 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SearchServiceController.java @@ -0,0 +1,92 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.search.controller; + +import java.io.IOException; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import edu.cornell.mannlib.vitro.webapp.auth.permissions.SimplePermission; +import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions; +import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; +import 
edu.cornell.mannlib.vitro.webapp.controller.freemarker.FreemarkerHttpServlet; +import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ExceptionResponseValues; +import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues; +import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.TemplateResponseValues; +import edu.cornell.mannlib.vitro.webapp.filestorage.uploadrequest.MultipartHttpServletRequest; +import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; + +/** + * Accepts requests to update a set of URIs in the search index. + */ +@SuppressWarnings("serial") +public class SearchServiceController extends FreemarkerHttpServlet { + private static final Log log = LogFactory.getLog(SearchServiceController.class); + + @Override + protected Actions requiredActions(VitroRequest vreq) { + return SimplePermission.MANAGE_SEARCH_INDEX.ACTIONS; + } + + /** + * Handle the different actions. If not specified, the default action is to + * show the help page. 
+ */ + @Override + protected ResponseValues processRequest(VitroRequest req) { + try { + // Works by side effect: parse the multi-part request and stash FileItems in request + new MultipartHttpServletRequest( req ); + + //figure out what action to perform + String pathInfo = req.getPathInfo(); + + if( pathInfo == null || pathInfo.trim().isEmpty() || "/".equals(pathInfo.trim()) ){ + return doHelpForm(req); + } + + pathInfo = pathInfo.substring(1); //get rid of leading slash + + if (VERBS.UPDATE_URIS_IN_SEARCH.verb.equals( pathInfo )) { + return doUpdateUrisInSearch(req); + } else { + return doHelpForm(req); + } + } catch (Exception e) { + return new ExceptionResponseValues(e); + } + } + + + public ResponseValues doUpdateUrisInSearch(HttpServletRequest req ) + throws IOException, ServletException { + + IndexBuilder builder = IndexBuilder.getBuilder(getServletContext()); + if( builder == null ) + throw new ServletException( "Could not get search index builder from context. Check smoke test"); + + new UpdateUrisInIndex().doUpdateUris( req, builder); + + TemplateResponseValues trv = new TemplateResponseValues( "" ); + return trv; + } + + + public ResponseValues doHelpForm(HttpServletRequest req){ + return new TemplateResponseValues( "searchService-help.ftl"); + } + + public enum VERBS{ + UPDATE_URIS_IN_SEARCH("updateUrisInSearch"); + + public final String verb; + VERBS(String verb){ + this.verb = verb; + } + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/UpdateUrisInIndex.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/UpdateUrisInIndex.java new file mode 100644 index 000000000..9f8689021 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/UpdateUrisInIndex.java @@ -0,0 +1,172 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.search.controller; + +import java.io.BufferedReader; +import 
java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.regex.Pattern; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.fileupload.FileItemIterator; +import org.apache.commons.fileupload.FileItemStream; +import org.apache.commons.fileupload.FileUploadException; +import org.apache.commons.fileupload.servlet.ServletFileUpload; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; + +/** + * Class that performs the update of the uris in the search index + * for the SearchService. + */ + +public class UpdateUrisInIndex { + private static final Log log = LogFactory.getLog(UpdateUrisInIndex.class); + + /** + * Web service for update in search index of a list of URIs. + * @throws IOException + */ + protected void doUpdateUris(HttpServletRequest req, IndexBuilder builder) + throws ServletException, IOException{ + + + boolean isMultipart = ServletFileUpload.isMultipartContent(req); + if( ! 
isMultipart ) + throw new ServletException("Expected Multipart Content"); + + + String charEncoding = getEncoding(req); + try{ + int count = 0; + ServletFileUpload upload = new ServletFileUpload(); + FileItemIterator iter = upload.getItemIterator(req); + while( iter.hasNext()){ + FileItemStream item = iter.next(); + String name = item.getFieldName(); + InputStream stream = item.openStream(); + try{ + count = count + addToSearchQueue(builder, new InputStreamReader(stream, charEncoding)); + }finally{ + stream.close(); + builder.doUpdateIndex(); + } + } + }catch (FileUploadException fex){ + throw new ServletException("Could not upload file to SearchServiceController", fex); + } + } + + private String getEncoding(HttpServletRequest req){ + String enc = req.getCharacterEncoding(); + if( enc == null || enc.isEmpty() ){ + log.debug("No encoding on POST request, That is acceptable."); + enc = "UTF-8"; + }else if( enc.length() > 30){ + log.debug("Ignoring odd encoding of '" + enc + "'"); + enc = "UTF-8"; + }else{ + log.debug("Encoding set on POST request: " + enc); + } + log.debug("Reading POSTed URIs with encoding " + enc); + return enc; + } + + private int addToSearchQueue( IndexBuilder builder, Reader in ) + throws IOException{ + int addedUriCount = 0; + + Iterator uris = new UrisFromInputIterator( in ); + while(uris.hasNext()){ + String uri = uris.next(); + log.debug("Request to index uri '" + uri + "'"); + builder.addToChanged( uri ); + addedUriCount++; + } + + return addedUriCount; + } + + + public static class UrisFromInputIterator implements Iterator { + BufferedReader reader; + Iterator uris; + + public UrisFromInputIterator(Reader in ){ + this.reader = new BufferedReader(in); + } + + public void remove(){ throw new UnsupportedOperationException() ; } + + public boolean hasNext(){ + if( uris != null && uris.hasNext() ){ + return true; + }else{ + try { + return getFromBuffer(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + 
return false; + } + } + } + + public String next(){ + return uris.next(); + } + + /** Returns true if there are uris to get. + * @throws IOException */ + private boolean getFromBuffer() throws IOException{ + uris = null; + + while( uris == null || !uris.hasNext() ){ + String chunk = reader.readLine(); + if( chunk == null ){ //at end of input + break; + } else if( chunk.trim().isEmpty() ){ + continue; + }else{ + uris = lineToUris(chunk).iterator(); + if( uris.hasNext() ){ + return true; + } + } + } + return false; + } + } + + + + private static List removeNullAndEmpty(List in ){ + ArrayList out = new ArrayList(); + for( String s : in ){ + if( s != null && !s.trim().isEmpty() ){ + out.add(s); + } + } + return out; + } + + protected static List lineToUris(String line){ + List parts = removeNullAndEmpty( Arrays.asList(commaAndWhitespace.split( line ) )); + return parts; + } + + //split uris on whitespace and commas + private static final Pattern commaAndWhitespace = Pattern.compile("[,\\s]"); + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java index add4210a2..ab8e4750d 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java @@ -20,6 +20,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.hp.hpl.jena.query.QueryParseException; +import com.hp.hpl.jena.rdf.model.ResourceFactory; import com.hp.hpl.jena.rdf.model.Statement; import edu.cornell.mannlib.vitro.webapp.beans.Individual; @@ -130,12 +131,22 @@ public class IndexBuilder extends VitroBackgroundThread { * your changes with a call to doUpdateIndex(). 
*/ public void addToChanged(Statement stmt) { - log.debug("call to addToChanged()"); + log.debug("call to addToChanged(Statement)"); synchronized(changedStmts){ changedStmts.add(stmt); } } + /** + * Convenience method to add a URI to the change queue. + */ + public void addToChanged(String uri){ + addToChanged(ResourceFactory.createStatement( + ResourceFactory.createResource(uri), + ResourceFactory.createProperty("http://ex.com/f"), + ResourceFactory.createPlainLiteral("added by IndexBuilder.addToChanged(uri)"))); + } + /** * This method will cause the IndexBuilder to completely rebuild * the index. @@ -244,7 +255,8 @@ public class IndexBuilder extends VitroBackgroundThread { for( StatementToURIsToUpdate stu : stmtToURIsToIndexFunctions ) { stu.startIndexing(); } - + + //keep uris unique by using a HashSet Collection urisToUpdate = new HashSet(); for( Statement stmt : getAndClearChangedStmts() ){ for( StatementToURIsToUpdate stu : stmtToURIsToIndexFunctions ){ @@ -256,7 +268,7 @@ public class IndexBuilder extends VitroBackgroundThread { for( StatementToURIsToUpdate stu : stmtToURIsToIndexFunctions ) { stu.endIndxing(); } - + return urisToUpdate; } diff --git a/webapp/test/edu/cornell/mannlib/vitro/webapp/search/controller/UpdateUrisInIndexTest.java b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/controller/UpdateUrisInIndexTest.java new file mode 100644 index 000000000..662c68e95 --- /dev/null +++ b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/controller/UpdateUrisInIndexTest.java @@ -0,0 +1,66 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.search.controller; + +import java.io.StringReader; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Accepts requests to update a set of URIs in the search index. 
+ */ +public class UpdateUrisInIndexTest { + + @Test + public void lineToUrisTest(){ + Assert.assertEquals(Arrays.asList("uri1"), UpdateUrisInIndex.lineToUris( "uri1")); + Assert.assertEquals(Arrays.asList("uri1", "uri2"), UpdateUrisInIndex.lineToUris( "uri1,uri2")); + + Assert.assertEquals(Arrays.asList("uri1"), UpdateUrisInIndex.lineToUris( "uri1\n")); + Assert.assertEquals(Arrays.asList("uri1","uri2"), UpdateUrisInIndex.lineToUris( "uri1\nuri2")); + + Assert.assertEquals(Collections.EMPTY_LIST, UpdateUrisInIndex.lineToUris( "" )); + Assert.assertEquals(Collections.EMPTY_LIST, UpdateUrisInIndex.lineToUris( "," )); + Assert.assertEquals(Collections.EMPTY_LIST, UpdateUrisInIndex.lineToUris( " , " )); + } + + + @Test + public void UrisFromInputIteratorTest(){ + doUrisFromInputIterator("",0); + doUrisFromInputIterator(" ",0); + doUrisFromInputIterator(" , ",0); + doUrisFromInputIterator("\n",0); + doUrisFromInputIterator("\n\n\n",0); + doUrisFromInputIterator("http://bogus.com/n234",1); + doUrisFromInputIterator("http://bogus.com/n234\nhttp://bogus.com/n442",2); + doUrisFromInputIterator("http://bogus.com/n234, http://bogus.com/n442",2); + doUrisFromInputIterator("http://bogus.com/n234,\nhttp://bogus.com/n442\n",2); + + doUrisFromInputIterator("http://bogus.com/n234\n",1); + doUrisFromInputIterator("\nhttp://bogus.com/n234",1); + doUrisFromInputIterator("\nhttp://bogus.com/n234\n",1); + + } + + public void doUrisFromInputIterator(String input, int expectedUris){ + Iterator it = new UpdateUrisInIndex.UrisFromInputIterator( new StringReader(input) ); + int count = 0; + while( it.hasNext()){ + String uri = it.next(); + if( uri == null) + Assert.fail("UrisFromInputIterator should not return null strings \n " + + "Null string for uri #" + count + " for input '" + input + "'"); + if( uri.isEmpty()) + Assert.fail("UrisFromInputIterator should not return empty strings \n " + + "Empty string for uri #" + count + " for input '" + input + "'"); + count++; + } + 
Assert.assertEquals("Incorrect number of URIs from input '" + input + "'", expectedUris, count); + } + +} diff --git a/webapp/web/WEB-INF/web.xml b/webapp/web/WEB-INF/web.xml index 10d3944f4..2b246330a 100644 --- a/webapp/web/WEB-INF/web.xml +++ b/webapp/web/WEB-INF/web.xml @@ -1151,6 +1151,15 @@ /admin/getObjectClasses + + SearchServiceController + edu.cornell.mannlib.vitro.webapp.search.controller.SearchServiceController + + + SearchServiceController + /searchService/* + + GadgetController diff --git a/webapp/web/templates/freemarker/body/search/searchService-help.ftl b/webapp/web/templates/freemarker/body/search/searchService-help.ftl new file mode 100644 index 000000000..6101e000b --- /dev/null +++ b/webapp/web/templates/freemarker/body/search/searchService-help.ftl @@ -0,0 +1,18 @@ +<#-- $This file is distributed under the terms of the license in /doc/license.txt$ --> + +

Search Web Service

+ +

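To update individuals in the search index, send a multipart/form-data POST request to /searchService/updateUrisInSearch containing the URIs to reindex, separated by commas or whitespace. The requesting user must be authorized to manage the search index.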

+ +

*** NOTE: THE FOLLOWING FORM USES A HARD-CODED URL AND STILL NEEDS TO BE FIXED ***

+ +
+ + + + +
+