From e844444d376496d280a672bdae9b84a3f865a062 Mon Sep 17 00:00:00 2001
From: bjl23
Date: Mon, 3 Jan 2011 17:07:11 +0000
Subject: [PATCH] NIHVIVO-1194 stripping characters invalid in XML from user input

---
Review note: stripInvalidXMLChars scans by Unicode code point so that valid
surrogate pairs are kept; the blob ids on the "index" lines are those of the
original commit and were not recomputed.

 .../webapp/edit/n3editing/EditN3Utils.java    | 25 ++++++++++
 .../webapp/edit/n3editing/EditSubmission.java | 21 ++++++--
 .../edit/n3editing/EditN3UtilsTest.java       | 50 +++++++++++++++++++
 3 files changed, 93 insertions(+), 3 deletions(-)
 create mode 100644 webapp/test/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditN3UtilsTest.java

diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditN3Utils.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditN3Utils.java
index c471cd1cb..79b6eebff 100644
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditN3Utils.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditN3Utils.java
@@ -25,6 +25,8 @@ import edu.cornell.mannlib.vitro.webapp.auth.identifier.ServletIdentifierBundleF
 import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
 import edu.cornell.mannlib.vitro.webapp.filters.VitroRequestPrep;
 
+import org.apache.xerces.util.XMLChar;
+
 public class EditN3Utils {
 
     public static String getEditorUri(HttpServletRequest request, HttpSession session, ServletContext context){
@@ -41,6 +43,29 @@ public class EditN3Utils {
         return editorUri;
     }
 
+    /**
+     * Strips from a string any characters that are not valid in XML 1.0.
+     * The input is scanned by Unicode code point rather than by UTF-16
+     * code unit, so a supplementary character encoded as a valid surrogate
+     * pair is kept, while an unpaired surrogate is removed.
+     * @param in the text to clean; may be null
+     * @return the text without invalid characters, or null if in is null
+     */
+    public static String stripInvalidXMLChars(String in) {
+        if (in == null) {
+            return null;
+        }
+        StringBuilder out = new StringBuilder(in.length());
+        for (int i = 0; i < in.length(); ) {
+            int codePoint = in.codePointAt(i);
+            if (!XMLChar.isInvalid(codePoint)) {
+                out.appendCodePoint(codePoint);
+            }
+            i += Character.charCount(codePoint);
+        }
+        return out.toString();
+    }
+
 //    public static void addModTimes( Model additions, Model retractions, Model contextModel ){
 //        Property modtime = ResourceFactory.createProperty(VitroVocabulary.MODTIME);
 
diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditSubmission.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditSubmission.java
index 044182820..72810e2c3 100644
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditSubmission.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditSubmission.java
@@ -28,6 +28,7 @@ import com.hp.hpl.jena.vocabulary.XSD;
 
 import edu.cornell.mannlib.vitro.webapp.edit.EditLiteral;
 import edu.cornell.mannlib.vitro.webapp.edit.elements.EditElement;
+import edu.cornell.mannlib.vitro.webapp.utils.StringUtils;
 
 public class EditSubmission {
     private String editKey;
@@ -114,10 +115,24 @@ public class EditSubmission {
             String[] valuesArray = queryParameters.get(var);
             List<String> valueList = (valuesArray != null) ? Arrays.asList(valuesArray) : null;
             if( valueList != null && valueList.size() > 0 ) {
-                literalsFromForm.put(var, createLiteral(valueList.get(0), field.getRangeDatatypeUri(), field.getRangeLang()));
+                String value = valueList.get(0);
+
+                // remove any characters that are not valid in XML 1.0
+                // from user input so they don't cause problems
+                // with model serialization
+                value = EditN3Utils.stripInvalidXMLChars(value);
+
+                if (!StringUtils.isEmpty(value)) {
+                    literalsFromForm.put(var, createLiteral(
+                            value,
+                            field.getRangeDatatypeUri(),
+                            field.getRangeLang()));
+                }
+
                 if(valueList != null && valueList.size() > 1 )
                     log.debug("For field " + var +", cannot yet handle multiple " +
-                    "Literals for a single field, using first Literal on list");
+                            "Literals for a single field, using first Literal on list");
+
             }else{
                 log.debug("could not find value for parameter " + var );
             }
@@ -194,7 +209,7 @@ public class EditSubmission {
         validationErrors.putAll(this.basicValidation.validateFiles( fileItems ) );
     }
 
-    protected Literal createLiteral(String value, String datatypeUri, String lang){
+    protected Literal createLiteral(String value, String datatypeUri, String lang) {
         if( datatypeUri != null ){
             if( "http://www.w3.org/2001/XMLSchema:anyURI".equals(datatypeUri) ){
                 try {
diff --git a/webapp/test/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditN3UtilsTest.java b/webapp/test/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditN3UtilsTest.java
new file mode 100644
index 000000000..e824fdbd8
--- /dev/null
+++ b/webapp/test/edu/cornell/mannlib/vitro/webapp/edit/n3editing/EditN3UtilsTest.java
@@ -0,0 +1,50 @@
+/* $This file is distributed under the terms of the license in /doc/license.txt$ */
+
+package edu.cornell.mannlib.vitro.webapp.edit.n3editing;
+
+import org.apache.commons.io.output.NullOutputStream;
+import org.junit.Assert;
+import org.junit.Test;
+
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.vocabulary.RDFS;
+
+/** Unit tests for {@link EditN3Utils}. */
+public class EditN3UtilsTest {
+
+    /** Stripping yields the expected clean text, and only that text serializes as RDF/XML. */
+    @Test
+    public void testStripInvalidXMLChars() {
+        Model m = ModelFactory.createDefaultModel();
+        String containsInvalidXMLChars = "Blah \u0001blah \u0002blah\uDDDD";
+        String clean = "Blah blah blah";
+
+        // add a statement with the literal incompatible with XML to model m
+        m.add(m.createResource(), RDFS.label, containsInvalidXMLChars);
+
+        Assert.assertFalse(isSerializableAsXML(m));
+
+        String stripped = EditN3Utils.stripInvalidXMLChars(
+                containsInvalidXMLChars);
+        Assert.assertEquals(clean, stripped);
+
+        // clear the model of any statements
+        m.removeAll();
+        // add a statement with a literal that has been stripped of bad chars
+        m.add(m.createResource(), RDFS.label, stripped);
+
+        Assert.assertTrue(isSerializableAsXML(m));
+    }
+
+    /** Reports whether the model can be written as RDF/XML without an exception. */
+    private boolean isSerializableAsXML(Model m) {
+        try {
+            m.write(new NullOutputStream(), "RDF/XML");
+            return true;
+        } catch (Exception e) {
+            return false;
+        }
+    }
+
+}