NIHVIVO-1194 stripping characters invalid in XML from user input

This commit is contained in:
bjl23 2011-01-03 17:07:11 +00:00
parent f5bbbf22e5
commit e844444d37
3 changed files with 86 additions and 3 deletions

View file

@ -25,6 +25,8 @@ import edu.cornell.mannlib.vitro.webapp.auth.identifier.ServletIdentifierBundleF
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.filters.VitroRequestPrep; import edu.cornell.mannlib.vitro.webapp.filters.VitroRequestPrep;
import org.apache.xerces.util.XMLChar;
public class EditN3Utils { public class EditN3Utils {
public static String getEditorUri(HttpServletRequest request, HttpSession session, ServletContext context){ public static String getEditorUri(HttpServletRequest request, HttpSession session, ServletContext context){
@ -41,6 +43,22 @@ public class EditN3Utils {
return editorUri; return editorUri;
} }
/**
 * Strips from a string any characters that are not valid in XML 1.0.
 *
 * <p>The input is examined one Unicode code point at a time rather than
 * one UTF-16 {@code char} at a time. A supplementary character (above
 * U+FFFF) is stored as a surrogate pair, and each half of the pair is an
 * invalid XML character when looked at on its own; checking the combined
 * code point lets valid supplementary characters survive while unpaired
 * surrogates are still removed.
 *
 * @param in the string to clean; may be {@code null}
 * @return a copy of {@code in} with every code point rejected by
 *         {@link XMLChar#isInvalid(int)} removed, or {@code null} if
 *         {@code in} is {@code null}
 */
public static String stripInvalidXMLChars(String in) {
    if (in == null) {
        // nothing to strip; avoid a NullPointerException for absent input
        return null;
    }
    // local, single-threaded buffer: no need for StringBuffer's locking
    StringBuilder out = new StringBuilder(in.length());
    int i = 0;
    while (i < in.length()) {
        // for an unpaired surrogate this is the surrogate value itself
        int codePoint = in.codePointAt(i);
        if (!XMLChar.isInvalid(codePoint)) {
            out.appendCodePoint(codePoint);
        }
        // advance by 1 or 2 chars depending on how the code point is encoded
        i += Character.charCount(codePoint);
    }
    return out.toString();
}
// public static void addModTimes( Model additions, Model retractions, Model contextModel ){ // public static void addModTimes( Model additions, Model retractions, Model contextModel ){
// Property modtime = ResourceFactory.createProperty(VitroVocabulary.MODTIME); // Property modtime = ResourceFactory.createProperty(VitroVocabulary.MODTIME);

View file

@ -28,6 +28,7 @@ import com.hp.hpl.jena.vocabulary.XSD;
import edu.cornell.mannlib.vitro.webapp.edit.EditLiteral; import edu.cornell.mannlib.vitro.webapp.edit.EditLiteral;
import edu.cornell.mannlib.vitro.webapp.edit.elements.EditElement; import edu.cornell.mannlib.vitro.webapp.edit.elements.EditElement;
import edu.cornell.mannlib.vitro.webapp.utils.StringUtils;
public class EditSubmission { public class EditSubmission {
private String editKey; private String editKey;
@ -114,10 +115,24 @@ public class EditSubmission {
String[] valuesArray = queryParameters.get(var); String[] valuesArray = queryParameters.get(var);
List<String> valueList = (valuesArray != null) ? Arrays.asList(valuesArray) : null; List<String> valueList = (valuesArray != null) ? Arrays.asList(valuesArray) : null;
if( valueList != null && valueList.size() > 0 ) { if( valueList != null && valueList.size() > 0 ) {
literalsFromForm.put(var, createLiteral(valueList.get(0), field.getRangeDatatypeUri(), field.getRangeLang())); String value = valueList.get(0);
// remove any characters that are not valid in XML 1.0
// from user input so they don't cause problems
// with model serialization
value = EditN3Utils.stripInvalidXMLChars(value);
if (!StringUtils.isEmpty(value)) {
literalsFromForm.put(var, createLiteral(
value,
field.getRangeDatatypeUri(),
field.getRangeLang()));
}
if(valueList != null && valueList.size() > 1 ) if(valueList != null && valueList.size() > 1 )
log.debug("For field " + var +", cannot yet handle multiple " + log.debug("For field " + var +", cannot yet handle multiple " +
"Literals for a single field, using first Literal on list"); "Literals for a single field, using first Literal on list");
}else{ }else{
log.debug("could not find value for parameter " + var ); log.debug("could not find value for parameter " + var );
} }

View file

@ -0,0 +1,50 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.edit.n3editing;
import junit.framework.Assert;
import org.apache.commons.io.output.NullOutputStream;
import org.junit.Test;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.vocabulary.RDFS;
/**
 * Unit test for the string clean-up helper in {@link EditN3Utils}.
 */
public class EditN3UtilsTest {

    /**
     * Verifies that stripping yields the expected clean text, and that a
     * model holding the stripped literal can be written as RDF/XML while a
     * model holding the raw literal cannot.
     */
    @Test
    public void testStripInvalidXMLChars() {
        String dirty = "Blah \u0001blah \u0002blah\uDDDD";
        String expected = "Blah blah blah";
        Model model = ModelFactory.createDefaultModel();

        // a label carrying the raw text must make serialization fail
        model.add(model.createResource(), RDFS.label, dirty);
        Assert.assertFalse(isSerializableAsXML(model));

        String actual = EditN3Utils.stripInvalidXMLChars(dirty);
        Assert.assertEquals(expected, actual);

        // start over with an empty model that only holds the cleaned label
        model.removeAll();
        model.add(model.createResource(), RDFS.label, actual);
        Assert.assertTrue(isSerializableAsXML(model));
    }

    /**
     * Attempts to write the model as RDF/XML to a stream that discards
     * its output.
     *
     * @param model the model to serialize
     * @return true if the write completed, false if it threw an exception
     */
    private boolean isSerializableAsXML(Model model) {
        boolean serializable;
        try {
            model.write(new NullOutputStream(), "RDF/XML");
            serializable = true;
        } catch (Exception e) {
            serializable = false;
        }
        return serializable;
    }
}