[VIVO-1031] HTML sanitisation is slow - for now, replace AntiSamy with simple RegEx of JavaScript patterns for significant boost. Will add full sanitisation to the input later.

This commit is contained in:
grahamtriggs 2015-10-09 10:34:23 +01:00
parent b64b87ee1a
commit f37f115160

View file

@ -3,6 +3,7 @@
package edu.cornell.mannlib.vitro.webapp.web.templatemodels; package edu.cornell.mannlib.vitro.webapp.web.templatemodels;
import java.util.Map; import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -58,7 +59,7 @@ public abstract class BaseTemplateModel {
* Currently this only checks for XSS exploits. * Currently this only checks for XSS exploits.
*/ */
protected String cleanTextForDisplay( String dirty){ protected String cleanTextForDisplay( String dirty){
return AntiScript.cleanText(dirty); return simpleScriptStripper(dirty);
} }
/** /**
@ -66,7 +67,23 @@ public abstract class BaseTemplateModel {
* a map. Map may be modified. * a map. Map may be modified.
*/ */
protected <T> void cleanMapValuesForDisplay( Map<T,String> map){ protected <T> void cleanMapValuesForDisplay( Map<T,String> map){
AntiScript.cleanMapValues(map); for( T key : map.keySet() ){
map.put(key, simpleScriptStripper(map.get(key)) );
}
} }
// private static final Pattern stripScriptTags = Pattern.compile("<script\\b[^<]*(?:(?!</script>)<[^<]*)*</script>");
// private static final Pattern stripOnAttributes = Pattern.compile(" on[a-z]*=\"(?:[^\\\\\"]|\\\\.)*\"");
/**
 * Matches the JavaScript carriers removed by {@link #simpleScriptStripper(String)}:
 * a complete {@code <script ...>...</script>} element, or an inline event-handler
 * attribute of the form {@code  onxxx="..."} / {@code  onxxx='...'} (backslash escapes
 * inside the quoted value are honoured).
 *
 * Matching is case-insensitive because HTML tag and attribute names are, so
 * {@code <SCRIPT>} and {@code onClick=} must be caught as well.
 */
private static final Pattern stripJavascript = Pattern.compile(
        "(<script\\b[^<]*(?:(?!</script>)<[^<]*)*</script>)"
        + "|( on[a-z]*=\"(?:[^\\\\\"]|\\\\.)*\")"
        + "|( on[a-z]*='(?:[^\\\\']|\\\\.)*')",
        Pattern.CASE_INSENSITIVE);

/**
 * Removes script elements and quoted inline {@code on*} event-handler attributes
 * from a string.
 *
 * NOTE: this is a lightweight regular-expression filter, not a full HTML sanitiser.
 * Unquoted handler values, handlers preceded by whitespace other than a space,
 * {@code javascript:} URLs and similar vectors are NOT handled here.
 *
 * @param dirtyInput text that may contain markup; may be {@code null}
 * @return {@code null} if {@code dirtyInput} is {@code null}; otherwise the input with
 *         every match of {@link #stripJavascript} removed. Input that contains no
 *         match is returned unchanged.
 */
protected String simpleScriptStripper(String dirtyInput) {
    if (dirtyInput == null) {
        return null;
    }

    String current = dirtyInput;
    // Cheap substring pre-check so the common "no markup" case never touches the regex.
    while (containsIgnoringCase(current, "<script") || containsIgnoringCase(current, " on")) {
        String stripped = stripJavascript.matcher(current).replaceAll("");
        // Every match is non-empty and is replaced by "", so an unchanged length
        // means nothing matched and the text is stable.
        if (stripped.length() == current.length()) {
            break;
        }
        // Re-scan: removing a match can splice the surrounding text into a new
        // "<script" (e.g. "<scr<script>..</script>ipt>"), which one pass would leave behind.
        current = stripped;
    }
    return current;
}

/**
 * Case-insensitive {@code contains} that does not allocate a lower-cased copy of the text.
 */
private static boolean containsIgnoringCase(String text, String fragment) {
    int fragmentLength = fragment.length();
    int lastStart = text.length() - fragmentLength;
    for (int start = 0; start <= lastStart; start++) {
        if (text.regionMatches(true, start, fragment, 0, fragmentLength)) {
            return true;
        }
    }
    return false;
}
} }