NIHVIVO-160 Complete the FileStorageHelper, with tests.

This commit is contained in:
jeb228 2010-05-24 15:20:31 +00:00
parent c4709265c4
commit 544432acc1
9 changed files with 807 additions and 60 deletions

View file

@ -17,6 +17,12 @@ public interface FileStorage {
* {@link ConfigurationProperties} for the file storage base directory.
*/
String PROPERTY_FILE_STORAGE_BASE_DIR = "upload.directory";
/**
* The default implementation will use this key to ask
* {@link ConfigurationProperties} for the maximum permissible file size.
*/
String PROPERTY_FILE_MAXIMUM_SIZE = "file.maximum.size";
/**
* The default implementation will use this key to ask
@ -24,6 +30,11 @@ public interface FileStorage {
*/
String PROPERTY_DEFAULT_NAMESPACE = "Vitro.defaultNamespace";
/**
* How often do we insert path separator characters?
*/
int SHORTY_LENGTH = 3;
/**
* Store the bytes from this stream as a file with the specified ID and
* filename. If the file already exists, it is over-written.
@ -50,7 +61,7 @@ public interface FileStorage {
* @throws FileNotFoundException
* if there is no file that matches this ID and filename.
*/
byte[] getfile(String id, String filename) throws FileNotFoundException,
byte[] getFile(String id, String filename) throws FileNotFoundException,
IOException;
/**

View file

@ -2,59 +2,305 @@
package edu.cornell.mannlib.vitro.webapp.utils.filestorage;
import static edu.cornell.mannlib.vitro.webapp.utils.filestorage.FileStorage.SHORTY_LENGTH;
import java.io.File;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.log4j.Logger;
/**
* TODO
* A collection of utility routines used by the file storage system. Routines
* exist to:
* <ul>
* <li>encode filenames for safe storage</li>
* <li>decode filenames to their original values</li>
* <li>convert an ID (with namespaces) to a path, relative to the root directory
* </li>
* <li>convert an ID (with namespaces) to an absolute path</li>
* <li>convert an ID (with namespaces) and a filename to a full path for storing
* the file</li>
* <li>parse the string that specifies the maximum size of an uploaded file</li>
* </ul>
*/
public class FileStorageHelper {
/** Logger used for the debug-level trace emitted by each encoding and decoding step. */
private static final Logger LOG = Logger.getLogger(FileStorageHelper.class);
/** Character that introduces a hex escape; it is followed by two hex digits. */
public static final char HEX_ESCAPE_CHAR = '^';
/**
 * Characters that are always hex-encoded, in addition to any character
 * outside the range 0x21-0x7e. The set contains the escape character itself
 * and the single-character targets ('=', '+', ','), so none of those can
 * survive hex encoding in their literal form.
 */
public static final String HEX_ENCODE_SOURCES = "\"*+,<=>?^|\\~";
/**
 * Characters in an ID that are replaced one-for-one when building a path.
 * Each entry is replaced by the entry at the same index in
 * {@link #PATH_SINGLE_CHARACTER_TARGETS}.
 */
public static final char[] PATH_SINGLE_CHARACTER_SOURCES = new char[] {
'/', ':', '.' };
/** Replacements for {@link #PATH_SINGLE_CHARACTER_SOURCES}, matched by index. */
public static final char[] PATH_SINGLE_CHARACTER_TARGETS = new char[] {
'=', '+', ',' };
/**
 * Characters in a filename that are replaced one-for-one. Same as for path,
 * except that a period is not translated.
 */
public static final char[] NAME_SINGLE_CHARACTER_SOURCES = new char[] {
'/', ':' };
/**
 * Replacements for {@link #NAME_SINGLE_CHARACTER_SOURCES}, matched by index.
 * Same as for path, except that a period is not translated.
 */
public static final char[] NAME_SINGLE_CHARACTER_TARGETS = new char[] {
'=', '+' };
/**
* @param id
* @return
*/
public static String id2Path(String id) {
// Placeholder only: no conversion is performed, and every call fails with
// the RuntimeException below.
// TODO Auto-generated method stub
throw new RuntimeException("FileStorageHelper.id2Path() not implemented.");
}
/**
* @param filename
* @return
* Encode the filename as needed to guard against illegal characters.
*
* @see edu.cornell.mannlib.vitro.webapp.utils.filestorage
*/
public static String encodeName(String filename) {
    // The leftover "not implemented" stub throw has been removed: it sat
    // ahead of the real implementation and made it unreachable.

    // Step 1: hex-encode rare and illegal characters.
    String hexed = addHexEncoding(filename);

    // Step 2: swap the common-but-unsafe characters ('/' and ':') for their
    // single-character stand-ins. A period is left alone in filenames.
    return addSingleCharacterConversions(hexed,
            NAME_SINGLE_CHARACTER_SOURCES, NAME_SINGLE_CHARACTER_TARGETS);
}
/**
* @param rootDir
* @param id
* @param filename
* @return
* Encode special characters to hex sequences.
*/
public static File getFullPath(File rootDir, String id, String filename) {
// TODO Auto-generated method stub
throw new RuntimeException("FileStorageHelper.getFullPath() not implemented.");
private static String addHexEncoding(String clear) {
    // One pass over the input: reject the first character that does not fit
    // in a single octet, otherwise append the character or its hex escape.
    StringBuilder encodedText = new StringBuilder();
    int position = 0;
    for (char current : clear.toCharArray()) {
        if (current > 255) {
            throw new InvalidCharacterException(current, position, clear);
        }
        encodedText.append(hexEncodeCharacter(current));
        position++;
    }

    LOG.debug("Add hex encodings to '" + clear + "' giving '" + encodedText
            + "'");
    return encodedText.toString();
}
/**
* @param rootDir
* @param id
* @return
* Create a string holding either the character or its hex-encoding.
*/
public static File getPathToIdDirectory(File rootDir, String id) {
// TODO Auto-generated method stub
throw new RuntimeException("FileStorageHelper.getPathToIdDirectory() not implemented.");
private static String hexEncodeCharacter(char c) {
    // A character passes through untouched only if it is visible ASCII
    // (0x21-0x7e) and is not one of the reserved characters.
    boolean visibleAscii = (c >= 0x21) && (c <= 0x7e);
    boolean reserved = HEX_ENCODE_SOURCES.indexOf(c) >= 0;
    if (visibleAscii && !reserved) {
        return Character.toString(c);
    }

    // Otherwise emit the escape character followed by the high and low
    // hex digits of the character value.
    char[] escape = { HEX_ESCAPE_CHAR, toHexDigit(c / 16), toHexDigit(c % 16) };
    return new String(escape);
}
/**
* @param name
* @return
* Return the correct hex character for this integer value.
*/
public static String decodeName(String name) {
// TODO Auto-generated method stub
throw new RuntimeException("FileStorageHelper.decodeName() not implemented.");
private static char toHexDigit(int i) {
    // Lookup table indexed by value; lower-case digits are used for a-f.
    // An index outside 0-15 fails with the usual charAt() exception.
    final String hexDigits = "0123456789abcdef";
    return hexDigits.charAt(i);
}
/**
* Perform common single-character substitutions.
*/
private static String addSingleCharacterConversions(String encoded,
        char[] sources, char[] targets) {
    // Translate character by character: anything listed in "sources" is
    // replaced by its partner in "targets"; everything else is copied.
    StringBuilder converted = new StringBuilder();
    for (char current : encoded.toCharArray()) {
        converted.append(translateSingleCharacter(current, sources, targets));
    }

    LOG.debug("Add single character conversions to '" + encoded
            + "' giving '" + converted + "'");
    return converted.toString();
}
/**
* If a character is found in the "from" set, return its corresponding
* character from the "to" set. Otherwise, return the character itself.
*/
private static char translateSingleCharacter(char c, char[] from, char[] to) {
    // Scan "from" for the first occurrence of c.
    int index = 0;
    while ((index < from.length) && (from[index] != c)) {
        index++;
    }

    // Found: answer with the partner at the same index. Not found: unchanged.
    return (index < from.length) ? to[index] : c;
}
/**
* Restore the filename to its original form, removing the encoding.
*
* @see edu.cornell.mannlib.vitro.webapp.utils.filestorage
*/
public static String decodeName(String coded) {
    // Undo the encoding steps in reverse order: first restore the
    // single-character substitutions, then expand the hex escapes.
    return removeHexEncoding(removeSingleCharacterConversions(coded,
            NAME_SINGLE_CHARACTER_SOURCES, NAME_SINGLE_CHARACTER_TARGETS));
}
/**
* Convert common single-character substitutions back to their original
* values.
*/
private static String removeSingleCharacterConversions(String cleaned,
        char[] sources, char[] targets) {
    // Same walk as the forward conversion, but with the two tables swapped
    // so that each target character maps back to its source.
    StringBuilder restored = new StringBuilder();
    for (char current : cleaned.toCharArray()) {
        restored.append(translateSingleCharacter(current, targets, sources));
    }

    LOG.debug("Remove single character conversions from '" + cleaned
            + "' giving '" + restored + "'");
    return restored.toString();
}
/**
* Convert hex-encoded characters back to their original values.
*/
private static String removeHexEncoding(String encoded) {
    // Expands every escape sequence (escape character + exactly two hex
    // digits) back into the character it stands for; all other characters
    // are copied unchanged.
    //
    // Throws InvalidPathException if an escape character is not followed by
    // two hex digits. Each digit is validated individually: parsing the
    // pair with Integer.parseInt() would accept a sign character, so that a
    // sequence such as "^-1" decoded to U+FFFF instead of being rejected.
    StringBuilder result = new StringBuilder(encoded.length());
    int i = 0;
    while (i < encoded.length()) {
        char c = encoded.charAt(i);
        if (c != HEX_ESCAPE_CHAR) {
            result.append(c);
            i++;
            continue;
        }

        // Two more characters must follow the escape character.
        if (i + 2 >= encoded.length()) {
            throw new InvalidPathException(
                    "Invalid hex encoding in path: '" + encoded + "'");
        }

        // Character.digit() returns -1 for anything that is not a hex digit.
        int high = Character.digit(encoded.charAt(i + 1), 16);
        int low = Character.digit(encoded.charAt(i + 2), 16);
        if ((high < 0) || (low < 0)) {
            throw new InvalidPathException(
                    "Invalid hex encoding in path: '" + encoded + "'");
        }

        result.append((char) (high * 16 + low));
        i += 3;
    }

    LOG.debug("Remove hex encodings from '" + encoded + "' giving '"
            + result + "'");
    return result.toString();
}
/**
* Translate the object ID to a relative directory path. A recognized
* namespace is translated to its prefix, and illegal characters are
* encoded. The resulting string is broken up into 3-character directory
* names (or less).
*
* @see edu.cornell.mannlib.vitro.webapp.utils.filestorage
*/
public static String id2Path(String id, Map<Character, String> namespacesMap) {
    // Namespace recognition: the first map entry whose namespace starts the
    // ID wins. A prefix of 0 means "no namespace recognized".
    char prefix = 0;
    String localName = id;
    for (Entry<Character, String> candidate : namespacesMap.entrySet()) {
        String namespace = candidate.getValue();
        if (!id.startsWith(namespace)) {
            continue;
        }
        prefix = candidate.getKey();
        localName = id.substring(namespace.length());
        break;
    }

    // Rare-character encoding, then common-character encoding.
    String cleaned = addSingleCharacterConversions(
            addHexEncoding(localName), PATH_SINGLE_CHARACTER_SOURCES,
            PATH_SINGLE_CHARACTER_TARGETS);

    // Prefixing, then breaking the result into short directory names.
    return insertPathDelimiters(applyPrefixChar(prefix, cleaned));
}
/**
* Now that the cleaning is complete, add the prefix if there is one.
*/
private static String applyPrefixChar(char prefix, String cleaned) {
    // A prefix of 0 is the "no namespace" marker: nothing to prepend.
    if (prefix == 0) {
        return cleaned;
    }

    // Otherwise the result is: prefix character, tilde, cleaned ID.
    return new StringBuilder().append(prefix).append("~").append(cleaned)
            .toString();
}
/**
* Add path delimiters as needed to turn the cleaned prefixed string into a
* relative path.
*/
private static String insertPathDelimiters(String prefixed) {
    // Copy the string in chunks of SHORTY_LENGTH characters, putting a file
    // separator in front of every chunk except the first. The final chunk
    // may be shorter than the others.
    StringBuilder path = new StringBuilder();
    int total = prefixed.length();
    for (int start = 0; start < total; start += SHORTY_LENGTH) {
        if (start > 0) {
            path.append(File.separatorChar);
        }
        int end = Math.min(start + SHORTY_LENGTH, total);
        path.append(prefixed, start, end);
    }
    return path.toString();
}
/**
* Translate the object ID and the file storage root directory into a full
* path to the directory that would represent that ID.
*
* @see edu.cornell.mannlib.vitro.webapp.utils.filestorage
*/
public static File getPathToIdDirectory(String id,
        Map<Character, String> namespacesMap, File rootDir) {
    // Convert the ID to its relative path, then resolve that path against
    // the storage root.
    String relativePath = id2Path(id, namespacesMap);
    File idDirectory = new File(rootDir, relativePath);
    return idDirectory;
}
/**
* Translate the object ID, the file storage root directory and the filename
* into a full path to where the file would be stored.
*
* @see edu.cornell.mannlib.vitro.webapp.utils.filestorage
*/
public static File getFullPath(File rootDir, String id, String filename,
        Map<Character, String> namespacesMap) {
    // The directory that represents the ID ...
    File idDirectory = getPathToIdDirectory(id, namespacesMap, rootDir);
    // ... and, inside it, the file under its encoded name.
    String storedName = encodeName(filename);
    return new File(idDirectory, storedName);
}
/**
* Translate the configuration property for maximum file size from a
* <code>String</code> to a <code>long</code>.
*
* The string must represent a positive integer, optionally followed by
* "K", "M", or "G" (to indicate kilobytes, megabytes, or gigabytes).
*/
public static long parseMaximumFileSize(String fileSizeString) {
    // Returns the size in bytes. Throws IllegalArgumentException if the
    // string is null, is not a positive integer with an optional K/M/G
    // suffix, or describes a size that does not fit in a long.
    final String invalidMessage = "Maximum file size is invalid: '"
            + fileSizeString + "'. Must be a positive integer, "
            + "optionally followed by 'K', 'M', or 'G'";
    if (fileSizeString == null) {
        throw new IllegalArgumentException(invalidMessage);
    }

    // Split off the optional unit suffix.
    long factor = 1L;
    String integerString = fileSizeString;
    int shorter = fileSizeString.length() - 1;
    if (fileSizeString.endsWith("K")) {
        factor = 1024L;
        integerString = fileSizeString.substring(0, shorter);
    } else if (fileSizeString.endsWith("M")) {
        factor = 1024L * 1024L;
        integerString = fileSizeString.substring(0, shorter);
    } else if (fileSizeString.endsWith("G")) {
        factor = 1024L * 1024L * 1024L;
        integerString = fileSizeString.substring(0, shorter);
    }

    long value;
    try {
        value = Long.parseLong(integerString);
    } catch (NumberFormatException e) {
        // Keep the original exception as the cause for diagnosis.
        throw new IllegalArgumentException(invalidMessage, e);
    }

    if (value <= 0L) {
        throw new IllegalArgumentException(
                "Maximum file size must be more than 0: '" + fileSizeString
                        + "'");
    }

    // Guard the multiplication: without this check a huge value silently
    // wraps around and can even come out negative.
    if (value > Long.MAX_VALUE / factor) {
        throw new IllegalArgumentException(
                "Maximum file size is too large: '" + fileSizeString + "'");
    }

    return value * factor;
}
}

View file

@ -4,8 +4,10 @@ package edu.cornell.mannlib.vitro.webapp.utils.filestorage;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
@ -32,6 +34,7 @@ public class FileStorageImpl implements FileStorage {
private final File baseDir;
private final File rootDir;
private final File namespaceFile;
private final long maximumFileSize;
private final Map<Character, String> namespacesMap;
// ----------------------------------------------------------------------
@ -50,7 +53,7 @@ public class FileStorageImpl implements FileStorage {
* missing, or if it isn't in the expected form.
*/
FileStorageImpl() throws IOException {
    // Delegates to the three-argument constructor, taking the base
    // directory, the file namespace and the maximum file size from the
    // static "figure" helpers. The stale two-argument delegation has been
    // removed: a constructor may contain only one this(...) call.
    this(figureBaseDir(), figureFileNamespace(), figureMaximumFileSize());
}
/**
@ -62,17 +65,20 @@ public class FileStorageImpl implements FileStorage {
* if the configuration property doesn't point to an existing,
* writeable directory.
*/
FileStorageImpl(File baseDir, Collection<String> namespaces)
throws IOException {
FileStorageImpl(File baseDir, Collection<String> namespaces,
long maximumFileSize) throws IOException {
checkBaseDirValid(baseDir);
checkNamespacesValid(namespaces);
checkMaximumFileSizeValid(maximumFileSize);
this.baseDir = baseDir;
this.rootDir = new File(baseDir, "file_storage_root");
this.rootDir = new File(this.baseDir, "file_storage_root");
this.namespaceFile = new File(baseDir,
"file_storage_namespaces.properties");
this.maximumFileSize = maximumFileSize;
if (rootDir.exists() && namespaceFile.exists()) {
this.namespacesMap = confirmNamespaces(namespaces);
} else if (!rootDir.exists() && !namespaceFile.exists()) {
@ -91,6 +97,13 @@ public class FileStorageImpl implements FileStorage {
}
}
private void checkMaximumFileSizeValid(long maximumFileSize) {
    // Zero and positive limits are accepted; only a negative limit is an
    // error.
    if (maximumFileSize >= 0) {
        return;
    }
    throw new IllegalArgumentException(
            "Maximum file size may not be negative.");
}
private void checkNamespacesValid(Collection<String> namespaces) {
if (namespaces == null) {
throw new NullPointerException("namespaces may not be null.");
@ -172,6 +185,23 @@ public class FileStorageImpl implements FileStorage {
return Collections.singleton(fileNamespace);
}
/**
* Get the configuration property for the maximum file size and translate it
* into a long integer. It must be a positive integer, optionally followed
* by "K", "M", or "G" (to indicate kilobytes, megabytes, or gigabytes).
*/
private static long figureMaximumFileSize() {
    String configured = ConfigurationProperties
            .getProperty(PROPERTY_FILE_MAXIMUM_SIZE);

    // Normal case: the property is present, so hand it to the parser.
    if (configured != null) {
        return FileStorageHelper.parseMaximumFileSize(configured);
    }

    // No value configured at all.
    throw new IllegalArgumentException(
            "Configuration properties must contain a value for '"
                    + PROPERTY_FILE_MAXIMUM_SIZE + "'");
}
/**
* Assign arbitrary prefixes to these namespaces.
*/
@ -263,7 +293,8 @@ public class FileStorageImpl implements FileStorage {
}
File file = FileStorageHelper.getFullPath(this.rootDir, id, filename);
File file = FileStorageHelper.getFullPath(this.rootDir, id, filename,
this.namespacesMap);
OutputStream out = null;
try {
@ -297,7 +328,7 @@ public class FileStorageImpl implements FileStorage {
}
File file = FileStorageHelper.getFullPath(this.rootDir, id,
existingFilename);
existingFilename, this.namespacesMap);
file.delete();
if (file.exists()) {
@ -317,7 +348,8 @@ public class FileStorageImpl implements FileStorage {
*/
@Override
public String getFilename(String id) throws IOException {
File dir = FileStorageHelper.getPathToIdDirectory(this.rootDir, id);
File dir = FileStorageHelper.getPathToIdDirectory(id,
this.namespacesMap, this.rootDir);
if ((!dir.exists()) || (!dir.isDirectory())) {
return null;
@ -344,16 +376,44 @@ public class FileStorageImpl implements FileStorage {
/**
* {@inheritDoc}
*
* @throws IOException
* if the file is larger than the maximum allowable size.
*/
@Override
public byte[] getfile(String id, String filename)
throws FileNotFoundException, IOException {
// gets the bytes from the file
// throws FileNotFoundException if the file does not exist
// throws IOException
public byte[] getFile(String id, String filename) throws IOException {
// TODO Auto-generated method stub
throw new RuntimeException("FileStorage.getfile() not implemented.");
File file = FileStorageHelper.getFullPath(this.rootDir, id, filename,
this.namespacesMap);
if (!file.exists()) {
throw new FileNotFoundException("No file exists with ID '" + id
+ "', file location '" + file + "'");
}
InputStream in = null;
try {
in = new BufferedInputStream(new FileInputStream(file));
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
byte[] buffer = new byte[4096];
int howMany;
while (-1 != (howMany = in.read(buffer))) {
if (bytes.size() > this.maximumFileSize) {
throw new IOException("File is too large at this ID: '"
+ id + "', file location '" + file + "'");
}
bytes.write(buffer, 0, howMany);
}
bytes.close();
return bytes.toByteArray();
} finally {
if (in != null) {
try {
in.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}

View file

@ -0,0 +1,27 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.utils.filestorage;
/**
* Indicates that an object ID contains an invalid character.
*/
public class InvalidCharacterException extends RuntimeException {
    /** The offending character. */
    private final char badCharacter;
    /** Zero-based index of the offending character within the text. */
    private final int index;
    /** The complete text in which the character was found. */
    private final String text;

    /**
     * @param invalid
     *            the character that is not permitted
     * @param position
     *            where the character was found in <code>context</code>
     * @param context
     *            the full string that was being examined
     */
    public InvalidCharacterException(char invalid, int position, String context) {
        this.badCharacter = invalid;
        this.index = position;
        this.text = context;
    }

    /**
     * Build the message on demand, showing the character itself, its
     * hexadecimal value, its position, and the surrounding text.
     */
    @Override
    public String getMessage() {
        // Passed as an Integer so that both %c and %x can format it.
        Integer codePoint = Integer.valueOf(badCharacter);
        return String.format(
                "Invalid character '%1$c'(0x%1$x) at position %2$d in '%3$s'",
                codePoint, index, text);
    }
}

View file

@ -0,0 +1,27 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.utils.filestorage;
/**
* Indicates a PairTree path ("ppath" or "relative path") that is not correctly
* formed, and cannot be converted to an object ID.
*/
public class InvalidPathException extends RuntimeException {
    /** Create an exception with both a detail message and an underlying cause. */
    public InvalidPathException(String message, Throwable cause) {
        super(message, cause);
    }

    /** Create an exception with a detail message and no cause. */
    public InvalidPathException(String message) {
        super(message);
    }

    /** Create an exception that wraps an underlying cause. */
    public InvalidPathException(Throwable cause) {
        super(cause);
    }

    /** Create an exception with neither a detail message nor a cause. */
    public InvalidPathException() {
        super();
    }
}

View file

@ -44,6 +44,9 @@
* A namespace/prefix capability will be used to shorten file paths,
* but with more flexibility than the prefix algorithm given in the specification.
* </li>
* <li>
* "shorty" directory names may be up to 3 characters long, not 2.
* </li>
* </ul>
* </p>
*
@ -91,8 +94,61 @@
* </p>
*
* <h1>ID encoding</h1>
*
*
* <p>
* This is a multi-step process:
* <ul>
* <li>
* <strong>Namespace recognition</strong> -
* If the ID begins with a recognized namespace, then that namespace is
* stripped from the ID, and the prefix associated with that namespace
* is set aside for later in the process.
* </li>
* <li>
* <strong>Rare character encoding</strong> -
* Illegal characters are translated to their hexadecimal equivalents,
* as are some rarely used characters which will be given other
* purposes later in the process. The translated characters include any
* octet outside of the visible ASCII range (21-7e), and these additional
* characters:
* <pre> " * + , &lt; = &gt; ? ^ | \ ~ </pre>
* The hexadecimal encoding consists of a caret followed by 2 hex digits,
* e.g.: ^7c (the hex digits a-f are written in lower case)
* </li>
* <li>
* <strong>Common character encoding</strong> -
* To keep the file paths short and readable, characters that are used
* commonly in IDs but may be illegal in the file system are translated
* to a single, lesser-used character.
* <ul>
* <li> / becomes = </li>
* <li> : becomes + </li>
* <li> . becomes , </li>
* </ul>
* </li>
* <li>
* <strong>Prefixing</strong> -
* If a namespace was recognized on the ID in the first step, the
* associated prefix letter will be prepended to the string, with a
* tilde separator.
* </li>
* <li>
* <strong>Path breakdown</strong> -
* Finally, path separator characters are inserted after every third
* character in the processed ID string.
* </li>
* </ul>
* Examples:
* <br/><code>ark:/13030/xt12t3</code> becomes
* <code>ark/+=1/303/0=x/t12/t3</code>
* <br/><code>http://n2t.info/urn:nbn:se:kb:repos-1</code> becomes
* <code>htt/p+=/=n2/t,i/nfo/=ur/n+n/bn+/se+/kb+/rep/os-/1</code>
* <br/><code>what-the-*@?#!^!~?</code> becomes
* <code>wha/t-t/he-/^2a/@^3/f#!/^5e/!^7/e^3/f</code>
* <br/><code>http://vivo.myDomain.edu/file/n3424</code> with namespace
* <code>http://vivo.myDomain.edu/file/</code> and prefix
* <code>a</code> becomes
* <code>a~n/342/4</code>
* </p>
*
* <h1>Filename encoding</h1>
@ -103,6 +159,10 @@
* to be required, since few files are named with the special characters.
* </p>
*
* <p>
* The encoding process is the same as the "rare character encoding" and
* "common character encoding" steps used for ID encoding.
* </p>
*/
package edu.cornell.mannlib.vitro.webapp.utils.filestorage;