Change CSV parser

This commit is contained in:
Graham Triggs 2016-12-20 08:47:21 +00:00
parent ce85c363dc
commit 3a3cac9513

View file

@ -6,18 +6,20 @@ import java.io.BufferedReader;
import java.io.File; import java.io.File;
import java.io.FileReader; import java.io.FileReader;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.skife.csv.SimpleReader;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
/** /**
* An implementation of FileHarvestJob that can be used for any CSV file harvest. * An implementation of FileHarvestJob that can be used for any CSV file harvest.
*/ */
@ -214,75 +216,48 @@ class CsvFileHarvestJob implements FileHarvestJob {
/**
 * Checks an uploaded CSV file and reports the first problem found.
 *
 * NOTE(review): the rows below are a side-by-side diff rendering. Each row holds the removed
 * (SimpleReader-based) text followed by the added (commons-csv based) text, so the span is
 * two revisions of this method fused together rather than one compilable definition.
 *
 * Removed column: parses the template file and the uploaded file with SimpleReader, checks the
 * first row with validateCsvFirstLine, then compares the field count of every later non-empty
 * row with the length of the template's first row, counting one extra field when the row ends
 * in a comma.
 *
 * Added column: iterates the records of a CSVParser (CSVFormat.DEFAULT) opened on the file and
 * closes the parser in a finally block.
 * NOTE(review): the header branch is guarded by "if (false)" and numberFields is assigned only
 * inside that branch, so it stays 0 for every record - this looks unintended; confirm against
 * the repository before relying on the added column.
 *
 * @param file the uploaded CSV file to check
 * @return null when no problem is found; otherwise "No data in file", a header or field-count
 *         mismatch message, or the message of an IOException raised while reading
 */
@SuppressWarnings("rawtypes") @SuppressWarnings("rawtypes")
public String validateUpload(File file) { public String validateUpload(File file) {
try { try {
SimpleReader reader = new SimpleReader(); String message = "No data in file";
CSVParser cReader = new CSVParser(new FileReader(file), CSVFormat.DEFAULT);
List templateCsv = reader.parse(this.templateFile); try {
String[] templateFirstLine = (String[])templateCsv.get(0); int rowNum = 0;
int numberFields = 0;
//if a line ends in a comma (absolutely a comma, no whitespace), SimpleReader will not consider the part after the comma to be a blank section. String errorMsg = "File header does not match template";
List csv = reader.parse(file); for (CSVRecord cRecord : cReader) {
boolean[] linesEndingInComma = getLinesEndingInComma(file); rowNum++;
if (false) {
int length = csv.size(); numberFields = cRecord.size();
errorMsg += "file header items: ";
if(length == 0) for(int i = 0; i < cRecord.size(); i++) {
return "No data in file"; errorMsg += cRecord.get(i) + ", ";
for(int i = 0; i < length; i++) {
String[] line = (String[])csv.get(i);
boolean endsInComma = linesEndingInComma[i];
if(i == 0) {
String errorMessage = validateCsvFirstLine(templateFirstLine, line);
if(errorMessage != null)
return errorMessage;
} }
else if(line.length != 0) { } else {
int actualLineLength = line.length + (endsInComma ? 1 : 0); if (cRecord.size() > 0) {
if(actualLineLength != templateFirstLine.length) { if(cRecord.size() != numberFields) {
return "Mismatch in number of entries in row " + i + ": expected " + templateFirstLine.length + ", found " + actualLineLength; if (errorMsg != null) {
errorMsg += "template items: ";
for(int i = 0; i < cRecord.size(); i++) {
errorMsg += cRecord.get(i) + ", ";
}
return errorMsg;
}
return "Mismatch in number of entries in row " + rowNum + ": expected " + numberFields + ", found " + cRecord.size();
} }
} }
message = null;
errorMsg = null;
}
} }
return message;
} finally {
cReader.close();
}
} catch (IOException e) { } catch (IOException e) {
log.error(e, e); log.error(e, e);
return e.getMessage(); return e.getMessage();
} }
return null;
} }
/**
 * Compares the header row of an uploaded CSV file with the header row of the template file.
 * User files are expected to begin with a header line; without that convention this check
 * would serve no purpose.
 *
 * @param templateFirstLine the fields parsed from the first line of the template file
 * @param line the fields parsed from the first line of the uploaded file
 * @return {@code null} when both headers hold the same fields in the same order; otherwise a
 *         message listing both headers (different field counts) or naming the first column
 *         whose text differs, numbered from 1
 */
private String validateCsvFirstLine(String[] templateFirstLine, String[] line) {
    final int fileColumns = line.length;
    final int templateColumns = templateFirstLine.length;

    if (fileColumns != templateColumns) {
        // Different column counts: list every field of both headers so they can be compared.
        StringBuilder listing = new StringBuilder("file header items: ");
        for (String fileItem : line) {
            listing.append(fileItem).append(", ");
        }
        listing.append("template items: ");
        for (String templateItem : templateFirstLine) {
            listing.append(templateItem).append(", ");
        }
        return listing.toString();
    }

    // Same column count: report the first position whose text differs.
    int column = 0;
    while (column < fileColumns) {
        String found = line[column];
        String expected = templateFirstLine[column];
        if (!found.equals(expected)) {
            int position = column + 1;
            return "File header does not match template" + ": file header column " + position + " = " + found
                    + ", template column " + position + " = " + expected;
        }
        column++;
    }
    return null;
}
/* /*
private void prepareWorkspaceDirectory() { private void prepareWorkspaceDirectory() {
String path = FileHarvestController.getFileHarvestRootPath() + "workspaces/" + this.sessionId; String path = FileHarvestController.getFileHarvestRootPath() + "workspaces/" + this.sessionId;