Change CSV parser
This commit is contained in:
parent
ce85c363dc
commit
3a3cac9513
1 changed file with 37 additions and 62 deletions
|
@ -6,18 +6,20 @@ import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileReader;
|
import java.io.FileReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.csv.CSVFormat;
|
||||||
|
import org.apache.commons.csv.CSVParser;
|
||||||
|
import org.apache.commons.csv.CSVRecord;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.skife.csv.SimpleReader;
|
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
|
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An implementation of FileHarvestJob that can be used for any CSV file harvest.
|
* An implementation of FileHarvestJob that can be used for any CSV file harvest.
|
||||||
*/
|
*/
|
||||||
|
@ -214,75 +216,48 @@ class CsvFileHarvestJob implements FileHarvestJob {
|
||||||
@SuppressWarnings("rawtypes")
|
@SuppressWarnings("rawtypes")
|
||||||
public String validateUpload(File file) {
|
public String validateUpload(File file) {
|
||||||
try {
|
try {
|
||||||
SimpleReader reader = new SimpleReader();
|
String message = "No data in file";
|
||||||
|
CSVParser cReader = new CSVParser(new FileReader(file), CSVFormat.DEFAULT);
|
||||||
List templateCsv = reader.parse(this.templateFile);
|
try {
|
||||||
String[] templateFirstLine = (String[])templateCsv.get(0);
|
int rowNum = 0;
|
||||||
|
int numberFields = 0;
|
||||||
//if a line ends in a comma (absolutely a comma, no whitespace), SimpleReader will not consider the part after the comma to be a blank section.
|
String errorMsg = "File header does not match template";
|
||||||
List csv = reader.parse(file);
|
for (CSVRecord cRecord : cReader) {
|
||||||
boolean[] linesEndingInComma = getLinesEndingInComma(file);
|
rowNum++;
|
||||||
|
if (false) {
|
||||||
int length = csv.size();
|
numberFields = cRecord.size();
|
||||||
|
errorMsg += "file header items: ";
|
||||||
if(length == 0)
|
for(int i = 0; i < cRecord.size(); i++) {
|
||||||
return "No data in file";
|
errorMsg += cRecord.get(i) + ", ";
|
||||||
|
}
|
||||||
for(int i = 0; i < length; i++) {
|
} else {
|
||||||
String[] line = (String[])csv.get(i);
|
if (cRecord.size() > 0) {
|
||||||
boolean endsInComma = linesEndingInComma[i];
|
if(cRecord.size() != numberFields) {
|
||||||
if(i == 0) {
|
if (errorMsg != null) {
|
||||||
String errorMessage = validateCsvFirstLine(templateFirstLine, line);
|
errorMsg += "template items: ";
|
||||||
if(errorMessage != null)
|
for(int i = 0; i < cRecord.size(); i++) {
|
||||||
return errorMessage;
|
errorMsg += cRecord.get(i) + ", ";
|
||||||
}
|
}
|
||||||
else if(line.length != 0) {
|
return errorMsg;
|
||||||
int actualLineLength = line.length + (endsInComma ? 1 : 0);
|
}
|
||||||
if(actualLineLength != templateFirstLine.length) {
|
return "Mismatch in number of entries in row " + rowNum + ": expected " + numberFields + ", found " + cRecord.size();
|
||||||
return "Mismatch in number of entries in row " + i + ": expected " + templateFirstLine.length + ", found " + actualLineLength;
|
}
|
||||||
|
}
|
||||||
|
message = null;
|
||||||
|
errorMsg = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
return message;
|
||||||
|
} finally {
|
||||||
|
cReader.close();
|
||||||
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
log.error(e, e);
|
log.error(e, e);
|
||||||
return e.getMessage();
|
return e.getMessage();
|
||||||
}
|
}
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Makes sure that the first line of the CSV file is identical to the first line of the template file. This is
|
|
||||||
* assuming we are expecting all user CSV files to contain an initial header line. If this is not the case, then
|
|
||||||
* this method is unnecessary.
|
|
||||||
* @param templateFirstLine the parsed-out contents of the first line of the template file
|
|
||||||
* @param line the parsed-out contents of the first line of the input file
|
|
||||||
* @return an error message if the two lines don't match, or null if they do
|
|
||||||
*/
|
|
||||||
private String validateCsvFirstLine(String[] templateFirstLine, String[] line) {
|
|
||||||
String errorMessage = "File header does not match template";
|
|
||||||
if(line.length != templateFirstLine.length) {
|
|
||||||
//return errorMessage + ": " + "file header columns = " + line.length + ", template columns = " + templateFirstLine.length;
|
|
||||||
String errorMsg = "";
|
|
||||||
errorMsg += "file header items: ";
|
|
||||||
for(int i = 0; i < line.length; i++) {
|
|
||||||
errorMsg += line[i] + ", ";
|
|
||||||
}
|
|
||||||
errorMsg += "template items: ";
|
|
||||||
for(int i = 0; i < templateFirstLine.length; i++) {
|
|
||||||
errorMsg += templateFirstLine[i] + ", ";
|
|
||||||
}
|
|
||||||
return errorMsg;
|
|
||||||
}
|
|
||||||
for(int i = 0; i < line.length; i++)
|
|
||||||
{
|
|
||||||
if(!line[i].equals(templateFirstLine[i]))
|
|
||||||
return errorMessage + ": file header column " + (i + 1) + " = " + line[i] + ", template column " + (i + 1) + " = " + templateFirstLine[i];
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
private void prepareWorkspaceDirectory() {
|
private void prepareWorkspaceDirectory() {
|
||||||
String path = FileHarvestController.getFileHarvestRootPath() + "workspaces/" + this.sessionId;
|
String path = FileHarvestController.getFileHarvestRootPath() + "workspaces/" + this.sessionId;
|
||||||
|
|
Loading…
Add table
Reference in a new issue