Replace old, undistributed CSV parser library with Commons CSV
This commit is contained in:
parent
47a18844c4
commit
45bad8174d
4 changed files with 83 additions and 80 deletions
|
@ -141,7 +141,7 @@ public class JenaCsv2RdfController extends JenaIngestController {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Model doExecuteCsv2Rdf(VitroRequest vreq, FileItem fileStream, String filePath) throws Exception {
|
public Model doExecuteCsv2Rdf(VitroRequest vreq, FileItem fileStream, String filePath) throws Exception {
|
||||||
char[] quoteChars = {'"'};
|
char quoteChar = '"';
|
||||||
String namespace = "";
|
String namespace = "";
|
||||||
String tboxNamespace = vreq.getParameter("tboxNamespace");
|
String tboxNamespace = vreq.getParameter("tboxNamespace");
|
||||||
String typeName = vreq.getParameter("typeName");
|
String typeName = vreq.getParameter("typeName");
|
||||||
|
@ -162,7 +162,7 @@ public class JenaCsv2RdfController extends JenaIngestController {
|
||||||
separatorChar = '\t';
|
separatorChar = '\t';
|
||||||
}
|
}
|
||||||
|
|
||||||
Csv2Rdf c2r = new Csv2Rdf(separatorChar, quoteChars,namespace,tboxNamespace,typeName);
|
Csv2Rdf c2r = new Csv2Rdf(separatorChar, quoteChar,namespace,tboxNamespace,typeName);
|
||||||
|
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
|
|
||||||
|
|
|
@ -5,14 +5,16 @@ package edu.cornell.mannlib.vitro.webapp.ontology.update;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.StringTokenizer;
|
import java.util.StringTokenizer;
|
||||||
|
|
||||||
|
import org.apache.commons.csv.CSVFormat;
|
||||||
|
import org.apache.commons.csv.CSVParser;
|
||||||
|
import org.apache.commons.csv.CSVRecord;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.skife.csv.CSVReader;
|
|
||||||
import org.skife.csv.SimpleReader;
|
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.ontology.update.AtomicOntologyChange.AtomicChangeType;
|
import edu.cornell.mannlib.vitro.webapp.ontology.update.AtomicOntologyChange.AtomicChangeType;
|
||||||
|
|
||||||
|
@ -35,7 +37,6 @@ public class OntologyChangeParser {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param diffPath Diff path
|
* @param diffPath Diff path
|
||||||
* @throws IOException
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@SuppressWarnings({ "unchecked", "null", "static-access" })
|
@SuppressWarnings({ "unchecked", "null", "static-access" })
|
||||||
|
@ -50,53 +51,54 @@ public class OntologyChangeParser {
|
||||||
String destinationURI = null;
|
String destinationURI = null;
|
||||||
StringTokenizer stArr = null;
|
StringTokenizer stArr = null;
|
||||||
FileInputStream in = new FileInputStream(new File(diffPath));
|
FileInputStream in = new FileInputStream(new File(diffPath));
|
||||||
CSVReader readFile = new SimpleReader();
|
|
||||||
readFile.setSeperator('\t');
|
|
||||||
|
|
||||||
List<String[]> rows = readFile.parse(in);
|
// The diff file is tab-delimited (five columns per row). withDelimiter sets the field
// separator used while parsing; withRecordSeparator only influences printed output and
// would leave the parser splitting on commas.
CSVParser readFile = new CSVParser(new InputStreamReader(in),
|
||||||
|
CSVFormat.DEFAULT.withDelimiter('\t'));
|
||||||
|
|
||||||
for(int rowNum = 0; rowNum < rows.size(); rowNum++){
|
int rowNum = 0;
|
||||||
|
for (CSVRecord record : readFile) {
|
||||||
String[] cols = rows.get(rowNum);
|
rowNum++;
|
||||||
if (cols.length != 5) {
|
if (record.size() != 5) {
|
||||||
logger.logError("Invalid PromptDiff data at row " + (rowNum + 1)
|
logger.logError("Invalid PromptDiff data at row " + (rowNum)
|
||||||
+ ". Expected 5 columns; found " + cols.length );
|
+ ". Expected 5 columns; found " + record.size() );
|
||||||
} else {
|
} else {
|
||||||
|
String col = null;
|
||||||
changeObj = new AtomicOntologyChange();
|
changeObj = new AtomicOntologyChange();
|
||||||
|
|
||||||
if (cols[0] != null && cols[0].length() > 0) {
|
col = record.get(0);
|
||||||
changeObj.setSourceURI(cols[0]);
|
if (col != null && col.length() > 0) {
|
||||||
|
changeObj.setSourceURI(col);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cols[1] != null && cols[1].length() > 0) {
|
col = record.get(1);
|
||||||
changeObj.setDestinationURI(cols[1]);
|
if (col != null && col.length() > 0) {
|
||||||
|
changeObj.setDestinationURI(col);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cols[4] != null && cols[4].length() > 0) {
|
col = record.get(4);
|
||||||
changeObj.setNotes(cols[4]);
|
if (col != null && col.length() > 0) {
|
||||||
|
changeObj.setNotes(col);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ("Yes".equals(cols[2])) {
|
if ("Yes".equals(record.get(2))) {
|
||||||
changeObj.setAtomicChangeType(AtomicChangeType.RENAME);
|
changeObj.setAtomicChangeType(AtomicChangeType.RENAME);
|
||||||
} else if ("Delete".equals(cols[3])) {
|
} else if ("Delete".equals(record.get(3))) {
|
||||||
changeObj.setAtomicChangeType(AtomicChangeType.DELETE);
|
changeObj.setAtomicChangeType(AtomicChangeType.DELETE);
|
||||||
} else if ("Add".equals(cols[3])) {
|
} else if ("Add".equals(record.get(3))) {
|
||||||
changeObj.setAtomicChangeType(AtomicChangeType.ADD);
|
changeObj.setAtomicChangeType(AtomicChangeType.ADD);
|
||||||
} else {
|
} else {
|
||||||
logger.logError("Invalid rename or change type data: '" +
|
logger.logError("Invalid rename or change type data: '" +
|
||||||
cols[2] + " " + cols[3] + "'");
|
record.get(2) + " " + record.get(3) + "'");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
log.debug(changeObj);
|
log.debug(changeObj);
|
||||||
|
|
||||||
changeObjects.add(changeObj);
|
changeObjects.add(changeObj);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
readFile.close();
|
||||||
|
|
||||||
if (changeObjects.size() == 0) {
|
if (changeObjects.size() == 0) {
|
||||||
logger.log("No ABox updates are required.");
|
logger.log("No ABox updates are required.");
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,14 +4,15 @@ package edu.cornell.mannlib.vitro.webapp.utils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
|
||||||
import org.skife.csv.CSVReader;
|
import org.apache.commons.csv.CSVFormat;
|
||||||
import org.skife.csv.SimpleReader;
|
import org.apache.commons.csv.CSVParser;
|
||||||
|
import org.apache.commons.csv.CSVRecord;
|
||||||
import org.apache.jena.datatypes.xsd.XSDDatatype;
|
import org.apache.jena.datatypes.xsd.XSDDatatype;
|
||||||
import org.apache.jena.ontology.DatatypeProperty;
|
import org.apache.jena.ontology.DatatypeProperty;
|
||||||
import org.apache.jena.ontology.Individual;
|
import org.apache.jena.ontology.Individual;
|
||||||
|
@ -34,11 +35,11 @@ public class Csv2Rdf {
|
||||||
private String individualNameBase;
|
private String individualNameBase;
|
||||||
private String propertyNameBase;
|
private String propertyNameBase;
|
||||||
private char separatorChar;
|
private char separatorChar;
|
||||||
private char[] quoteChars;
|
private char quoteChar;
|
||||||
|
|
||||||
public Csv2Rdf(char[] quoteChars, String namespace, String tboxNamespace, String typeName) {
|
public Csv2Rdf(char quoteChar, String namespace, String tboxNamespace, String typeName) {
|
||||||
this.separatorChar = ',';
|
this.separatorChar = ',';
|
||||||
this.quoteChars = quoteChars;
|
this.quoteChar = quoteChar;
|
||||||
this.namespace = namespace;
|
this.namespace = namespace;
|
||||||
this.tboxNamespace = tboxNamespace;
|
this.tboxNamespace = tboxNamespace;
|
||||||
this.typeName = typeName;
|
this.typeName = typeName;
|
||||||
|
@ -46,9 +47,9 @@ public class Csv2Rdf {
|
||||||
this.propertyNameBase = individualNameBase+"_";
|
this.propertyNameBase = individualNameBase+"_";
|
||||||
}
|
}
|
||||||
|
|
||||||
public Csv2Rdf(char separatorChar, char[] quoteChars, String namespace, String tboxNamespace, String typeName) {
|
public Csv2Rdf(char separatorChar, char quoteChar, String namespace, String tboxNamespace, String typeName) {
|
||||||
this.separatorChar = separatorChar;
|
this.separatorChar = separatorChar;
|
||||||
this.quoteChars = quoteChars;
|
this.quoteChar = quoteChar;
|
||||||
this.namespace = namespace;
|
this.namespace = namespace;
|
||||||
this.tboxNamespace = tboxNamespace;
|
this.tboxNamespace = tboxNamespace;
|
||||||
this.typeName = typeName;
|
this.typeName = typeName;
|
||||||
|
@ -66,47 +67,48 @@ public class Csv2Rdf {
|
||||||
ontModel.addSubModel(tboxOntModel);
|
ontModel.addSubModel(tboxOntModel);
|
||||||
OntClass theClass = tboxOntModel.createClass(tboxNamespace+typeName);
|
OntClass theClass = tboxOntModel.createClass(tboxNamespace+typeName);
|
||||||
|
|
||||||
CSVReader cReader = new SimpleReader();
|
|
||||||
cReader.setSeperator(separatorChar);
|
|
||||||
cReader.setQuoteCharacters(quoteChars);
|
|
||||||
|
|
||||||
URIGenerator uriGen = (wadf != null && destination != null)
|
URIGenerator uriGen = (wadf != null && destination != null)
|
||||||
? new RandomURIGenerator(wadf, destination)
|
? new RandomURIGenerator(wadf, destination)
|
||||||
: new SequentialURIGenerator();
|
: new SequentialURIGenerator();
|
||||||
|
|
||||||
List<String[]> fileRows = cReader.parse(fis);
|
// separatorChar is the column separator of the uploaded file, so it must be applied
// with withDelimiter. withRecordSeparator only affects printing and is ignored by the
// parser, which would otherwise keep splitting fields on commas.
CSVParser cReader = new CSVParser(new InputStreamReader(fis),
|
||||||
|
CSVFormat.DEFAULT.withDelimiter(separatorChar)
|
||||||
|
.withQuote(quoteChar));
|
||||||
|
|
||||||
String[] columnHeaders = fileRows.get(0);
|
DatatypeProperty[] dpArray = null;
|
||||||
|
|
||||||
DatatypeProperty[] dpArray = new DatatypeProperty[columnHeaders.length];
|
for (CSVRecord cRecord : cReader) {
|
||||||
|
if (dpArray == null) {
|
||||||
|
dpArray = new DatatypeProperty[cRecord.size()];
|
||||||
|
|
||||||
for (int i=0; i<columnHeaders.length; i++) {
|
for (int i = 0; i < dpArray.length; i++) {
|
||||||
dpArray[i] = tboxOntModel.createDatatypeProperty(tboxNamespace+propertyNameBase+columnHeaders[i].replaceAll("\\W",""));
|
dpArray[i] = tboxOntModel.createDatatypeProperty(tboxNamespace+propertyNameBase+cRecord.get(i).replaceAll("\\W",""));
|
||||||
}
|
}
|
||||||
Individual ind = null;
|
} else {
|
||||||
for (int row=1; row<fileRows.size(); row++) {
|
Individual ind = null;
|
||||||
String uri = uriGen.getNextURI();
|
String uri = uriGen.getNextURI();
|
||||||
if(uri!=null)
|
if (uri!=null) {
|
||||||
ind = ontModel.createIndividual(uri,theClass);
|
ind = ontModel.createIndividual(uri, theClass);
|
||||||
else
|
} else {
|
||||||
ind = ontModel.createIndividual(theClass);
|
ind = ontModel.createIndividual(theClass);
|
||||||
String[] cols = fileRows.get(row);
|
}
|
||||||
for (int col=0; col<cols.length; col++) {
|
for (int col = 0; col<cRecord.size() && col < dpArray.length; col++) {
|
||||||
String value = cols[col].trim();
|
String value = cRecord.get(col).trim();
|
||||||
if (value.length()>0) {
|
if (value.length()>0) {
|
||||||
ind.addProperty(dpArray[col], value); // no longer using: , XSDDatatype.XSDstring);
|
ind.addProperty(dpArray[col], value); // no longer using: , XSDDatatype.XSDstring);
|
||||||
// TODO: specification of datatypes for columns
|
// TODO: specification of datatypes for columns
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cReader.close();
|
||||||
ontModel.removeSubModel(tboxOntModel);
|
ontModel.removeSubModel(tboxOntModel);
|
||||||
|
|
||||||
Model[] resultModels = new Model[2];
|
Model[] resultModels = new Model[2];
|
||||||
resultModels[0] = ontModel;
|
resultModels[0] = ontModel;
|
||||||
resultModels[1] = tboxOntModel;
|
resultModels[1] = tboxOntModel;
|
||||||
return resultModels;
|
return resultModels;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private interface URIGenerator {
|
private interface URIGenerator {
|
||||||
|
|
11
dependencies/pom.xml
vendored
11
dependencies/pom.xml
vendored
|
@ -111,6 +111,11 @@
|
||||||
<artifactId>commons-lang3</artifactId>
|
<artifactId>commons-lang3</artifactId>
|
||||||
<version>3.4</version>
|
<version>3.4</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.commons</groupId>
|
||||||
|
<artifactId>commons-csv</artifactId>
|
||||||
|
<version>1.4</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>commons-logging</groupId>
|
<groupId>commons-logging</groupId>
|
||||||
<artifactId>commons-logging</artifactId>
|
<artifactId>commons-logging</artifactId>
|
||||||
|
@ -243,12 +248,6 @@
|
||||||
<version>1.2.1</version>
|
<version>1.2.1</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
|
||||||
<groupId>org.vivoweb.dependencies</groupId>
|
|
||||||
<artifactId>csv</artifactId>
|
|
||||||
<version>1.0</version>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>javax.json</groupId>
|
<groupId>javax.json</groupId>
|
||||||
<artifactId>javax.json-api</artifactId>
|
<artifactId>javax.json-api</artifactId>
|
||||||
|
|
Loading…
Add table
Reference in a new issue