First cut at the SDB - TDB converter: fails on large models (>2M triples)
This commit is contained in:
parent
89416efa77
commit
06be40e699
15 changed files with 291 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -10,3 +10,4 @@
|
||||||
.build
|
.build
|
||||||
utilities/solrtester/.work
|
utilities/solrtester/.work
|
||||||
utilities/rdbmigration/.work
|
utilities/rdbmigration/.work
|
||||||
|
utilities/sdb_to_tdb/.work
|
||||||
|
|
19
utilities/sdb_to_tdb/README.txt
Normal file
19
utilities/sdb_to_tdb/README.txt
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
A tool to convert triples from SDB to TDB.
|
||||||
|
|
||||||
|
Unlike the RDB-migration tool, this tool is not tied to the VIVO
|
||||||
|
properties files. Instead, you must provide:
|
||||||
|
* The URL for the SDB, including username and password parameters
|
||||||
|
* The directory path for the TDB.
|
||||||
|
The directory must exist.
|
||||||
|
The directory must be empty, unless the "force" option is used.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
java -jar sdb2tdb.jar \
|
||||||
|
'jdbc:mysql://localhost/vitrodb?user=vivoUser&password=vivoPass' \
|
||||||
|
/usr/local/my/tdb
|
||||||
|
|
||||||
|
java -Xms512m -Xmx4096m -jar .work/sdb2tdb.jar \
|
||||||
|
'jdbc:mysql://localhost/weill17?user=vivoUser&password=vivoPass' \
|
||||||
|
/Users/jeb228/Testing/instances/weill-develop/vivo_home/contentTdb \
|
||||||
|
force
|
89
utilities/sdb_to_tdb/build.xml
Normal file
89
utilities/sdb_to_tdb/build.xml
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
|
||||||
|
<!-- $This file is distributed under the terms of the license in /doc/license.txt$ -->
|
||||||
|
|
||||||
|
<!-- ======================================================================
|
||||||
|
A tool to convert SDB data into TDB.
|
||||||
|
====================================================================== -->
|
||||||
|
|
||||||
|
<project name="SDB-to-TDB" default="describe">

    <!-- Locations used by the build. Everything generated goes under .work -->
    <property name="working.dir" location=".work" />
    <property name="src.dir" location="src" />
    <property name="lib.dir" location="lib" />
    <property name="classes.dir" location="${working.dir}/classes" />
    <property name="jar.file" location="${working.dir}/sdb2tdb.jar" />

    <!-- ================================= 
          target: describe              
         ================================= -->
    <target name="describe"
            description="--> Describe the targets (this is the default).">
        <echo>
all - Compiles the tool and packs it into a JAR
        </echo>
    </target>

    <!-- ================================= 
          target: all
          Only builds the JAR; the conversion itself is run separately
          with "java -jar".
         ================================= -->
    <target name="all"
            depends="clean, jar"
            description="Build from scratch: clean, compile, and pack the JAR.">
    </target>

    <!-- - - - - - - - - - - - - - - - - - 
          target: clean
          Removes the whole working directory.
         - - - - - - - - - - - - - - - - - -->
    <target name="clean">
        <delete dir="${working.dir}" />
    </target>

    <!-- - - - - - - - - - - - - - - - - - 
          target: prepare
          Creates the working and classes directories.
         - - - - - - - - - - - - - - - - - -->
    <target name="prepare">
        <mkdir dir="${working.dir}" />
        <mkdir dir="${classes.dir}" />
    </target>

    <!-- - - - - - - - - - - - - - - - - - 
          target: compile
          Compiles the sources against the JARs found directly in lib.
         - - - - - - - - - - - - - - - - - -->
    <target name="compile" depends="prepare">
        <path id="main.compile.classpath">
            <fileset dir="${lib.dir}" includes="*.jar" />
        </path>

        <javac srcdir="${src.dir}"
               destdir="${classes.dir}"
               debug="true"
               deprecation="true"
               encoding="UTF8"
               includeantruntime="false"
               optimize="true"
               source="1.7">
            <classpath refid="main.compile.classpath" />
        </javac>
    </target>

    <!-- - - - - - - - - - - - - - - - - - 
          target: jar
          Packs the compiled classes together with the contents of the
          library JARs into a single executable JAR.
         - - - - - - - - - - - - - - - - - -->
    <target name="jar" depends="compile">
        <jar destfile="${jar.file}">
            <manifest>
                <attribute name="Main-Class"
                           value="edu.cornell.mannlib.vitro.utilities.sdb2tdb.Sdb2Tdb" />
            </manifest>
            <fileset dir="${classes.dir}" />
            <archives>
                <zips>
                    <fileset dir="${lib.dir}" includes="**/*.jar" />
                </zips>
            </archives>
        </jar>
    </target>

</project>
|
||||||
|
|
BIN
utilities/sdb_to_tdb/lib/jena-arq-2.10.1.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-arq-2.10.1.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/jena-core-2.10.1.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-core-2.10.1.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/jena-iri-0.9.6.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-iri-0.9.6.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/jena-sdb-1.3.6.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-sdb-1.3.6.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/jena-tdb-0.10.0.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-tdb-0.10.0.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/log4j-1.2.16.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/log4j-1.2.16.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/mysql-connector-java-5.1.30-bin.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/mysql-connector-java-5.1.30-bin.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/slf4j-api-1.6.6.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/slf4j-api-1.6.6.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/slf4j-log4j12-1.6.6.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/slf4j-log4j12-1.6.6.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/xercesImpl-2.11.0.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/xercesImpl-2.11.0.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/xml-apis-1.4.01.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/xml-apis-1.4.01.jar
Normal file
Binary file not shown.
|
@ -0,0 +1,182 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.utilities.sdb2tdb;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FilenameFilter;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.hp.hpl.jena.query.Dataset;
|
||||||
|
import com.hp.hpl.jena.rdf.model.Model;
|
||||||
|
import com.hp.hpl.jena.sdb.SDBFactory;
|
||||||
|
import com.hp.hpl.jena.sdb.Store;
|
||||||
|
import com.hp.hpl.jena.sdb.StoreDesc;
|
||||||
|
import com.hp.hpl.jena.sdb.store.DatabaseType;
|
||||||
|
import com.hp.hpl.jena.sdb.store.LayoutType;
|
||||||
|
import com.hp.hpl.jena.tdb.TDBFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <pre>
|
||||||
|
* java -jar sdb2tdb.jar \
|
||||||
|
* 'jdbc:mysql://localhost/vitrodb?user=vivoUser&password=vivoPass'\
|
||||||
|
* /usr/local/my/tdb
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
public class Sdb2Tdb {
|
||||||
|
private final String driverClassName;
|
||||||
|
private final String jdbcUrl;
|
||||||
|
private final String destination;
|
||||||
|
private final boolean force;
|
||||||
|
|
||||||
|
private Dataset sdbDataset;
|
||||||
|
private Dataset tdbDataset;
|
||||||
|
|
||||||
|
public Sdb2Tdb(List<String> hardArgs) throws UsageException {
|
||||||
|
List<String> args = new ArrayList<>(hardArgs);
|
||||||
|
|
||||||
|
if (!args.isEmpty() && args.indexOf("force") == (args.size() - 1)) {
|
||||||
|
this.force = true;
|
||||||
|
args.remove(args.size() - 1);
|
||||||
|
} else {
|
||||||
|
this.force = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (args.size() == 3) {
|
||||||
|
this.driverClassName = args.remove(0);
|
||||||
|
} else if (args.size() == 2) {
|
||||||
|
this.driverClassName = "com.mysql.jdbc.Driver";
|
||||||
|
} else {
|
||||||
|
throw new UsageException("Wrong number of arguments: "
|
||||||
|
+ hardArgs.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
this.jdbcUrl = args.get(0);
|
||||||
|
this.destination = args.get(1);
|
||||||
|
|
||||||
|
checkDriverClass();
|
||||||
|
checkJdbcUrl();
|
||||||
|
checkDestination();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkDriverClass() throws UsageException {
|
||||||
|
try {
|
||||||
|
Class.forName(this.driverClassName).newInstance();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new UsageException("Can't instantiate JDBC driver: "
|
||||||
|
+ this.driverClassName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkJdbcUrl() {
|
||||||
|
if ((!this.jdbcUrl.matches("\\busername\\b"))
|
||||||
|
|| (!this.jdbcUrl.matches("\\bpassword\\b"))) {
|
||||||
|
System.out.println("\nWARNING: The JDBC url probably should "
|
||||||
|
+ "contain values for username and password.\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkDestination() throws UsageException {
|
||||||
|
File destDir = new File(this.destination);
|
||||||
|
|
||||||
|
if (!destDir.isDirectory()) {
|
||||||
|
throw new UsageException(
|
||||||
|
"The destination directory does not exist: '"
|
||||||
|
+ this.destination + "'");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!destDir.canWrite()) {
|
||||||
|
throw new UsageException("Cannot write to '" + this.destination
|
||||||
|
+ "'");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(this.force || getDestinationFilenames().isEmpty())) {
|
||||||
|
throw new UsageException("The destination directory is not empty. "
|
||||||
|
+ "Choose another destination, or use the 'force' option");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> getDestinationFilenames() {
|
||||||
|
File destDir = new File(this.destination);
|
||||||
|
String[] filenames = destDir.list(new FilenameFilter() {
|
||||||
|
@Override
|
||||||
|
public boolean accept(File dir, String name) {
|
||||||
|
return !(name.equals(".") || name.equals(".."));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return Arrays.asList(filenames);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void translate() throws SQLException {
|
||||||
|
try {
|
||||||
|
sdbDataset = openSdbDataset();
|
||||||
|
tdbDataset = openTdbDataset();
|
||||||
|
copyGraphs();
|
||||||
|
} finally {
|
||||||
|
if (tdbDataset != null) {
|
||||||
|
tdbDataset.close();
|
||||||
|
}
|
||||||
|
if (sdbDataset != null) {
|
||||||
|
sdbDataset.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Dataset openSdbDataset() throws SQLException {
|
||||||
|
Connection conn = DriverManager.getConnection(this.jdbcUrl);
|
||||||
|
Store store = SDBFactory.connectStore(conn, makeSdbStoreDesc());
|
||||||
|
return SDBFactory.connectDataset(store);
|
||||||
|
}
|
||||||
|
|
||||||
|
private StoreDesc makeSdbStoreDesc() {
|
||||||
|
return new StoreDesc(LayoutType.LayoutTripleNodesHash,
|
||||||
|
DatabaseType.MySQL);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Dataset openTdbDataset() {
|
||||||
|
return TDBFactory.createDataset(new File(this.destination)
|
||||||
|
.getAbsolutePath());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void copyGraphs() {
|
||||||
|
for (Iterator<String> modelNames = sdbDataset.listNames(); modelNames
|
||||||
|
.hasNext();) {
|
||||||
|
String modelName = modelNames.next();
|
||||||
|
Model model = sdbDataset.getNamedModel(modelName);
|
||||||
|
System.out.println(String.format("Copying %6d triples: %s",
|
||||||
|
model.size(), modelName));
|
||||||
|
tdbDataset.addNamedModel(modelName, model);
|
||||||
|
model.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
try {
|
||||||
|
Sdb2Tdb sdb2tdb = new Sdb2Tdb(Arrays.asList(args));
|
||||||
|
sdb2tdb.translate();
|
||||||
|
|
||||||
|
} catch (UsageException e) {
|
||||||
|
System.out.println();
|
||||||
|
System.out.println(e.getMessage());
|
||||||
|
System.out.println(e.getProperUsage());
|
||||||
|
System.out.println();
|
||||||
|
} catch (SQLException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class UsageException extends Exception {
|
||||||
|
public UsageException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getProperUsage() {
|
||||||
|
return "Usage is: java -jar sdb2tdb [driver_class] <jdbcUrl> <destination_directory> [force]";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue