First cut at the SDB - TDB converter: fails on large models (>2M triples)
This commit is contained in:
parent
89416efa77
commit
06be40e699
15 changed files with 291 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -10,3 +10,4 @@
|
|||
.build
|
||||
utilities/solrtester/.work
|
||||
utilities/rdbmigration/.work
|
||||
utilities/sdb_to_tdb/.work
|
||||
|
|
19
utilities/sdb_to_tdb/README.txt
Normal file
19
utilities/sdb_to_tdb/README.txt
Normal file
|
@ -0,0 +1,19 @@
|
|||
A tool to convert triples from SDB to TDB.
|
||||
|
||||
Unlike the RDF-migration tool, this tool is not tied to the VIVO
|
||||
properties files. Instead, you must provide:
|
||||
* The URL for the SDB, including username and password parameters
|
||||
* The directory path for the TDB.
|
||||
The directory must exist.
|
||||
The directory must be empty, unless the "force" option is used.
|
||||
|
||||
Examples:
|
||||
|
||||
java -jar sdb2tdb.jar \
|
||||
'jdbc:mysql://localhost/vitrodb?user=vivoUser&password=vivoPass' \
|
||||
/usr/local/my/tdb
|
||||
|
||||
java -Xms512m -Xmx4096m -jar .work/sdb2tdb.jar \
|
||||
'jdbc:mysql://localhost/weill17?user=vivoUser&password=vivoPass' \
|
||||
/Users/jeb228/Testing/instances/weill-develop/vivo_home/contentTdb \
|
||||
force
|
89
utilities/sdb_to_tdb/build.xml
Normal file
89
utilities/sdb_to_tdb/build.xml
Normal file
|
@ -0,0 +1,89 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
||||
<!-- $This file is distributed under the terms of the license in /doc/license.txt$ -->
|
||||
|
||||
<!-- ======================================================================
|
||||
A tool to convert SDB data into TDB.
|
||||
====================================================================== -->
|
||||
|
||||
<project name="SDB-to-TDB" default="describe">
|
||||
|
||||
<property name="working.dir" location=".work" />
|
||||
<property name="src.dir" location="src" />
|
||||
<property name="lib.dir" location="lib" />
|
||||
<property name="classes.dir" location="${working.dir}/classes" />
|
||||
<property name="jar.file" location="${working.dir}/sdb2tdb.jar" />
|
||||
|
||||
<!-- =================================
|
||||
target: describe
|
||||
================================= -->
|
||||
<target name="describe"
|
||||
description="--> Describe the targets (this is the default).">
|
||||
<echo>
|
||||
all - Compiles the tool and packs it into a JAR
|
||||
</echo>
|
||||
</target>
|
||||
|
||||
<!-- =================================
|
||||
target: all
|
||||
================================= -->
|
||||
<target name="all"
|
||||
depends="clean, jar"
|
||||
description="Build the tool from scratch and pack it into a JAR.">
|
||||
</target>
|
||||
|
||||
<!-- - - - - - - - - - - - - - - - - -
|
||||
target: clean
|
||||
- - - - - - - - - - - - - - - - - -->
|
||||
<target name="clean">
|
||||
<delete dir="${working.dir}" />
|
||||
</target>
|
||||
|
||||
<!-- - - - - - - - - - - - - - - - - -
|
||||
target: prepare
|
||||
- - - - - - - - - - - - - - - - - -->
|
||||
<target name="prepare">
|
||||
<mkdir dir="${working.dir}" />
|
||||
<mkdir dir="${classes.dir}" />
|
||||
</target>
|
||||
|
||||
<!-- - - - - - - - - - - - - - - - - -
|
||||
target: compile
|
||||
- - - - - - - - - - - - - - - - - -->
|
||||
<target name="compile" depends="prepare">
|
||||
<path id="main.compile.classpath">
|
||||
<fileset dir="${lib.dir}" includes="*.jar" />
|
||||
</path>
|
||||
|
||||
<javac srcdir="${src.dir}"
|
||||
destdir="${classes.dir}"
|
||||
debug="true"
|
||||
deprecation="true"
|
||||
encoding="UTF8"
|
||||
includeantruntime="false"
|
||||
optimize="true"
|
||||
source="1.7">
|
||||
<classpath refid="main.compile.classpath" />
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<!-- - - - - - - - - - - - - - - - - -
|
||||
target: jar
|
||||
- - - - - - - - - - - - - - - - - -->
|
||||
<target name="jar" depends="compile">
|
||||
<jar destfile="${jar.file}">
|
||||
<manifest>
|
||||
<attribute name="Main-Class"
|
||||
value="edu.cornell.mannlib.vitro.utilities.sdb2tdb.Sdb2Tdb" />
|
||||
</manifest>
|
||||
<fileset dir="${classes.dir}" />
|
||||
<archives>
|
||||
<zips>
|
||||
<fileset dir="${lib.dir}" includes="**/*.jar" />
|
||||
</zips>
|
||||
</archives>
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
</project>
|
||||
|
BIN
utilities/sdb_to_tdb/lib/jena-arq-2.10.1.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-arq-2.10.1.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/jena-core-2.10.1.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-core-2.10.1.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/jena-iri-0.9.6.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-iri-0.9.6.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/jena-sdb-1.3.6.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-sdb-1.3.6.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/jena-tdb-0.10.0.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/jena-tdb-0.10.0.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/log4j-1.2.16.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/log4j-1.2.16.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/mysql-connector-java-5.1.30-bin.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/mysql-connector-java-5.1.30-bin.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/slf4j-api-1.6.6.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/slf4j-api-1.6.6.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/slf4j-log4j12-1.6.6.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/slf4j-log4j12-1.6.6.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/xercesImpl-2.11.0.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/xercesImpl-2.11.0.jar
Normal file
Binary file not shown.
BIN
utilities/sdb_to_tdb/lib/xml-apis-1.4.01.jar
Normal file
BIN
utilities/sdb_to_tdb/lib/xml-apis-1.4.01.jar
Normal file
Binary file not shown.
|
@ -0,0 +1,182 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.utilities.sdb2tdb;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FilenameFilter;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import com.hp.hpl.jena.query.Dataset;
|
||||
import com.hp.hpl.jena.rdf.model.Model;
|
||||
import com.hp.hpl.jena.sdb.SDBFactory;
|
||||
import com.hp.hpl.jena.sdb.Store;
|
||||
import com.hp.hpl.jena.sdb.StoreDesc;
|
||||
import com.hp.hpl.jena.sdb.store.DatabaseType;
|
||||
import com.hp.hpl.jena.sdb.store.LayoutType;
|
||||
import com.hp.hpl.jena.tdb.TDBFactory;
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* java -jar sdb2tdb.jar \
|
||||
* 'jdbc:mysql://localhost/vitrodb?user=vivoUser&password=vivoPass'\
|
||||
* /usr/local/my/tdb
|
||||
* </pre>
|
||||
*/
|
||||
public class Sdb2Tdb {
|
||||
private final String driverClassName;
|
||||
private final String jdbcUrl;
|
||||
private final String destination;
|
||||
private final boolean force;
|
||||
|
||||
private Dataset sdbDataset;
|
||||
private Dataset tdbDataset;
|
||||
|
||||
public Sdb2Tdb(List<String> hardArgs) throws UsageException {
|
||||
List<String> args = new ArrayList<>(hardArgs);
|
||||
|
||||
if (!args.isEmpty() && args.indexOf("force") == (args.size() - 1)) {
|
||||
this.force = true;
|
||||
args.remove(args.size() - 1);
|
||||
} else {
|
||||
this.force = false;
|
||||
}
|
||||
|
||||
if (args.size() == 3) {
|
||||
this.driverClassName = args.remove(0);
|
||||
} else if (args.size() == 2) {
|
||||
this.driverClassName = "com.mysql.jdbc.Driver";
|
||||
} else {
|
||||
throw new UsageException("Wrong number of arguments: "
|
||||
+ hardArgs.size());
|
||||
}
|
||||
|
||||
this.jdbcUrl = args.get(0);
|
||||
this.destination = args.get(1);
|
||||
|
||||
checkDriverClass();
|
||||
checkJdbcUrl();
|
||||
checkDestination();
|
||||
}
|
||||
|
||||
private void checkDriverClass() throws UsageException {
|
||||
try {
|
||||
Class.forName(this.driverClassName).newInstance();
|
||||
} catch (Exception e) {
|
||||
throw new UsageException("Can't instantiate JDBC driver: "
|
||||
+ this.driverClassName);
|
||||
}
|
||||
}
|
||||
|
||||
private void checkJdbcUrl() {
|
||||
if ((!this.jdbcUrl.matches("\\busername\\b"))
|
||||
|| (!this.jdbcUrl.matches("\\bpassword\\b"))) {
|
||||
System.out.println("\nWARNING: The JDBC url probably should "
|
||||
+ "contain values for username and password.\n");
|
||||
}
|
||||
}
|
||||
|
||||
private void checkDestination() throws UsageException {
|
||||
File destDir = new File(this.destination);
|
||||
|
||||
if (!destDir.isDirectory()) {
|
||||
throw new UsageException(
|
||||
"The destination directory does not exist: '"
|
||||
+ this.destination + "'");
|
||||
}
|
||||
|
||||
if (!destDir.canWrite()) {
|
||||
throw new UsageException("Cannot write to '" + this.destination
|
||||
+ "'");
|
||||
}
|
||||
|
||||
if (!(this.force || getDestinationFilenames().isEmpty())) {
|
||||
throw new UsageException("The destination directory is not empty. "
|
||||
+ "Choose another destination, or use the 'force' option");
|
||||
}
|
||||
}
|
||||
|
||||
private List<String> getDestinationFilenames() {
|
||||
File destDir = new File(this.destination);
|
||||
String[] filenames = destDir.list(new FilenameFilter() {
|
||||
@Override
|
||||
public boolean accept(File dir, String name) {
|
||||
return !(name.equals(".") || name.equals(".."));
|
||||
}
|
||||
});
|
||||
return Arrays.asList(filenames);
|
||||
}
|
||||
|
||||
private void translate() throws SQLException {
|
||||
try {
|
||||
sdbDataset = openSdbDataset();
|
||||
tdbDataset = openTdbDataset();
|
||||
copyGraphs();
|
||||
} finally {
|
||||
if (tdbDataset != null) {
|
||||
tdbDataset.close();
|
||||
}
|
||||
if (sdbDataset != null) {
|
||||
sdbDataset.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Dataset openSdbDataset() throws SQLException {
|
||||
Connection conn = DriverManager.getConnection(this.jdbcUrl);
|
||||
Store store = SDBFactory.connectStore(conn, makeSdbStoreDesc());
|
||||
return SDBFactory.connectDataset(store);
|
||||
}
|
||||
|
||||
private StoreDesc makeSdbStoreDesc() {
|
||||
return new StoreDesc(LayoutType.LayoutTripleNodesHash,
|
||||
DatabaseType.MySQL);
|
||||
}
|
||||
|
||||
private Dataset openTdbDataset() {
|
||||
return TDBFactory.createDataset(new File(this.destination)
|
||||
.getAbsolutePath());
|
||||
}
|
||||
|
||||
private void copyGraphs() {
|
||||
for (Iterator<String> modelNames = sdbDataset.listNames(); modelNames
|
||||
.hasNext();) {
|
||||
String modelName = modelNames.next();
|
||||
Model model = sdbDataset.getNamedModel(modelName);
|
||||
System.out.println(String.format("Copying %6d triples: %s",
|
||||
model.size(), modelName));
|
||||
tdbDataset.addNamedModel(modelName, model);
|
||||
model.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
Sdb2Tdb sdb2tdb = new Sdb2Tdb(Arrays.asList(args));
|
||||
sdb2tdb.translate();
|
||||
|
||||
} catch (UsageException e) {
|
||||
System.out.println();
|
||||
System.out.println(e.getMessage());
|
||||
System.out.println(e.getProperUsage());
|
||||
System.out.println();
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static class UsageException extends Exception {
|
||||
public UsageException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public String getProperUsage() {
|
||||
return "Usage is: java -jar sdb2tdb [driver_class] <jdbcUrl> <destination_directory> [force]";
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue