diff --git a/utilities/sdb_to_tdb/src/edu/cornell/mannlib/vitro/utilities/sdb2tdb/Sdb2Tdb.java b/utilities/sdb_to_tdb/src/edu/cornell/mannlib/vitro/utilities/sdb2tdb/Sdb2Tdb.java index 5a9283abf..1ab4866a0 100644 --- a/utilities/sdb_to_tdb/src/edu/cornell/mannlib/vitro/utilities/sdb2tdb/Sdb2Tdb.java +++ b/utilities/sdb_to_tdb/src/edu/cornell/mannlib/vitro/utilities/sdb2tdb/Sdb2Tdb.java @@ -3,7 +3,12 @@ package edu.cornell.mannlib.vitro.utilities.sdb2tdb; import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.FilenameFilter; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; @@ -12,8 +17,19 @@ import java.util.Arrays; import java.util.Iterator; import java.util.List; +import org.apache.jena.riot.RDFDataMgr; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.Dataset; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.sdb.SDB; import com.hp.hpl.jena.sdb.SDBFactory; import com.hp.hpl.jena.sdb.Store; import com.hp.hpl.jena.sdb.StoreDesc; @@ -22,13 +38,31 @@ import com.hp.hpl.jena.sdb.store.LayoutType; import com.hp.hpl.jena.tdb.TDBFactory; /** + * Copy all of the data from an SDB triple-store to a TDB triple-store. See + * README.txt for more details. + * + * Examples of invoking it: + * *
* java -jar sdb2tdb.jar \ * 'jdbc:mysql://localhost/vitrodb?user=vivoUser&password=vivoPass'\ * /usr/local/my/tdb + * + * java -Xms2048m -Xmx2048m -jar .work/sdb2tdb.jar \ + * 'jdbc:mysql://localhost/weill17?user=vivoUser&password=vivoPass' \ + * /Users/jeb228/Testing/instances/weill-develop/vivo_home/contentTdb \ + * force *+ * + * Each graph is copied separately. Small graphs are simply loaded into memory + * and transferred. Large graphs are read to produce a streaming result set + * which is written to a temporary file. That file is then read into a TDB + * model. + * + * This has been tested with graphs up to 6 million triples without crashing. */ public class Sdb2Tdb { + private static final int LARGE_MODEL_THRESHOLD = 500_000; private final String driverClassName; private final String jdbcUrl; private final String destination; @@ -74,10 +108,10 @@ public class Sdb2Tdb { } private void checkJdbcUrl() { - if ((!this.jdbcUrl.matches("\\busername\\b")) + if ((!this.jdbcUrl.matches("\\buser\\b")) || (!this.jdbcUrl.matches("\\bpassword\\b"))) { System.out.println("\nWARNING: The JDBC url probably should " - + "contain values for username and password.\n"); + + "contain values for user and password.\n"); } } @@ -112,7 +146,7 @@ public class Sdb2Tdb { return Arrays.asList(filenames); } - private void translate() throws SQLException { + private void translate() throws SQLException, IOException { try { sdbDataset = openSdbDataset(); tdbDataset = openTdbDataset(); @@ -130,6 +164,10 @@ public class Sdb2Tdb { private Dataset openSdbDataset() throws SQLException { Connection conn = DriverManager.getConnection(this.jdbcUrl); Store store = SDBFactory.connectStore(conn, makeSdbStoreDesc()); + + SDB.getContext().set(SDB.jdbcStream, Boolean.TRUE); + SDB.getContext().set(SDB.jdbcFetchSize, Integer.MIN_VALUE); + return SDBFactory.connectDataset(store); } @@ -143,18 +181,44 @@ public class Sdb2Tdb { .getAbsolutePath()); } - private void copyGraphs() { + private void copyGraphs() throws IOException { for (Iterator