VIVO-823 Improve the TDB implementation of RDFService

Use a single dataset for long-term and short-term use. Get read and write locks for all access to the dataset.
Create a "VitroConnection.DataSource.tdbDirectory" property that, when set, forces the use of TDB for the content models and specifies the directory where that TDB store is located.
This commit is contained in:
Jim Blake 2014-07-28 11:54:12 -04:00
parent 0e5ee7226b
commit 2baa2c33e0
4 changed files with 141 additions and 34 deletions

View file

@ -3,14 +3,18 @@
package edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.tdb; package edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.tdb;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.List;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.query.Dataset; import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.tdb.TDB; import com.hp.hpl.jena.tdb.TDB;
import com.hp.hpl.jena.tdb.TDBFactory; import com.hp.hpl.jena.tdb.TDBFactory;
@ -65,10 +69,16 @@ public class RDFServiceTDB extends RDFServiceJena {
log.debug("Change Set: " + changeSet); log.debug("Change Set: " + changeSet);
} }
notifyListenersOfPreChangeEvents(changeSet); notifyListenersOfPreChangeEvents(changeSet);
applyChangeSetToModel(changeSet, dataset); applyChangeSetToModel(changeSet, dataset);
TDB.sync(dataset);
dataset.getLock().enterCriticalSection(Lock.WRITE);
try {
TDB.sync(dataset);
} finally {
dataset.getLock().leaveCriticalSection();
}
notifyListenersOfChanges(changeSet); notifyListenersOfChanges(changeSet);
notifyListenersOfPostChangeEvents(changeSet); notifyListenersOfPostChangeEvents(changeSet);
@ -82,7 +92,77 @@ public class RDFServiceTDB extends RDFServiceJena {
@Override @Override
public void close() { public void close() {
if (this.dataset != null) { if (this.dataset != null) {
dataset.close(); dataset.getLock().enterCriticalSection(Lock.WRITE);
try {
dataset.close();
} finally {
dataset.getLock().leaveCriticalSection();
}
} }
} }
/**
 * Runs a SPARQL CONSTRUCT query while holding the dataset lock in READ
 * mode, delegating the actual work to the superclass.
 *
 * NOTE(review): the lock is released before the caller consumes the
 * returned stream; presumably the superclass hands back a fully
 * materialized result -- confirm.
 *
 * @param query        the query text, passed through unchanged
 * @param resultFormat the serialization format, passed through unchanged
 * @return whatever the superclass implementation returns
 * @throws RDFServiceException if the superclass implementation throws it
 */
@Override
public InputStream sparqlConstructQuery(String query,
        ModelSerializationFormat resultFormat) throws RDFServiceException {
    final Lock readLock = dataset.getLock();
    readLock.enterCriticalSection(Lock.READ);
    try {
        InputStream constructed = super.sparqlConstructQuery(query,
                resultFormat);
        return constructed;
    } finally {
        // Always release, even if the query fails.
        readLock.leaveCriticalSection();
    }
}
/**
 * Runs a SPARQL DESCRIBE query while holding the dataset lock in READ
 * mode, delegating the actual work to the superclass.
 *
 * NOTE(review): the lock is released before the caller consumes the
 * returned stream; presumably the superclass hands back a fully
 * materialized result -- confirm.
 *
 * @param query        the query text, passed through unchanged
 * @param resultFormat the serialization format, passed through unchanged
 * @return whatever the superclass implementation returns
 * @throws RDFServiceException if the superclass implementation throws it
 */
@Override
public InputStream sparqlDescribeQuery(String query,
        ModelSerializationFormat resultFormat) throws RDFServiceException {
    final Lock readLock = dataset.getLock();
    readLock.enterCriticalSection(Lock.READ);
    try {
        InputStream described = super.sparqlDescribeQuery(query,
                resultFormat);
        return described;
    } finally {
        // Always release, even if the query fails.
        readLock.leaveCriticalSection();
    }
}
/**
 * Runs a SPARQL SELECT query, writing to the supplied stream, while
 * holding the dataset lock in READ mode. The work itself is done by the
 * superclass implementation.
 *
 * @param query        the query text, passed through unchanged
 * @param resultFormat the result format, passed through unchanged
 * @param outputStream the destination handed to the superclass
 * @throws RDFServiceException if the superclass implementation throws it
 */
@Override
public void sparqlSelectQuery(String query, ResultFormat resultFormat,
        OutputStream outputStream) throws RDFServiceException {
    final Lock readLock = dataset.getLock();
    readLock.enterCriticalSection(Lock.READ);
    try {
        super.sparqlSelectQuery(query, resultFormat, outputStream);
    } finally {
        // Always release, even if the query fails.
        readLock.leaveCriticalSection();
    }
}
/**
 * Runs a SPARQL SELECT query while holding the dataset lock in READ mode,
 * delegating the actual work to the superclass.
 *
 * NOTE(review): the lock is released before the caller consumes the
 * returned stream; presumably the superclass hands back a fully
 * materialized result -- confirm.
 *
 * @param query        the query text, passed through unchanged
 * @param resultFormat the result format, passed through unchanged
 * @return whatever the superclass implementation returns
 * @throws RDFServiceException if the superclass implementation throws it
 */
@Override
public InputStream sparqlSelectQuery(String query, ResultFormat resultFormat)
        throws RDFServiceException {
    final Lock readLock = dataset.getLock();
    readLock.enterCriticalSection(Lock.READ);
    try {
        InputStream selected = super.sparqlSelectQuery(query, resultFormat);
        return selected;
    } finally {
        // Always release, even if the query fails.
        readLock.leaveCriticalSection();
    }
}
/**
 * Runs a SPARQL ASK query while holding the dataset lock in READ mode,
 * delegating the actual work to the superclass.
 *
 * @param query the query text, passed through unchanged
 * @return the boolean answer produced by the superclass implementation
 * @throws RDFServiceException if the superclass implementation throws it
 */
@Override
public boolean sparqlAskQuery(String query) throws RDFServiceException {
    final Lock readLock = dataset.getLock();
    readLock.enterCriticalSection(Lock.READ);
    try {
        boolean answer = super.sparqlAskQuery(query);
        return answer;
    } finally {
        // Always release, even if the query fails.
        readLock.leaveCriticalSection();
    }
}
/**
 * Obtains the list of graph URIs from the superclass while holding the
 * dataset lock in READ mode.
 *
 * @return the list produced by the superclass implementation
 * @throws RDFServiceException if the superclass implementation throws it
 */
@Override
public List<String> getGraphURIs() throws RDFServiceException {
    final Lock readLock = dataset.getLock();
    readLock.enterCriticalSection(Lock.READ);
    try {
        List<String> graphUris = super.getGraphURIs();
        return graphUris;
    } finally {
        // Always release, even if the lookup fails.
        readLock.leaveCriticalSection();
    }
}
} }

View file

@ -2,7 +2,10 @@
package edu.cornell.mannlib.vitro.webapp.servlet.setup.rdfsetup; package edu.cornell.mannlib.vitro.webapp.servlet.setup.rdfsetup;
import static edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils.WhichService.CONFIGURATION;
import static edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils.WhichService.CONTENT;
import static edu.cornell.mannlib.vitro.webapp.servlet.setup.rdfsetup.impl.sparql.RDFSourceSPARQL.PROPERTY_SPARQL_ENDPOINT_URI; import static edu.cornell.mannlib.vitro.webapp.servlet.setup.rdfsetup.impl.sparql.RDFSourceSPARQL.PROPERTY_SPARQL_ENDPOINT_URI;
import static edu.cornell.mannlib.vitro.webapp.servlet.setup.rdfsetup.impl.tdb.RDFSourceTDB.PROPERTY_CONTENT_TDB_PATH;
import javax.servlet.ServletContext; import javax.servlet.ServletContext;
import javax.servlet.ServletContextEvent; import javax.servlet.ServletContextEvent;
@ -70,10 +73,12 @@ public class RDFSetup implements ServletContextListener {
private void createRdfSources() { private void createRdfSources() {
if (isSparqlEndpointContentConfigured()) { if (isSparqlEndpointContentConfigured()) {
contentRdfSource = new RDFSourceSPARQL(ctx, this); contentRdfSource = new RDFSourceSPARQL(ctx, this);
} else if (isTdbConfigured()) {
contentRdfSource = new RDFSourceTDB(ctx, this, CONTENT);
} else { } else {
contentRdfSource = new RDFSourceSDB(ctx, this); contentRdfSource = new RDFSourceSDB(ctx, this);
} }
configurationRdfSource = new RDFSourceTDB(ctx, this); configurationRdfSource = new RDFSourceTDB(ctx, this, CONFIGURATION);
} }
private boolean isSparqlEndpointContentConfigured() { private boolean isSparqlEndpointContentConfigured() {
@ -81,6 +86,11 @@ public class RDFSetup implements ServletContextListener {
.getProperty(PROPERTY_SPARQL_ENDPOINT_URI)); .getProperty(PROPERTY_SPARQL_ENDPOINT_URI));
} }
/**
 * Reports whether the content TDB path property has a non-blank value in
 * the configuration properties.
 */
private boolean isTdbConfigured() {
    String contentTdbPath = configProps
            .getProperty(PROPERTY_CONTENT_TDB_PATH);
    return StringUtils.isNotBlank(contentTdbPath);
}
@Override @Override
public void contextDestroyed(ServletContextEvent sce) { public void contextDestroyed(ServletContextEvent sce) {
if (configurationRdfSource != null) { if (configurationRdfSource != null) {

View file

@ -9,16 +9,14 @@ import edu.cornell.mannlib.vitro.webapp.modelaccess.ContentModelMakerFactory;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelMakerFactory; import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelMakerFactory;
import edu.cornell.mannlib.vitro.webapp.modelaccess.adapters.ListCachingModelMaker; import edu.cornell.mannlib.vitro.webapp.modelaccess.adapters.ListCachingModelMaker;
import edu.cornell.mannlib.vitro.webapp.modelaccess.adapters.MemoryMappingModelMaker; import edu.cornell.mannlib.vitro.webapp.modelaccess.adapters.MemoryMappingModelMaker;
import edu.cornell.mannlib.vitro.webapp.modelaccess.adapters.ShadowingModelMaker;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
/** /**
* In TDB, is there any difference between short-term and long-term connections? * In TDB, is there any difference between short-term and long-term connections?
* For now, use long-term connections for all models, memory-mapping the small
* ones.
* *
* Anyway, memory-map the small models, and use a short-term connection for the * RDFService doesn't support empty models, so support them with ListCaching.
* others (when available).
*
* RDFService doesn't support empty models, so support them with ListCaching
*/ */
public class ContentModelMakerFactoryTDB extends ContentModelMakerFactory public class ContentModelMakerFactoryTDB extends ContentModelMakerFactory
implements ModelMakerFactory { implements ModelMakerFactory {
@ -26,14 +24,13 @@ public class ContentModelMakerFactoryTDB extends ContentModelMakerFactory
private final ModelMaker longTermModelMaker; private final ModelMaker longTermModelMaker;
public ContentModelMakerFactoryTDB(RDFService longTermRdfService) { public ContentModelMakerFactoryTDB(RDFService longTermRdfService) {
this.longTermModelMaker = new ListCachingModelMaker(new MemoryMappingModelMaker( this.longTermModelMaker = new ListCachingModelMaker(
new RDFServiceModelMaker(longTermRdfService), new MemoryMappingModelMaker(new RDFServiceModelMaker(
SMALL_CONTENT_MODELS)); longTermRdfService), SMALL_CONTENT_MODELS));
} }
/** /**
* The small content models (tbox, app_metadata) are memory mapped, for * The small content models are memory mapped, for speed.
* speed.
*/ */
@Override @Override
public ModelMaker getModelMaker(RDFService longTermRdfService) { public ModelMaker getModelMaker(RDFService longTermRdfService) {
@ -41,19 +38,12 @@ public class ContentModelMakerFactoryTDB extends ContentModelMakerFactory
} }
/** /**
* For short-term use, the large models (abox) will come from a short-term * There are no connections or connection pool, so short-term use is the
* service. The small models can be the memory-mapped ones that we created * same as long-term use.
* for long-term use.
*/ */
@Override @Override
public ModelMaker getShortTermModelMaker(RDFService shortTermRdfService) { public ModelMaker getShortTermModelMaker(RDFService shortTermRdfService) {
ModelMaker shortTermModelMaker = new RDFServiceModelMaker( return addContentDecorators(longTermModelMaker);
shortTermRdfService);
// No need to create a fresh memory map of the small models: use the
// long-term ones.
return addContentDecorators(new ShadowingModelMaker(
shortTermModelMaker, longTermModelMaker, SMALL_CONTENT_MODELS));
} }
} }

View file

@ -2,6 +2,9 @@
package edu.cornell.mannlib.vitro.webapp.servlet.setup.rdfsetup.impl.tdb; package edu.cornell.mannlib.vitro.webapp.servlet.setup.rdfsetup.impl.tdb;
import static edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils.WhichService.CONFIGURATION;
import static edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils.WhichService.CONTENT;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
@ -11,17 +14,21 @@ import javax.servlet.ServletContextListener;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.tdb.TDB;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties; import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelMakerFactory; import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelMakerFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceFactorySingle; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceFactorySingle;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils.WhichService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.tdb.RDFServiceTDB; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.tdb.RDFServiceTDB;
import edu.cornell.mannlib.vitro.webapp.servlet.setup.rdfsetup.RDFSource; import edu.cornell.mannlib.vitro.webapp.servlet.setup.rdfsetup.RDFSource;
import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus; import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus;
/** /**
* Create the connection to the TDB triple-store. * Create the connection to the TDB triple-store. This connection is either for
* CONTENT or for CONFIGURATION, but not both.
* *
* Create the RDFService on the directory. Create the RDFServiceFactory. * Create the RDFService on the directory. Create the RDFServiceFactory.
*/ */
@ -29,19 +36,33 @@ public class RDFSourceTDB implements RDFSource {
private static final Log log = LogFactory.getLog(RDFSourceTDB.class); private static final Log log = LogFactory.getLog(RDFSourceTDB.class);
private static final String DIRECTORY_TDB = "tdbModels"; private static final String DIRECTORY_TDB = "tdbModels";
public static final String PROPERTY_CONTENT_TDB_PATH = "VitroConnection.DataSource.tdbDirectory";
private final ConfigurationProperties props; private final ConfigurationProperties props;
private final StartupStatus ss; private final StartupStatus ss;
private final WhichService which;
private final RDFService rdfService; private final RDFService rdfService;
private final RDFServiceFactory rdfServiceFactory; private final RDFServiceFactory rdfServiceFactory;
public RDFSourceTDB(ServletContext ctx, ServletContextListener parent) { public RDFSourceTDB(ServletContext ctx, ServletContextListener parent,
WhichService which) {
this.props = ConfigurationProperties.getBean(ctx); this.props = ConfigurationProperties.getBean(ctx);
this.ss = StartupStatus.getBean(ctx); this.ss = StartupStatus.getBean(ctx);
this.which = which;
configureTDB();
String tdbPath;
if (CONTENT == which) {
tdbPath = props.getProperty(PROPERTY_CONTENT_TDB_PATH);
} else {
String vitroHome = props.getProperty("vitro.home");
tdbPath = vitroHome + File.separatorChar + DIRECTORY_TDB;
}
try { try {
this.rdfService = createRdfService(); this.rdfService = new RDFServiceTDB(tdbPath);
this.rdfServiceFactory = createRDFServiceFactory(); this.rdfServiceFactory = createRDFServiceFactory();
ss.info(parent, "Initialized the RDF source for TDB"); ss.info(parent, "Initialized the RDF source for TDB");
} catch (IOException e) { } catch (IOException e) {
@ -50,10 +71,8 @@ public class RDFSourceTDB implements RDFSource {
} }
} }
private RDFService createRdfService() throws IOException { private void configureTDB() {
String vitroHome = props.getProperty("vitro.home"); TDB.getContext().setTrue(TDB.symUnionDefaultGraph);
String directoryPath = vitroHome + File.separatorChar + DIRECTORY_TDB;
return new RDFServiceTDB(directoryPath);
} }
private RDFServiceFactory createRDFServiceFactory() { private RDFServiceFactory createRDFServiceFactory() {
@ -67,12 +86,20 @@ public class RDFSourceTDB implements RDFSource {
@Override @Override
public ModelMakerFactory getContentModelMakerFactory() { public ModelMakerFactory getContentModelMakerFactory() {
return new ContentModelMakerFactoryTDB(this.rdfService); if (CONTENT == which) {
return new ContentModelMakerFactoryTDB(this.rdfService);
} else {
throw new IllegalStateException("This RDFSource is for " + which);
}
} }
@Override @Override
public ModelMakerFactory getConfigurationModelMakerFactory() { public ModelMakerFactory getConfigurationModelMakerFactory() {
return new ConfigurationModelMakerFactoryTDB(this.rdfService); if (CONFIGURATION == which) {
return new ConfigurationModelMakerFactoryTDB(this.rdfService);
} else {
throw new IllegalStateException("This RDFSource is for " + which);
}
} }
@Override @Override