From 0aa6f35e814da863be9bebcb58f4a511866c4ed8 Mon Sep 17 00:00:00 2001 From: pkupczyk <pkupczyk> Date: Sat, 25 Nov 2017 15:10:05 +0000 Subject: [PATCH] SSDM-5370 : openBIS sync datasource doesn't scale - in data source connector use the same logger as in the harvester (depending on the sync configuration it can log to a dedicated log file) + improve merging of resource list parts (e.g. fix xml namespaces problems) SVN: 38951 --- .../synchronizer/EntitySynchronizer.java | 2 +- .../DataSourceConnector.java | 45 ++++++++----------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/datastore_server/source/java/ch/ethz/sis/openbis/generic/server/dss/plugins/sync/harvester/synchronizer/EntitySynchronizer.java b/datastore_server/source/java/ch/ethz/sis/openbis/generic/server/dss/plugins/sync/harvester/synchronizer/EntitySynchronizer.java index 8a93da333dd..77b7fd670f0 100644 --- a/datastore_server/source/java/ch/ethz/sis/openbis/generic/server/dss/plugins/sync/harvester/synchronizer/EntitySynchronizer.java +++ b/datastore_server/source/java/ch/ethz/sis/openbis/generic/server/dss/plugins/sync/harvester/synchronizer/EntitySynchronizer.java @@ -186,7 +186,7 @@ public class EntitySynchronizer public Date syncronizeEntities() throws Exception { - DataSourceConnector dataSourceConnector = new DataSourceConnector(config.getDataSourceURI(), config.getAuthenticationCredentials()); + DataSourceConnector dataSourceConnector = new DataSourceConnector(config.getDataSourceURI(), config.getAuthenticationCredentials(), operationLog); return syncronizeEntities(dataSourceConnector); } diff --git a/datastore_server/source/java/ch/ethz/sis/openbis/generic/server/dss/plugins/sync/harvester/synchronizer/datasourceconnector/DataSourceConnector.java b/datastore_server/source/java/ch/ethz/sis/openbis/generic/server/dss/plugins/sync/harvester/synchronizer/datasourceconnector/DataSourceConnector.java index b670d331457..b6c3a342d5a 100644 --- a/datastore_server/source/java/ch/ethz/sis/openbis/generic/server/dss/plugins/sync/harvester/synchronizer/datasourceconnector/DataSourceConnector.java +++ b/datastore_server/source/java/ch/ethz/sis/openbis/generic/server/dss/plugins/sync/harvester/synchronizer/datasourceconnector/DataSourceConnector.java @@ -40,8 +40,6 @@ import org.xml.sax.SAXException; import ch.ethz.sis.openbis.generic.server.dss.plugins.sync.harvester.config.BasicAuthCredentials; import ch.systemsx.cisd.common.http.JettyHttpClientFactory; -import ch.systemsx.cisd.common.logging.LogCategory; -import ch.systemsx.cisd.common.logging.LogFactory; /** * @author Ganime Betul Akin @@ -49,20 +47,17 @@ import ch.systemsx.cisd.common.logging.LogFactory; public class DataSourceConnector implements IDataSourceConnector { - private static final Logger operationLog = LogFactory.getLogger(LogCategory.OPERATION, DataSourceConnector.class); - - final String ENTRY_START_TAG = "<url>"; - - final String ENTRY_FINISH_TAG = "</url>"; - private final String dataSourceUrl; private final BasicAuthCredentials authCredentials; - public DataSourceConnector(String url, BasicAuthCredentials authCredentials) + private Logger operationLog; + + public DataSourceConnector(String url, BasicAuthCredentials authCredentials, Logger operationLog) { this.dataSourceUrl = url; this.authCredentials = authCredentials; + this.operationLog = operationLog; } @Override @@ -80,7 +75,7 @@ public class DataSourceConnector implements IDataSourceConnector { operationLog.info("Received a resource list index (the resource list was too big and was split into parts)."); List<String> locations = getResourceListPartLocations(document); - List<String> parts = loadResourceListParts(client, locations); + List<Document> parts = loadResourceListParts(client, locations); return mergeResourceListParts(parts); } else { @@ -125,9 +120,9 @@ public class DataSourceConnector implements IDataSourceConnector return locations; } - private List<String> loadResourceListParts(HttpClient client, List<String> locations) throws Exception + private List<Document> loadResourceListParts(HttpClient client, List<String> locations) throws Exception { - List<String> parts = new ArrayList<String>(); + List<Document> parts = new ArrayList<Document>(); for (String location : locations) { @@ -135,37 +130,35 @@ public class DataSourceConnector implements IDataSourceConnector operationLog.info("Start loading a resource list part from " + location); ContentResponse response = getResponse(request); operationLog.info("Received the resource list part."); - parts.add(response.getContentAsString()); + parts.add(parse(response.getContent())); } return parts; } - private Document mergeResourceListParts(List<String> parts) throws Exception + private Document mergeResourceListParts(List<Document> parts) throws Exception { - StringBuilder merged = new StringBuilder(); + Document mergedDocument = parts.get(0); + Node mergedUrlset = mergedDocument.getFirstChild(); - if (parts.size() > 0) + for (int i = 1; i < parts.size(); i++) { - merged.append(parts.get(0).substring(0, parts.get(0).indexOf(ENTRY_START_TAG))); + Document part = parts.get(i); + Node urlset = part.getFirstChild(); - for (String part : parts) + for (int j = 0; j < urlset.getChildNodes().getLength(); j++) { - int firstEntryIndex = part.indexOf(ENTRY_START_TAG); - int lastEntryIndex = part.lastIndexOf(ENTRY_FINISH_TAG); - - if (firstEntryIndex != -1 && lastEntryIndex != -1 && firstEntryIndex < lastEntryIndex) + Node child = urlset.getChildNodes().item(j); + if (child.getNodeName().equals("url")) { - merged.append(part.substring(firstEntryIndex, lastEntryIndex + ENTRY_FINISH_TAG.length())); + mergedUrlset.appendChild(mergedDocument.importNode(child, true)); } } - - merged.append("</urlset>"); } operationLog.info("Merged the resource list parts."); - return parse(merged.toString().getBytes()); + return mergedDocument; } private Document parse(byte[] content) throws ParserConfigurationException, SAXException, IOException -- GitLab