From 35f61bd4cd630d8f3de394a0880eb999e6cadb0c Mon Sep 17 00:00:00 2001 From: tpylak <tpylak> Date: Sat, 3 Dec 2011 00:22:31 +0000 Subject: [PATCH] LMS-2664 allow to list materials (or any entities) 60 times faster if there are many of them - do not put more then 50K items in sql 'in' clause. It would be nice to revert this patch and make a fix on EODSQL level at some point. SVN: 23865 --- .../bo/common/AbstractBatchIterator.java | 101 +++++++++++++++++ .../bo/common/EntityPropertiesEnricher.java | 61 +++++++++- .../AbstractBatchIteratorTest.java | 106 ++++++++++++++++++ 3 files changed, 266 insertions(+), 2 deletions(-) create mode 100644 openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/AbstractBatchIterator.java create mode 100644 openbis/sourceTest/java/ch/systemsx/cisd/openbis/generic/server/business/bo/materiallister/AbstractBatchIteratorTest.java diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/AbstractBatchIterator.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/AbstractBatchIterator.java new file mode 100644 index 00000000000..bff06baf0ef --- /dev/null +++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/AbstractBatchIterator.java @@ -0,0 +1,101 @@ +/* + * Copyright 2011 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.openbis.generic.server.business.bo.common; + +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import it.unimi.dsi.fastutil.longs.LongSet; + +import java.util.Iterator; + +import ch.systemsx.cisd.common.exceptions.NotImplementedException; + +/** + * Allows to convert an memory-inefficient iterator into a one which has lower memory consumption. + * Divides the items used to produce iterated elements into batches and calls the original iterator + * on each batch. The outside interface make the division into batches invisible, but the + * inefficient iterator is never used with a big number of items. + * + * @author Tomasz Pylak + */ +abstract public class AbstractBatchIterator<T> implements Iterable<T> +{ + /** + * Creates an iterator which is not very memory efficient and should not be called with too many + * items at the same time. + */ + abstract protected Iterable<T> createUnefficientIterator(LongSet itemsBatch); + + private final LongSet items; + + protected final int chunkSize; + + protected AbstractBatchIterator(LongSet items, int chunkSize) + { + this.chunkSize = chunkSize; + this.items = items; + } + + public Iterator<T> iterator() + { + final Iterator<Long> unprocessedItems = items.iterator(); + return new Iterator<T>() + { + private Iterator<T> fetchedResults = null; + + public boolean hasNext() + { + return fetchNextPortionIfNeeded(); + } + + public T next() + { + fetchNextPortionIfNeeded(); + return fetchedResults.next(); + } + + private boolean fetchNextPortionIfNeeded() + { + // Special care was taken here to ensure that fetchedResults.hasNext() is not + // called again if it returns false. Some implementations do not like that. + while (true) + { + boolean fetchedHasNext = + (fetchedResults != null && fetchedResults.hasNext()); + // there is a next element or we have processed everything + if (fetchedHasNext || unprocessedItems.hasNext() == false) + { + return fetchedHasNext; + } + + assert fetchedHasNext == false && unprocessedItems.hasNext(); + final LongSet nextPortion = new LongOpenHashSet(); + while (unprocessedItems.hasNext() && nextPortion.size() < chunkSize) + { + Long next = unprocessedItems.next(); + nextPortion.add(next); + } + fetchedResults = createUnefficientIterator(nextPortion).iterator(); + } + } + + public void remove() + { + throw new NotImplementedException(); + } + }; + } +} diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/EntityPropertiesEnricher.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/EntityPropertiesEnricher.java index 09b7ff4ab67..5c91e24a398 100644 --- a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/EntityPropertiesEnricher.java +++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/EntityPropertiesEnricher.java @@ -26,8 +26,8 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.GenericEntityProperty; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.IEntityPropertiesHolder; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.IEntityProperty; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Material; -import ch.systemsx.cisd.openbis.generic.shared.basic.dto.MaterialType; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.MaterialEntityProperty; +import ch.systemsx.cisd.openbis.generic.shared.basic.dto.MaterialType; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.PropertyType; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.VocabularyTerm; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.VocabularyTermEntityProperty; @@ -47,7 +47,61 @@ public final class EntityPropertiesEnricher implements IEntityPropertiesEnricher final IEntityPropertySetListingQuery setQuery) { this.query = query; - this.propertySetQuery = setQuery; + this.propertySetQuery = createEfficientIterator(setQuery); + } + + private IEntityPropertySetListingQuery createEfficientIterator( + final IEntityPropertySetListingQuery setQuery) + { + return new IEntityPropertySetListingQuery() + { + private final static int BATCH_SIZE = 50000; + + public Iterable<GenericEntityPropertyRecord> getEntityPropertyGenericValues( + LongSet entityIDs) + { + return new AbstractBatchIterator<GenericEntityPropertyRecord>(entityIDs, + BATCH_SIZE) + { + @Override + protected Iterable<GenericEntityPropertyRecord> createUnefficientIterator( + LongSet ids) + { + return setQuery.getEntityPropertyGenericValues(ids); + } + }; + } + + public Iterable<VocabularyTermRecord> getEntityPropertyVocabularyTermValues( + LongSet entityIDs) + { + return new AbstractBatchIterator<VocabularyTermRecord>(entityIDs, BATCH_SIZE) + { + @Override + protected Iterable<VocabularyTermRecord> createUnefficientIterator( + LongSet ids) + { + return setQuery.getEntityPropertyVocabularyTermValues(ids); + } + }; + } + + public Iterable<MaterialEntityPropertyRecord> getEntityPropertyMaterialValues( + LongSet entityIDs) + { + return new AbstractBatchIterator<MaterialEntityPropertyRecord>(entityIDs, + BATCH_SIZE) + { + @Override + protected Iterable<MaterialEntityPropertyRecord> createUnefficientIterator( + LongSet ids) + { + return setQuery.getEntityPropertyMaterialValues(ids); + } + }; + } + + }; } /** @@ -68,6 +122,7 @@ public final class EntityPropertiesEnricher implements IEntityPropertiesEnricher property.setScriptable(val.script_id != null); entity.getProperties().add(property); } + // Controlled vocabulary properties Long2ObjectMap<String> vocabularyURLMap = null; Long2ObjectMap<VocabularyTerm> terms = new Long2ObjectOpenHashMap<VocabularyTerm>(); @@ -98,6 +153,7 @@ public final class EntityPropertiesEnricher implements IEntityPropertiesEnricher property.setPropertyType(propertyTypes.get(val.prty_id)); entity.getProperties().add(property); } + // Material-type properties Long2ObjectMap<MaterialType> materialTypes = null; Long2ObjectMap<Material> materials = new Long2ObjectOpenHashMap<Material>(); @@ -123,6 +179,7 @@ public final class EntityPropertiesEnricher implements IEntityPropertiesEnricher property.setPropertyType(propertyTypes.get(val.prty_id)); entity.getProperties().add(property); } + } private Long2ObjectMap<PropertyType> getPropertyTypes() diff --git a/openbis/sourceTest/java/ch/systemsx/cisd/openbis/generic/server/business/bo/materiallister/AbstractBatchIteratorTest.java b/openbis/sourceTest/java/ch/systemsx/cisd/openbis/generic/server/business/bo/materiallister/AbstractBatchIteratorTest.java new file mode 100644 index 00000000000..a1700968bc1 --- /dev/null +++ b/openbis/sourceTest/java/ch/systemsx/cisd/openbis/generic/server/business/bo/materiallister/AbstractBatchIteratorTest.java @@ -0,0 +1,106 @@ +/* + * Copyright 2011 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.openbis.generic.server.business.bo.materiallister; + +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import it.unimi.dsi.fastutil.longs.LongSet; + +import java.util.Collections; +import java.util.Iterator; + +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +import ch.systemsx.cisd.openbis.generic.server.business.bo.common.AbstractBatchIterator; + +/** + * @author Tomasz Pylak + */ +public class AbstractBatchIteratorTest extends AssertJUnit +{ + private static class TestPartialIterator extends AbstractBatchIterator<Long> + { + int callCounter = 0; + + protected TestPartialIterator(final LongSet ids, int chunkSize) + { + super(ids, chunkSize); + } + + @Override + protected Iterable<Long> createUnefficientIterator(final LongSet ids) + { + if (ids.size() > chunkSize) + { + fail(String.format( + "Chunk of the wrong size requested, expected at most %d, was %d", + chunkSize, ids.size())); + } + callCounter++; + return new Iterable<Long>() + { + public Iterator<Long> iterator() + { + return ids.iterator(); + } + }; + } + } + + @Test + public void test() + { + LongSet request = createRequest(); + TestPartialIterator iter = new TestPartialIterator(request, 3); + + Iterator<Long> efficientIter = iter.iterator(); + LongSet result = new LongOpenHashSet(); + while (efficientIter.hasNext()) + { + result.add(efficientIter.next()); + } + assertEquals(request, result); + } + + @Test + public void testEmpty() + { + LongSet request = createRequest(); + AbstractBatchIterator<Long> iter = new AbstractBatchIterator<Long>(request, 3) + { + @Override + protected Iterable<Long> createUnefficientIterator(LongSet ids) + { + return Collections.emptyList(); + } + }; + + Iterator<Long> efficientIter = iter.iterator(); + assertFalse(efficientIter.hasNext()); + } + + private LongSet createRequest() + { + LongSet request = new LongOpenHashSet(); + for (int i = 0; i < 100; i++) + { + request.add(i * 100); + } + return request; + } + +} -- GitLab