From 35f61bd4cd630d8f3de394a0880eb999e6cadb0c Mon Sep 17 00:00:00 2001
From: tpylak <tpylak>
Date: Sat, 3 Dec 2011 00:22:31 +0000
Subject: [PATCH] LMS-2664 allow to list materials (or any entities) 60 times
 faster if there are many of them - do not put more then 50K items in sql 'in'
 clause. It would be nice to revert this patch and make a fix on EODSQL level
 at some point.

SVN: 23865
---
 .../bo/common/AbstractBatchIterator.java      | 101 +++++++++++++++++
 .../bo/common/EntityPropertiesEnricher.java   |  61 +++++++++-
 .../AbstractBatchIteratorTest.java            | 106 ++++++++++++++++++
 3 files changed, 266 insertions(+), 2 deletions(-)
 create mode 100644 openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/AbstractBatchIterator.java
 create mode 100644 openbis/sourceTest/java/ch/systemsx/cisd/openbis/generic/server/business/bo/materiallister/AbstractBatchIteratorTest.java

diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/AbstractBatchIterator.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/AbstractBatchIterator.java
new file mode 100644
index 00000000000..bff06baf0ef
--- /dev/null
+++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/AbstractBatchIterator.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2011 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.generic.server.business.bo.common;
+
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongSet;
+
+import java.util.Iterator;
+
+import ch.systemsx.cisd.common.exceptions.NotImplementedException;
+
+/**
+ * Allows to convert an memory-inefficient iterator into a one which has lower memory consumption.
+ * Divides the items used to produce iterated elements into batches and calls the original iterator
+ * on each batch. The outside interface make the division into batches invisible, but the
+ * inefficient iterator is never used with a big number of items.
+ * 
+ * @author Tomasz Pylak
+ */
+abstract public class AbstractBatchIterator<T> implements Iterable<T>
+{
+    /**
+     * Creates an iterator which is not very memory efficient and should not be called with too many
+     * items at the same time.
+     */
+    abstract protected Iterable<T> createUnefficientIterator(LongSet itemsBatch);
+
+    private final LongSet items;
+
+    protected final int chunkSize;
+
+    protected AbstractBatchIterator(LongSet items, int chunkSize)
+    {
+        this.chunkSize = chunkSize;
+        this.items = items;
+    }
+
+    public Iterator<T> iterator()
+    {
+        final Iterator<Long> unprocessedItems = items.iterator();
+        return new Iterator<T>()
+            {
+                private Iterator<T> fetchedResults = null;
+
+                public boolean hasNext()
+                {
+                    return fetchNextPortionIfNeeded();
+                }
+
+                public T next()
+                {
+                    fetchNextPortionIfNeeded();
+                    return fetchedResults.next();
+                }
+
+                private boolean fetchNextPortionIfNeeded()
+                {
+                    // Special care was taken here to ensure that fetchedResults.hasNext() is not
+                    // called again if it returns false. Some implementations do not like that.
+                    while (true)
+                    {
+                        boolean fetchedHasNext =
+                                (fetchedResults != null && fetchedResults.hasNext());
+                        // there is a next element or we have processed everything
+                        if (fetchedHasNext || unprocessedItems.hasNext() == false)
+                        {
+                            return fetchedHasNext;
+                        }
+
+                        assert fetchedHasNext == false && unprocessedItems.hasNext();
+                        final LongSet nextPortion = new LongOpenHashSet();
+                        while (unprocessedItems.hasNext() && nextPortion.size() < chunkSize)
+                        {
+                            Long next = unprocessedItems.next();
+                            nextPortion.add(next);
+                        }
+                        fetchedResults = createUnefficientIterator(nextPortion).iterator();
+                    }
+                }
+
+                public void remove()
+                {
+                    throw new NotImplementedException();
+                }
+            };
+    }
+}
diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/EntityPropertiesEnricher.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/EntityPropertiesEnricher.java
index 09b7ff4ab67..5c91e24a398 100644
--- a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/EntityPropertiesEnricher.java
+++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/business/bo/common/EntityPropertiesEnricher.java
@@ -26,8 +26,8 @@ import ch.systemsx.cisd.openbis.generic.shared.basic.dto.GenericEntityProperty;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.IEntityPropertiesHolder;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.IEntityProperty;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.Material;
-import ch.systemsx.cisd.openbis.generic.shared.basic.dto.MaterialType;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.MaterialEntityProperty;
+import ch.systemsx.cisd.openbis.generic.shared.basic.dto.MaterialType;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.PropertyType;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.VocabularyTerm;
 import ch.systemsx.cisd.openbis.generic.shared.basic.dto.VocabularyTermEntityProperty;
@@ -47,7 +47,61 @@ public final class EntityPropertiesEnricher implements IEntityPropertiesEnricher
             final IEntityPropertySetListingQuery setQuery)
     {
         this.query = query;
-        this.propertySetQuery = setQuery;
+        this.propertySetQuery = createEfficientIterator(setQuery);
+    }
+
+    private IEntityPropertySetListingQuery createEfficientIterator(
+            final IEntityPropertySetListingQuery setQuery)
+    {
+        return new IEntityPropertySetListingQuery()
+            {
+                private final static int BATCH_SIZE = 50000;
+
+                public Iterable<GenericEntityPropertyRecord> getEntityPropertyGenericValues(
+                        LongSet entityIDs)
+                {
+                    return new AbstractBatchIterator<GenericEntityPropertyRecord>(entityIDs,
+                            BATCH_SIZE)
+                        {
+                            @Override
+                            protected Iterable<GenericEntityPropertyRecord> createUnefficientIterator(
+                                    LongSet ids)
+                            {
+                                return setQuery.getEntityPropertyGenericValues(ids);
+                            }
+                        };
+                }
+
+                public Iterable<VocabularyTermRecord> getEntityPropertyVocabularyTermValues(
+                        LongSet entityIDs)
+                {
+                    return new AbstractBatchIterator<VocabularyTermRecord>(entityIDs, BATCH_SIZE)
+                        {
+                            @Override
+                            protected Iterable<VocabularyTermRecord> createUnefficientIterator(
+                                    LongSet ids)
+                            {
+                                return setQuery.getEntityPropertyVocabularyTermValues(ids);
+                            }
+                        };
+                }
+
+                public Iterable<MaterialEntityPropertyRecord> getEntityPropertyMaterialValues(
+                        LongSet entityIDs)
+                {
+                    return new AbstractBatchIterator<MaterialEntityPropertyRecord>(entityIDs,
+                            BATCH_SIZE)
+                        {
+                            @Override
+                            protected Iterable<MaterialEntityPropertyRecord> createUnefficientIterator(
+                                    LongSet ids)
+                            {
+                                return setQuery.getEntityPropertyMaterialValues(ids);
+                            }
+                        };
+                }
+
+            };
     }
 
     /**
@@ -68,6 +122,7 @@ public final class EntityPropertiesEnricher implements IEntityPropertiesEnricher
             property.setScriptable(val.script_id != null);
             entity.getProperties().add(property);
         }
+
         // Controlled vocabulary properties
         Long2ObjectMap<String> vocabularyURLMap = null;
         Long2ObjectMap<VocabularyTerm> terms = new Long2ObjectOpenHashMap<VocabularyTerm>();
@@ -98,6 +153,7 @@ public final class EntityPropertiesEnricher implements IEntityPropertiesEnricher
             property.setPropertyType(propertyTypes.get(val.prty_id));
             entity.getProperties().add(property);
         }
+
         // Material-type properties
         Long2ObjectMap<MaterialType> materialTypes = null;
         Long2ObjectMap<Material> materials = new Long2ObjectOpenHashMap<Material>();
@@ -123,6 +179,7 @@ public final class EntityPropertiesEnricher implements IEntityPropertiesEnricher
             property.setPropertyType(propertyTypes.get(val.prty_id));
             entity.getProperties().add(property);
         }
+
     }
 
     private Long2ObjectMap<PropertyType> getPropertyTypes()
diff --git a/openbis/sourceTest/java/ch/systemsx/cisd/openbis/generic/server/business/bo/materiallister/AbstractBatchIteratorTest.java b/openbis/sourceTest/java/ch/systemsx/cisd/openbis/generic/server/business/bo/materiallister/AbstractBatchIteratorTest.java
new file mode 100644
index 00000000000..a1700968bc1
--- /dev/null
+++ b/openbis/sourceTest/java/ch/systemsx/cisd/openbis/generic/server/business/bo/materiallister/AbstractBatchIteratorTest.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2011 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.generic.server.business.bo.materiallister;
+
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongSet;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.testng.AssertJUnit;
+import org.testng.annotations.Test;
+
+import ch.systemsx.cisd.openbis.generic.server.business.bo.common.AbstractBatchIterator;
+
+/**
+ * @author Tomasz Pylak
+ */
+public class AbstractBatchIteratorTest extends AssertJUnit
+{
+    private static class TestPartialIterator extends AbstractBatchIterator<Long>
+    {
+        int callCounter = 0;
+
+        protected TestPartialIterator(final LongSet ids, int chunkSize)
+        {
+            super(ids, chunkSize);
+        }
+
+        @Override
+        protected Iterable<Long> createUnefficientIterator(final LongSet ids)
+        {
+            if (ids.size() > chunkSize)
+            {
+                fail(String.format(
+                        "Chunk of the wrong size requested, expected at most %d, was %d",
+                        chunkSize, ids.size()));
+            }
+            callCounter++;
+            return new Iterable<Long>()
+                {
+                    public Iterator<Long> iterator()
+                    {
+                        return ids.iterator();
+                    }
+                };
+        }
+    }
+
+    @Test
+    public void test()
+    {
+        LongSet request = createRequest();
+        TestPartialIterator iter = new TestPartialIterator(request, 3);
+
+        Iterator<Long> efficientIter = iter.iterator();
+        LongSet result = new LongOpenHashSet();
+        while (efficientIter.hasNext())
+        {
+            result.add(efficientIter.next());
+        }
+        assertEquals(request, result);
+    }
+
+    @Test
+    public void testEmpty()
+    {
+        LongSet request = createRequest();
+        AbstractBatchIterator<Long> iter = new AbstractBatchIterator<Long>(request, 3)
+            {
+                @Override
+                protected Iterable<Long> createUnefficientIterator(LongSet ids)
+                {
+                    return Collections.emptyList();
+                }
+            };
+
+        Iterator<Long> efficientIter = iter.iterator();
+        assertFalse(efficientIter.hasNext());
+    }
+
+    private LongSet createRequest()
+    {
+        LongSet request = new LongOpenHashSet();
+        for (int i = 0; i < 100; i++)
+        {
+            request.add(i * 100);
+        }
+        return request;
+    }
+
+}
-- 
GitLab