From afda0bc563cdfdf8f22da318b5deffd121618269 Mon Sep 17 00:00:00 2001
From: anttil <anttil>
Date: Fri, 5 Feb 2016 11:29:17 +0000
Subject: [PATCH] SSDM-3147: Global search: XML stripping with wildcard mode
 showed empty matches in result

SVN: 35642
---
 .../dataaccess/db/HibernateSearchDAO.java     | 53 +++++++++++++++++--
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/dataaccess/db/HibernateSearchDAO.java b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/dataaccess/db/HibernateSearchDAO.java
index 9f9ca664138..cc9d375da80 100644
--- a/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/dataaccess/db/HibernateSearchDAO.java
+++ b/openbis/source/java/ch/systemsx/cisd/openbis/generic/server/dataaccess/db/HibernateSearchDAO.java
@@ -17,6 +17,7 @@
 package ch.systemsx.cisd.openbis.generic.server.dataaccess.db;
 
 import java.io.IOException;
+import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -29,6 +30,10 @@ import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamReader;
+
 import org.apache.commons.lang.StringUtils;
 import org.apache.log4j.Logger;
 import org.apache.lucene.analysis.Analyzer;
@@ -73,6 +78,8 @@ import org.springframework.jdbc.support.JdbcAccessor;
 import org.springframework.orm.hibernate4.HibernateCallback;
 import org.springframework.orm.hibernate4.support.HibernateDaoSupport;
 
+import ch.systemsx.cisd.common.collection.CollectionUtils;
+import ch.systemsx.cisd.common.collection.CollectionUtils.ICollectionFilter;
 import ch.systemsx.cisd.common.exceptions.UserFailureException;
 import ch.systemsx.cisd.common.logging.LogCategory;
 import ch.systemsx.cisd.common.logging.LogFactory;
@@ -113,6 +120,8 @@ final class HibernateSearchDAO extends HibernateDaoSupport implements IHibernate
 
     private Map<String, DocValuesType> fieldTypesCache;
 
+    private final XMLInputFactory xif = XMLInputFactory.newFactory();
+
     HibernateSearchDAO(final SessionFactory sessionFactory,
             HibernateSearchContext hibernateSearchContext)
     {
@@ -378,17 +387,18 @@ final class HibernateSearchDAO extends HibernateDaoSupport implements IHibernate
                             if (useWildcardSearchMode)
                             {
                                 Pattern pattern = Pattern.compile("(?s)(^|\\s)" + q.toLowerCase().replace("*", ".*").replace("?", ".?") + "($|\\s)");
-                                Matcher matcher = pattern.matcher(content[i].toLowerCase());
 
+                                String cont = stripXml(content[i].toLowerCase());
+                                Matcher matcher = pattern.matcher(cont);
                                 while (matcher.find())
                                 {
                                     int start = matcher.start();
                                     int end = matcher.end();
-                                    score += getScore(content[i].toLowerCase(), start, end, fields[i], useWildcardSearchMode);
+                                    score += getScore(cont, start, end, fields[i], useWildcardSearchMode);
                                 }
                             } else
                             {
-                                String rest = content[i].toLowerCase();
+                                String rest = stripXml(content[i].toLowerCase());
                                 while (rest.length() > 0)
                                 {
                                     int start = rest.indexOf(q.toLowerCase());
@@ -483,6 +493,34 @@ final class HibernateSearchDAO extends HibernateDaoSupport implements IHibernate
                     return createMatchingEntity((Document) tuple[1], text, score);
                 }
 
+                /** Extracts the character data of content that looks like XML (a space follows each text
+                 *  event); any other content, or content that fails to parse, is returned unchanged. */
+                private String stripXml(String cont)
+                {
+                    if (!cont.startsWith("<") || !cont.endsWith(">"))
+                    {
+                        return cont;
+                    }
+                    StringBuilder text = new StringBuilder();
+                    try
+                    {
+                        // NOTE(review): no hardening of xif is visible in this patch - confirm DTDs/external entities are disabled (XXE).
+                        XMLStreamReader xsr = xif.createXMLStreamReader(new StringReader(cont));
+                        while (xsr.hasNext())
+                        {
+                            if (xsr.next() == XMLStreamConstants.CHARACTERS)
+                            {
+                                text.append(xsr.getText()).append(' ');
+                            }
+                        }
+                    } catch (Exception e)
+                    {
+                        // Deliberate best effort: content that merely resembles XML is used as is, any partial result is dropped.
+                        return cont;
+                    }
+                    return text.toString();
+                }
+
                 private double getScore(String text, int start, int end, String field, boolean wildcard)
                 {
                     boolean fullmatch = (start == 0 || !StringUtils.isAlphanumeric(text.substring(start - 1, start))) &&
@@ -600,8 +638,15 @@ final class HibernateSearchDAO extends HibernateDaoSupport implements IHibernate
 
         List<?> list = hibernateQuery.list();
         List<MatchingEntity> result = AbstractDAO.cast(list);
+        return CollectionUtils.filter(result, new ICollectionFilter<MatchingEntity>()
+            {
+                @Override
+                public boolean isPresent(MatchingEntity element)
+                {
+                    return element != null && element.getScore() > 0.0;
+                }
+            });
 
-        return filterNulls(result);
     }
 
     // detailed search
-- 
GitLab