From 9a004c9c9166ab52f58dcdd9463292956d98dce4 Mon Sep 17 00:00:00 2001
From: felmer <felmer>
Date: Mon, 16 Nov 2009 15:38:27 +0000
Subject: [PATCH] SE-163

SVN: 13426
---
 .../phosphonetx/BuildAndEnvironmentInfo.java  |  46 ------
 .../client/application/OccurrencesMarker.java | 148 +-----------------
 .../shared/basic/dto/Occurrence.java          |  64 ++++++++
 .../shared/basic/dto/OccurrenceUtil.java      |  84 ++++++++++
 .../application/OccurrencesMarkerTest.java    |   4 +-
 .../shared/basic/dto/OccurrenceUtilTest.java  |  46 ++++++
 6 files changed, 200 insertions(+), 192 deletions(-)
 delete mode 100644 rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/BuildAndEnvironmentInfo.java
 create mode 100644 rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/Occurrence.java
 create mode 100644 rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtil.java
 create mode 100644 rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtilTest.java

diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/BuildAndEnvironmentInfo.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/BuildAndEnvironmentInfo.java
deleted file mode 100644
index 3cfcbdfbfd9..00000000000
--- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/BuildAndEnvironmentInfo.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright 2009 ETH Zuerich, CISD
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package ch.systemsx.cisd.openbis.plugin.phosphonetx;
-
-import ch.systemsx.cisd.base.utilities.AbstractBuildAndEnvironmentInfo;
-
-
-/**
- * The build and environment information for PhosphoNetX.
- *
- * @author Franz-Josef Elmer
- */
-public class BuildAndEnvironmentInfo extends AbstractBuildAndEnvironmentInfo
-{
-    private final static String BASE = "rtd_phosphonetx";
-    
-    public final static BuildAndEnvironmentInfo INSTANCE = new BuildAndEnvironmentInfo();
-    
-    private BuildAndEnvironmentInfo()
-    {
-        super(BASE);
-    }
-
-    /**
-     * Shows build and environment information on the console.
-     */
-    public static void main(String[] args)
-    {
-        System.out.println(INSTANCE);
-    }
-
-}
diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java
index 006bd2f2cf2..87cd57b8d6c 100644
--- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java
+++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java
@@ -16,10 +16,11 @@
 
 package ch.systemsx.cisd.openbis.plugin.phosphonetx.client.web.client.application;
 
-import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
 
+import ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto.Occurrence;
+import ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto.OccurrenceUtil;
+
 /**
  * This class is able to find all occurrences of the given set of words in the provided template and
  * mark the beginning and end of those words. Note that when occurrences overlap the overlaps are
@@ -135,105 +136,7 @@ public class OccurrencesMarker
      */
     public String mark(String template, List<String> words)
     {
-        List<Occurrence> occurrences = calcSortedOccurrences(template, words);
-        if (hasOverlapping(occurrences))
-        {
-            return markOverlapping(template, occurrences);
-        } else
-        {
-            return markNonoverlapping(template, occurrences);
-        }
-    }
-
-    // describes one occurence of the word in a template
-    private static class Occurrence implements Comparable<Occurrence>
-    {
-        private final String word;
-
-        private final int startIndex;
-
-        public Occurrence(String word, int startIndex)
-        {
-            this.word = word;
-            this.startIndex = startIndex;
-        }
-
-        public String getWord()
-        {
-            return word;
-        }
-
-        public int getStartIndex()
-        {
-            return startIndex;
-        }
-
-        public int getEndIndex()
-        {
-            return startIndex + word.length() - 1;
-        }
-
-        public int compareTo(Occurrence o)
-        {
-            return getStartIndex() - o.getStartIndex();
-        }
-
-        @Override
-        public String toString()
-        {
-            return "[" + word + "@" + startIndex + "]";
-        }
-    }
-
-    private String markOverlapping(String template, List<Occurrence> sortedOccurrences)
-    {
-        List<Occurrence> mergedOccurrences = mergeOverlaps(template, sortedOccurrences);
-        return markNonoverlapping(template, mergedOccurrences);
-    }
-
-    private List<Occurrence> mergeOverlaps(String template, List<Occurrence> sortedOccurrences)
-    {
-        List<Occurrence> result = new ArrayList<Occurrence>();
-        if (sortedOccurrences.size() == 0)
-        {
-            return result;
-        }
-        int startIndex = -1;
-        int endIndex = -1;
-        for (Occurrence occurrence : sortedOccurrences)
-        {
-            if (occurrence.getStartIndex() <= endIndex)
-            { // overlap
-                endIndex = Math.max(endIndex, occurrence.getEndIndex());
-            } else
-            { // current word does not overlap with the words browsed before
-                if (startIndex != -1)
-                { // create a new word from the words browsed before
-                    Occurrence newOccurrence = createOccurence(template, startIndex, endIndex);
-                    result.add(newOccurrence);
-                }
-                startIndex = occurrence.getStartIndex();
-                endIndex = occurrence.getEndIndex();
-            }
-        }
-        Occurrence newOccurrence = createOccurence(template, startIndex, endIndex);
-        result.add(newOccurrence);
-        return result;
-    }
-
-    private static Occurrence createOccurence(String template, int startIndex, int endIndex)
-    {
-        assert startIndex != -1 : "start index should be initialized";
-        assert endIndex != -1 : "end index should be initialized";
-        String mergedWord = template.substring(startIndex, endIndex + 1);
-        Occurrence newOccurrence = new Occurrence(mergedWord, startIndex);
-        return newOccurrence;
-    }
-
-    // marks all occurrences in the template, assuming that all occurrences do not
-    // overlap with each other and are sorted by the start position
-    private String markNonoverlapping(String template, List<Occurrence> sortedOccurrences)
-    {
+        List<Occurrence> sortedOccurrences = OccurrenceUtil.getCoverage(template, words);
         StringBuffer sb = new StringBuffer();
         int nextUnprocessedCharIndex = 0;
         for (Occurrence occurrence : sortedOccurrences)
@@ -249,47 +152,4 @@ public class OccurrencesMarker
         return sb.toString();
     }
 
-    // true if two occurrences of the same word in the template overlap (have a common part)
-    private static boolean hasOverlapping(List<Occurrence> occurrences)
-    {
-        int prevEndIndex = -1;
-        for (Occurrence occurrence : occurrences)
-        {
-            if (occurrence.getStartIndex() <= prevEndIndex)
-            {
-                return true;
-            }
-            prevEndIndex = Math.max(prevEndIndex, occurrence.getEndIndex());
-        }
-        return false;
-    }
-
-    // calculates a list of all words occurances, sorts it be starting position
-    private static List<Occurrence> calcSortedOccurrences(String template, List<String> words)
-    {
-        List<Occurrence> result = new ArrayList<Occurrence>();
-        for (String word : words)
-        {
-            result.addAll(calcOccurrences(template, word));
-        }
-        Collections.sort(result);
-        return result;
-    }
-
-    private static List<Occurrence> calcOccurrences(String template, String word)
-    {
-        List<Occurrence> result = new ArrayList<Occurrence>();
-        int startIndex = 0;
-        while (true)
-        {
-            int occurrenceIndex = template.indexOf(word, startIndex);
-            if (occurrenceIndex == -1)
-            {
-                break;
-            }
-            result.add(new Occurrence(word, occurrenceIndex));
-            startIndex = occurrenceIndex + 1; // maybe the word overlaps with itself?
-        }
-        return result;
-    }
 }
diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/Occurrence.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/Occurrence.java
new file mode 100644
index 00000000000..0a79583f908
--- /dev/null
+++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/Occurrence.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto;
+
+/**
+ * The occurrence of a word in a text. Occurrences are comparable by their start indexes.
+ * 
+ * @author Tomasz Pylak
+ */
+public class Occurrence implements Comparable<Occurrence>
+{
+    private final String word;
+
+    private final int startIndex;
+
+    /**
+     * Creates a new instance of the specified word which appears at the specified start index.
+     */
+    public Occurrence(String word, int startIndex)
+    {
+        this.word = word;
+        this.startIndex = startIndex;
+    }
+
+    public String getWord()
+    {
+        return word;
+    }
+
+    public int getStartIndex()
+    {
+        return startIndex;
+    }
+
+    public int getEndIndex()
+    {
+        return startIndex + word.length() - 1;
+    }
+
+    public int compareTo(Occurrence o)
+    {
+        return getStartIndex() - o.getStartIndex();
+    }
+
+    @Override
+    public String toString()
+    {
+        return "[" + word + "@" + startIndex + "]";
+    }
+}
\ No newline at end of file
diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtil.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtil.java
new file mode 100644
index 00000000000..a9564e33dee
--- /dev/null
+++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtil.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+
+/**
+ * Helper class for calculating which parts of a protein sequence are covered by peptides.
+ *
+ * @author Franz-Josef Elmer
+ */
+public class OccurrenceUtil
+{
+    /**
+     * Returns the occurrences of the specified peptides in the specified protein, sorted by start
+     * index, where overlapping or directly adjacent occurrences are merged into one occurrence.
+     */
+    public static List<Occurrence> getCoverage(String protein, List<String> peptides)
+    {
+        ArrayList<Occurrence> list = new ArrayList<Occurrence>();
+        List<Occurrence> sortedList = calcSortedOccurrences(protein, peptides);
+        if (sortedList.isEmpty() == false)
+        {
+            Occurrence current = sortedList.get(0);
+            for (int i = 1; i < sortedList.size(); i++)
+            {
+                String currentPeptide = current.getWord();
+                Occurrence next = sortedList.get(i);
+                int diff = next.getStartIndex() - current.getStartIndex();
+                if (current.getWord().length() < diff)
+                {
+                    list.add(current);
+                    current = next;
+                } else if (current.getEndIndex() < next.getEndIndex())
+                {
+                    String mergedPeptides = currentPeptide.substring(0, diff) + next.getWord();
+                    current = new Occurrence(mergedPeptides, current.getStartIndex());
+                }
+            }
+            list.add(current);
+        }
+        return list;
+    }
+
+    // Finds all occurrences of all peptides in the protein, sorted by start index (empty words ignored).
+    private static List<Occurrence> calcSortedOccurrences(String protein, List<String> peptides)
+    {
+        List<Occurrence> result = new ArrayList<Occurrence>();
+        for (String word : peptides)
+        {
+            int startIndex = 0;
+            while (true)
+            {
+                int occurrenceIndex = protein.indexOf(word, startIndex);
+                if (occurrenceIndex == -1 || word.length() == 0) // "" matches everywhere: would never end
+                {
+                    break;
+                }
+                result.add(new Occurrence(word, occurrenceIndex));
+                startIndex = occurrenceIndex + 1; // maybe the word overlaps with itself?
+            }
+        }
+        Collections.sort(result);
+        return result;
+    }
+
+}
diff --git a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java
index 3679c81b71d..84aefd07bd0 100644
--- a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java
+++ b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java
@@ -46,8 +46,8 @@ public class OccurrencesMarkerTest extends AssertJUnit
     public void testFindOverlappingOccurrences()
     {
         OccurrencesMarker marker = createMarker();
-        String marked = marker.mark("aaaa", Arrays.asList("aa"));
-        assertEquals("<aaaa>", marked);
+        assertEquals("<aaaa>", marker.mark("aaaa", Arrays.asList("aa")));
+        assertEquals("h<ello> w<orld>, h<ello> universe!", marker.mark("hello world, hello universe!", Arrays.asList("ell", "o", "orld")));
     }
 
     @Test
diff --git a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtilTest.java b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtilTest.java
new file mode 100644
index 00000000000..0b14def68ad
--- /dev/null
+++ b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtilTest.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2009 ETH Zuerich, CISD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto;
+
+import java.util.Arrays;
+
+import org.testng.AssertJUnit;
+import org.testng.annotations.Test;
+
+/**
+ * Test cases for {@link OccurrenceUtil}.
+ *
+ * @author Franz-Josef Elmer
+ */
+public class OccurrenceUtilTest extends AssertJUnit
+{
+    @Test
+    public void test()
+    {
+        check("[[ab@0], [abcd@4], [ab@13]]", "abc abcde hahab", "ab", "bcd");
+        check("[[ab@0], [abcd@4], [ab@13]]", "abc abcde hahab", "ab", "cd");
+        check("[[abc@0], [abc@4], [ab@13]]", "abc abcde hahab", "ab", "b", "c");
+        check("[[abc@0], [abc@4], [b@14]]", "abc abcde hahab", "abc", "b");
+        check("[[abcde@0]]", "abcdef", "abcd", "b", "de");
+        check("[[haha@10]]", "abc abcde hahab", "haha", "h");
+    }
+    
+    private void check(String expectedList, String sequence, String... words)
+    {
+        assertEquals(expectedList, OccurrenceUtil.getCoverage(sequence, Arrays.asList(words)).toString());
+    }
+}
-- 
GitLab