From 09436561a5b39d37a51056a61b440adc0de95ae5 Mon Sep 17 00:00:00 2001
From: tpylak <tpylak>
Date: Fri, 17 Jul 2009 12:24:59 +0000
Subject: [PATCH] LMS-1028 break sequence into many shorter lines, mark
 peptides on the sequence

SVN: 11813
---
 .../client/web/client/application/Dict.java   |   2 -
 .../client/application/OccurrencesMarker.java | 139 ++++++++++--------
 .../web/client/application/ProteinViewer.java |  40 ++---
 .../application/OccurrencesMarkerTest.java    |  64 ++++++--
 4 files changed, 136 insertions(+), 109 deletions(-)

diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/Dict.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/Dict.java
index d882974f053..41abf3dcb77 100644
--- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/Dict.java
+++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/Dict.java
@@ -52,8 +52,6 @@ public class Dict
 
     public static final String DATA_SET_PERM_ID = "data_set_perm_id";
 
-    public static final String PEPTIDE = "peptide";
-
     public static final String PEPTIDE_COUNT = "peptide_count";
 
     public static final String SEQUENCE_NAME = "sequence_name";
diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java
index 27d0ba462b7..cdc2b47c85a 100644
--- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java
+++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java
@@ -17,51 +17,74 @@
 package ch.systemsx.cisd.openbis.plugin.phosphonetx.client.web.client.application;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
 /**
  * This class is able to find all occurrences of the given set of words in the provided template and
- * mark the beginning and end of those words. Cases when occurrences overlap are handles properly.
+ * mark the beginning and end of those words. Note that when occurrences overlap the overlaps are
+ * merged and the border between them is not visualized.
  * 
  * @author Tomasz Pylak
  */
 public class OccurrencesMarker
 {
-    private final String startMarker;
+    private final char startMarker;
 
-    private final String endMarker;
+    private final char endMarker;
 
     /** Produces an HTML code whih all occurrences properly marked */
-    public static String markOccurrencesWithHtml(String template, List<String> words)
+    public static String markOccurrencesWithHtml(String template, List<String> words, int lineLength)
+    {
+        char start = '('; // temporary one-character markers, turned into HTML tags below
+        char end = ')';
+        OccurrencesMarker marker = new OccurrencesMarker(start, end);
+        String markedTemplate = marker.mark(template, words);
+        markedTemplate = breakLines(markedTemplate, lineLength, "<br>"); // counts marker chars too
+        markedTemplate = marker.replaceTags(markedTemplate, "<font color='red'>", "</font>");
+        return markedTemplate;
+    }
+
+    // @Private - replaces each marker character by its tag; everything is taken literally
+    String replaceTags(String text, String startTag, String endTag)
+    {
+        // literal replace, no regex: letter markers and '$' or '\' in the tags stay harmless
+        String withStartTags = text.replace(String.valueOf(startMarker), startTag);
+        String newText = withStartTags.replace(String.valueOf(endMarker), endTag);
+        return newText;
+    }
+
+    /** Splits text into lines of at most lineLength characters, separated by endOfLine. */
+    static String breakLines(String text, int lineLength, String endOfLine)
     {
-        String start = "(";
-        String end = ")";
-        List<String> markedTemplates = new OccurrencesMarker(start, end).mark(template, words);
         StringBuffer sb = new StringBuffer();
-        for (String markedTemplate : markedTemplates)
+        String textToBreak = text;
+        // lineLength <= 0 would loop forever (0) or fail in substring (<0): leave text unbroken
+        while (lineLength > 0 && textToBreak.length() > lineLength)
         {
-            markedTemplate.replaceAll(start, "<font color='red'>");
-            markedTemplate.replaceAll(end, "</font>");
-            sb.append(markedTemplate);
-            sb.append("<BR>");
+            sb.append(textToBreak.substring(0, lineLength));
+            sb.append(endOfLine);
+            textToBreak = textToBreak.substring(lineLength);
         }
+        sb.append(textToBreak);
         return sb.toString();
     }
 
-    public OccurrencesMarker(String startMarker, String endMarker)
+    public OccurrencesMarker(char startMarker, char endMarker)
     {
         this.startMarker = startMarker;
         this.endMarker = endMarker;
     }
 
     /**
-     * If the words occurrences do not overlap in the template, the returned list has only one
-     * element. Otherwise there are several elements, each contains non-overlapping marks. skips
-     * words which do not occur in the template.
+     * All letters which belong to the matching words are marked. If some words overlap, the markers
+     * are merged. E.g. for the template 'xabcx' and words 'ab' and 'bc', the result will be
+     * 'x(abc)x'.
+     * <p>
+     * Words which do not occur in the template are ignored.
+     * </p>
      */
-    public List<String> mark(String template, List<String> words)
+    public String mark(String template, List<String> words)
     {
         List<Occurrence> occurrences = calcSortedOccurrences(template, words);
         if (hasOverlapping(occurrences))
@@ -69,7 +92,7 @@ public class OccurrencesMarker
             return markOverlapping(template, occurrences);
         } else
         {
-            return Arrays.asList(markNonoverlapping(template, occurrences));
+            return markNonoverlapping(template, occurrences);
         }
     }
 
@@ -118,64 +141,57 @@ public class OccurrencesMarker
         {
             return getStartIndex() - o.getStartIndex();
         }
-    }
 
-    private List<String> markOverlapping(String template, List<Occurrence> sortedOccurrences)
-    {
-        List<List<Occurrence>> distinctOccurrencesList = splitToDistinctGroups(sortedOccurrences);
-        List<String> result = new ArrayList<String>();
-        for (List<Occurrence> distinctOccurrences : distinctOccurrencesList)
+        @Override
+        public String toString()
         {
-            String marked = markNonoverlapping(template, distinctOccurrences);
-            result.add(marked);
+            return "[" + word + "@" + startIndex + "]";
         }
-        return result;
     }
 
-    // Splits all occurrences into many groups in such a way, that occurrences in one group do not
-    // overlap with each other.
-    private static List<List<Occurrence>> splitToDistinctGroups(List<Occurrence> sortedOccurrences)
+    private String markOverlapping(String template, List<Occurrence> sortedOccurrences)
     {
-        setVisitedFlag(sortedOccurrences, false);
-        List<List<Occurrence>> result = new ArrayList<List<Occurrence>>();
-        int unvisited = sortedOccurrences.size();
-        while (unvisited > 0)
-        {
-            List<Occurrence> distinctOccurrences =
-                    chooseDistinctUnvisitedOccurrences(sortedOccurrences);
-            result.add(distinctOccurrences);
-
-            setVisitedFlag(distinctOccurrences, true);
-            unvisited -= distinctOccurrences.size();
-        }
-        return result;
+        // overlapping occurrences are merged first, then marked like non-overlapping ones
+        return markNonoverlapping(template, mergeOverlaps(template, sortedOccurrences));
     }
 
-    private static List<Occurrence> chooseDistinctUnvisitedOccurrences(
-            List<Occurrence> sortedOccurrences)
+    private List<Occurrence> mergeOverlaps(String template, List<Occurrence> sortedOccurrences)
     {
-        List<Occurrence> distinctOccurrences = new ArrayList<Occurrence>();
-        int lastIncludedCharIndex = -1;
+        List<Occurrence> result = new ArrayList<Occurrence>(); // the merged occurrences
+        if (sortedOccurrences.size() == 0)
+        {
+            return result;
+        }
+        int startIndex = -1; // start of the range being merged; -1 = nothing seen yet
+        int endIndex = -1; // last character index of that range (inclusive)
         for (Occurrence occurrence : sortedOccurrences)
         {
-            if (occurrence.isVisited() == false)
-            {
-                if (occurrence.getStartIndex() > lastIncludedCharIndex)
-                {
-                    distinctOccurrences.add(occurrence);
-                    lastIncludedCharIndex = occurrence.getEndIndex();
+            if (occurrence.getStartIndex() <= endIndex)
+            { // overlaps the range merged so far: extend the range if this word ends later
+                endIndex = Math.max(endIndex, occurrence.getEndIndex());
+            } else
+            { // current word starts after the range merged so far
+                if (startIndex != -1)
+                { // emit the finished range as a single occurrence
+                    Occurrence newOccurrence = createOccurence(template, startIndex, endIndex);
+                    result.add(newOccurrence);
                 }
+                startIndex = occurrence.getStartIndex();
+                endIndex = occurrence.getEndIndex();
             }
         }
-        return distinctOccurrences;
+        Occurrence newOccurrence = createOccurence(template, startIndex, endIndex);
+        result.add(newOccurrence); // the range still open when the loop ended
+        return result;
     }
 
-    private static void setVisitedFlag(List<Occurrence> occurrences, boolean visited)
+    private static Occurrence createOccurence(String template, int startIndex, int endIndex)
     {
-        for (Occurrence occurrence : occurrences)
-        {
-            occurrence.setVisited(visited);
-        }
+        // endIndex is the last included character, substring needs an exclusive upper bound
+        assert startIndex != -1 : "start index should be initialized";
+        assert endIndex != -1 : "end index should be initialized";
+        final int endExclusive = endIndex + 1;
+        return new Occurrence(template.substring(startIndex, endExclusive), startIndex);
     }
 
     // marks all occurrences in the template, assuming that all occurrences do not
@@ -207,7 +223,7 @@ public class OccurrencesMarker
             {
                 return true;
             }
-            prevEndIndex = occurrence.getEndIndex();
+            prevEndIndex = Math.max(prevEndIndex, occurrence.getEndIndex());
         }
         return false;
     }
@@ -240,5 +256,4 @@ public class OccurrencesMarker
         }
         return result;
     }
-
 }
diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/ProteinViewer.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/ProteinViewer.java
index 6f6174c7286..f6bde440444 100644
--- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/ProteinViewer.java
+++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/ProteinViewer.java
@@ -17,7 +17,6 @@
 package ch.systemsx.cisd.openbis.plugin.phosphonetx.client.web.client.application;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -61,6 +60,8 @@ public class ProteinViewer extends
         AbstractViewer<IPhosphoNetXClientServiceAsync, IEntityInformationHolder> implements
         IDatabaseModificationObserver
 {
+    private static final int AMINOACIDS_IN_SEQUENCE_PER_LINE = 60;
+
     private static final String PREFIX = "protein-viewer_";
 
     public static final String ID_PREFIX = GenericConstants.ID_PREFIX + PREFIX;
@@ -124,8 +125,6 @@ public class ProteinViewer extends
 
     private void recreateUI(ProteinByExperiment protein)
     {
-        // fillDebugData(protein);
-
         setLayout(new BorderLayout());
         removeAll();
         setScrollMode(Scroll.AUTO);
@@ -143,28 +142,6 @@ public class ProteinViewer extends
         }
     }
 
-    // TODO 2009-07-17, Tomasz Pylak: remove me!!!!!!!!!!!!!!!!!!!!!!!!!
-    private static void fillDebugData(ProteinByExperiment protein)
-    {
-        ProteinDetails details = new ProteinDetails();
-        details.setDatabaseNameAndVersion("database");
-        details
-                .setSequence("ISDFHSJDGJHFGHHJKDGHJGBGBKJBGSHDFYUERGFUYEWBGCHJEBVHJERBCVUYERBFYUEWBCYUWERBCUYEBCUYEBR");
-        details.setDataSetPermID("20090716105149429-34");
-        details.setDataSetTechID(15L);
-        details.setDataSetTypeCode("PROT_RESULT");
-        details.setFalseDiscoveryRate(123);
-
-        // - more than once
-        // - overlapping
-        List<Peptide> peptides =
-                Arrays.asList(createPeptide("SJDGJHFGHHJ"), createPeptide("FUYEWBGCHJEBVH"),
-                        createPeptide("YERBFYUEWBCYU"),
-                        createPeptide("ERBCVUYERBFYUEWBCYUWERBCUYEB"));
-        details.setPeptides(peptides);
-        protein.setDetails(details);
-    }
-
     private static Peptide createPeptide(String sequence)
     {
         Peptide peptide = new Peptide();
@@ -223,12 +200,11 @@ public class ProteinViewer extends
     {
         properties.put(viewContext.getMessage(Dict.DATABASE_NAME_AND_VERSION), proteinDetails
                 .getDatabaseNameAndVersion());
-        properties.put(viewContext.getMessage(Dict.SEQUENCE_NAME), proteinDetails.getSequence());
-        properties.put(viewContext.getMessage(Dict.PEPTIDE_COUNT), proteinDetails.getPeptides()
-                .size());
         String markedSequence =
                 markPeptides(proteinDetails.getSequence(), proteinDetails.getPeptides());
-        properties.put(viewContext.getMessage(Dict.PEPTIDE), markedSequence);
+        properties.put(viewContext.getMessage(Dict.SEQUENCE_NAME), markedSequence);
+        properties.put(viewContext.getMessage(Dict.PEPTIDE_COUNT), proteinDetails.getPeptides()
+                .size());
 
         properties.put(viewContext.getMessage(Dict.FDR), proteinDetails.getFalseDiscoveryRate());
 
@@ -241,7 +217,11 @@ public class ProteinViewer extends
     private static String markPeptides(String sequence, List<Peptide> peptides)
     {
         List<String> peptideSequences = extractSequences(peptides);
-        return OccurrencesMarker.markOccurrencesWithHtml(sequence, peptideSequences);
+        String markedSequence =
+                OccurrencesMarker.markOccurrencesWithHtml(sequence, peptideSequences,
+                        AMINOACIDS_IN_SEQUENCE_PER_LINE);
+        // the letters should have fixed width
+        return "<font style=\"font-family:monospace\">" + markedSequence + "</font>";
     }
 
     private static List<String> extractSequences(List<Peptide> peptides)
diff --git a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java
index 1f58f89ae6b..0986301308f 100644
--- a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java
+++ b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java
@@ -17,43 +17,77 @@
 package ch.systemsx.cisd.openbis.plugin.phosphonetx.client.web.client.application;
 
 import java.util.Arrays;
-import java.util.List;
 
 import org.testng.AssertJUnit;
 import org.testng.annotations.Test;
 
+import ch.rinn.restrictions.Friend;
+
 /**
  * @author Tomasz Pylak
  */
+@Friend(toClasses = OccurrencesMarker.class)
 public class OccurrencesMarkerTest extends AssertJUnit
 {
     @Test
     public void testFindNonOverlappingOccurrences()
     {
-        OccurrencesMarker marker = new OccurrencesMarker("<", ">");
-        List<String> marked =
-                marker.mark("hello, my beautiful world!", Arrays.asList("hello", "world"));
-        assertEquals(1, marked.size());
-        assertEquals("<hello>, my beautiful <world>!", marked.get(0));
+        OccurrencesMarker marker = createMarker();
+        String marked = marker.mark("hello, my beautiful world!", Arrays.asList("hello", "world"));
+        assertEquals("<hello>, my beautiful <world>!", marked);
+    }
+
+    private OccurrencesMarker createMarker()
+    {
+        return new OccurrencesMarker('<', '>');
     }
 
     @Test
     public void testFindOverlappingOccurrences()
     {
-        OccurrencesMarker marker = new OccurrencesMarker("<", ">");
-        List<String> marked = marker.mark("aaaa", Arrays.asList("aa"));
-        assertEquals(2, marked.size());
-        assertEquals("<aa><aa>", marked.get(0));
-        assertEquals("a<aa>a", marked.get(1));
+        OccurrencesMarker marker = createMarker();
+        String marked = marker.mark("aaaa", Arrays.asList("aa"));
+        assertEquals("<aaaa>", marked);
     }
 
     @Test
     public void testFindNoOccurrences()
     {
-        OccurrencesMarker marker = new OccurrencesMarker("<", ">");
-        List<String> marked = marker.mark("aaaa", Arrays.asList("x"));
-        assertEquals(1, marked.size());
-        assertEquals("aaaa", marked.get(0));
+        OccurrencesMarker marker = createMarker();
+        String marked = marker.mark("aaaa", Arrays.asList("x"));
+        assertEquals("aaaa", marked);
+    }
+
+    @Test
+    public void testFindContainingOccurrences()
+    {
+        OccurrencesMarker marker = createMarker();
+        String marked = marker.mark("xabcx", Arrays.asList("abc", "b"));
+        assertEquals("x<abc>x", marked);
+    }
+
+    @Test
+    public void testBreakLinesLastLineNotFull()
+    {
+        String lines = OccurrencesMarker.breakLines("1234567", 3, "x");
+        assertEquals("123x456x7", lines);
+    }
+
+    @Test
+    public void testBreakLinesLastLineFull()
+    {
+        String lines = OccurrencesMarker.breakLines("123456", 3, "x");
+        assertEquals("123x456", lines);
+    }
+
+    @Test
+    public void testReplaceTags()
+    {
+        String marked = new OccurrencesMarker('(', ')').replaceTags("a(a)a", "<", ">");
+        assertEquals("a<a>a", marked);
+
+        marked = new OccurrencesMarker('<', '>').replaceTags("a<a>a", "(", ")");
+        assertEquals("a(a)a", marked);
     }
 
 }
-- 
GitLab