From 09436561a5b39d37a51056a61b440adc0de95ae5 Mon Sep 17 00:00:00 2001 From: tpylak <tpylak> Date: Fri, 17 Jul 2009 12:24:59 +0000 Subject: [PATCH] LMS-1028 break sequence into many shorter lines, mark peptides on the sequence SVN: 11813 --- .../client/web/client/application/Dict.java | 2 - .../client/application/OccurrencesMarker.java | 139 ++++++++++-------- .../web/client/application/ProteinViewer.java | 40 ++--- .../application/OccurrencesMarkerTest.java | 64 ++++++-- 4 files changed, 136 insertions(+), 109 deletions(-) diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/Dict.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/Dict.java index d882974f053..41abf3dcb77 100644 --- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/Dict.java +++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/Dict.java @@ -52,8 +52,6 @@ public class Dict public static final String DATA_SET_PERM_ID = "data_set_perm_id"; - public static final String PEPTIDE = "peptide"; - public static final String PEPTIDE_COUNT = "peptide_count"; public static final String SEQUENCE_NAME = "sequence_name"; diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java index 27d0ba462b7..cdc2b47c85a 100644 --- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java +++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java @@ -17,51 +17,74 @@ package ch.systemsx.cisd.openbis.plugin.phosphonetx.client.web.client.application; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; /** * This class is able to find all occurrences of the given set of words in the provided template and - * mark the beginning and end of those words. Cases when occurrences overlap are handles properly. + * mark the beginning and end of those words. Note that when occurrences overlap the overlaps are + * merged and the border between them is not visualized. * * @author Tomasz Pylak */ public class OccurrencesMarker { - private final String startMarker; + private final char startMarker; - private final String endMarker; + private final char endMarker; /** Produces an HTML code whih all occurrences properly marked */ - public static String markOccurrencesWithHtml(String template, List<String> words) + public static String markOccurrencesWithHtml(String template, List<String> words, int lineLength) + { + char start = '('; + char end = ')'; + OccurrencesMarker marker = new OccurrencesMarker(start, end); + String markedTemplate = marker.mark(template, words); + markedTemplate = breakLines(markedTemplate, lineLength, "<br>"); + markedTemplate = marker.replaceTags(markedTemplate, "<font color='red'>", "</font>"); + return markedTemplate; + } + + // @Private + String replaceTags(String text, String startTag, String endTag) + { + String newText = text; + newText = newText.replaceAll("\\" + startMarker, startTag); + newText = newText.replaceAll("\\" + endMarker, endTag); + return newText; + } + + /** split lines, so that each line has lineLength characters at most */ + static String breakLines(String text, int lineLength, String endOfLine) { - String start = "("; - String end = ")"; - List<String> markedTemplates = new OccurrencesMarker(start, end).mark(template, words); StringBuffer sb = new StringBuffer(); - for (String markedTemplate : markedTemplates) + String textToBreak = text; + while (textToBreak.length() > lineLength) { - markedTemplate.replaceAll(start, "<font color='red'>"); - markedTemplate.replaceAll(end, "</font>"); - sb.append(markedTemplate); - sb.append("<BR>"); + String line = textToBreak.substring(0, lineLength); + sb.append(line); + sb.append(endOfLine); + textToBreak = textToBreak.substring(lineLength); } + sb.append(textToBreak); return sb.toString(); } - public OccurrencesMarker(String startMarker, String endMarker) + public OccurrencesMarker(char startMarker, char endMarker) { this.startMarker = startMarker; this.endMarker = endMarker; } /** - * If the words occurrences do not overlap in the template, the returned list has only one - * element. Otherwise there are several elements, each contains non-overlapping marks. skips - * words which do not occur in the template. + * All letters which belong to the matching words are marked. If some words overlap, the markers + * are merged. E.g. for the template 'xabcx' and words 'ab' and 'bc', the result will be + * 'x(abc)x'. + * <p> + * Words which do not occur in the template are ignored. + * </p> */ - public List<String> mark(String template, List<String> words) + public String mark(String template, List<String> words) { List<Occurrence> occurrences = calcSortedOccurrences(template, words); if (hasOverlapping(occurrences)) @@ -69,7 +92,7 @@ public class OccurrencesMarker return markOverlapping(template, occurrences); } else { - return Arrays.asList(markNonoverlapping(template, occurrences)); + return markNonoverlapping(template, occurrences); } } @@ -118,64 +141,57 @@ public class OccurrencesMarker { return getStartIndex() - o.getStartIndex(); } - } - private List<String> markOverlapping(String template, List<Occurrence> sortedOccurrences) - { - List<List<Occurrence>> distinctOccurrencesList = splitToDistinctGroups(sortedOccurrences); - List<String> result = new ArrayList<String>(); - for (List<Occurrence> distinctOccurrences : distinctOccurrencesList) + @Override + public String toString() { - String marked = markNonoverlapping(template, distinctOccurrences); - result.add(marked); + return "[" + word + "@" + startIndex + "]"; } - return result; } - // Splits all occurrences into many groups in such a way, that occurrences in one group do not - // overlap with each other. - private static List<List<Occurrence>> splitToDistinctGroups(List<Occurrence> sortedOccurrences) + private String markOverlapping(String template, List<Occurrence> sortedOccurrences) { - setVisitedFlag(sortedOccurrences, false); - List<List<Occurrence>> result = new ArrayList<List<Occurrence>>(); - int unvisited = sortedOccurrences.size(); - while (unvisited > 0) - { - List<Occurrence> distinctOccurrences = - chooseDistinctUnvisitedOccurrences(sortedOccurrences); - result.add(distinctOccurrences); - - setVisitedFlag(distinctOccurrences, true); - unvisited -= distinctOccurrences.size(); - } - return result; + List<Occurrence> mergedOccurrences = mergeOverlaps(template, sortedOccurrences); + return markNonoverlapping(template, mergedOccurrences); } - private static List<Occurrence> chooseDistinctUnvisitedOccurrences( - List<Occurrence> sortedOccurrences) + private List<Occurrence> mergeOverlaps(String template, List<Occurrence> sortedOccurrences) { - List<Occurrence> distinctOccurrences = new ArrayList<Occurrence>(); - int lastIncludedCharIndex = -1; + List<Occurrence> result = new ArrayList<Occurrence>(); + if (sortedOccurrences.size() == 0) + { + return result; + } + int startIndex = -1; + int endIndex = -1; for (Occurrence occurrence : sortedOccurrences) { - if (occurrence.isVisited() == false) - { - if (occurrence.getStartIndex() > lastIncludedCharIndex) - { - distinctOccurrences.add(occurrence); - lastIncludedCharIndex = occurrence.getEndIndex(); + if (occurrence.getStartIndex() <= endIndex) + { // overlap + endIndex = Math.max(endIndex, occurrence.getEndIndex()); + } else + { // current word does not overlap with the words browsed before + if (startIndex != -1) + { // create a new word from the words browsed before + Occurrence newOccurrence = createOccurence(template, startIndex, endIndex); + result.add(newOccurrence); } + startIndex = occurrence.getStartIndex(); + endIndex = occurrence.getEndIndex(); } } - return distinctOccurrences; + Occurrence newOccurrence = createOccurence(template, startIndex, endIndex); + result.add(newOccurrence); + return result; } - private static void setVisitedFlag(List<Occurrence> occurrences, boolean visited) + private static Occurrence createOccurence(String template, int startIndex, int endIndex) { - for (Occurrence occurrence : occurrences) - { - occurrence.setVisited(visited); - } + assert startIndex != -1 : "start index should be initialized"; + assert endIndex != -1 : "end index should be initialized"; + String mergedWord = template.substring(startIndex, endIndex + 1); + Occurrence newOccurrence = new Occurrence(mergedWord, startIndex); + return newOccurrence; } // marks all occurrences in the template, assuming that all occurrences do not @@ -207,7 +223,7 @@ public class OccurrencesMarker { return true; } - prevEndIndex = occurrence.getEndIndex(); + prevEndIndex = Math.max(prevEndIndex, occurrence.getEndIndex()); } return false; } @@ -240,5 +256,4 @@ public class OccurrencesMarker } return result; } - } diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/ProteinViewer.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/ProteinViewer.java index 6f6174c7286..f6bde440444 100644 --- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/ProteinViewer.java +++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/ProteinViewer.java @@ -17,7 +17,6 @@ package ch.systemsx.cisd.openbis.plugin.phosphonetx.client.web.client.application; import java.util.ArrayList; -import java.util.Arrays; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -61,6 +60,8 @@ public class ProteinViewer extends AbstractViewer<IPhosphoNetXClientServiceAsync, IEntityInformationHolder> implements IDatabaseModificationObserver { + private static final int AMINOACIDS_IN_SEQUENCE_PER_LINE = 60; + private static final String PREFIX = "protein-viewer_"; public static final String ID_PREFIX = GenericConstants.ID_PREFIX + PREFIX; @@ -124,8 +125,6 @@ public class ProteinViewer extends private void recreateUI(ProteinByExperiment protein) { - // fillDebugData(protein); - setLayout(new BorderLayout()); removeAll(); setScrollMode(Scroll.AUTO); @@ -143,28 +142,6 @@ public class ProteinViewer extends } } - // TODO 2009-07-17, Tomasz Pylak: remove me!!!!!!!!!!!!!!!!!!!!!!!!! - private static void fillDebugData(ProteinByExperiment protein) - { - ProteinDetails details = new ProteinDetails(); - details.setDatabaseNameAndVersion("database"); - details - .setSequence("ISDFHSJDGJHFGHHJKDGHJGBGBKJBGSHDFYUERGFUYEWBGCHJEBVHJERBCVUYERBFYUEWBCYUWERBCUYEBCUYEBR"); - details.setDataSetPermID("20090716105149429-34"); - details.setDataSetTechID(15L); - details.setDataSetTypeCode("PROT_RESULT"); - details.setFalseDiscoveryRate(123); - - // - more than once - // - overlapping - List<Peptide> peptides = - Arrays.asList(createPeptide("SJDGJHFGHHJ"), createPeptide("FUYEWBGCHJEBVH"), - createPeptide("YERBFYUEWBCYU"), - createPeptide("ERBCVUYERBFYUEWBCYUWERBCUYEB")); - details.setPeptides(peptides); - protein.setDetails(details); - } - private static Peptide createPeptide(String sequence) { Peptide peptide = new Peptide(); @@ -223,12 +200,11 @@ public class ProteinViewer extends { properties.put(viewContext.getMessage(Dict.DATABASE_NAME_AND_VERSION), proteinDetails .getDatabaseNameAndVersion()); - properties.put(viewContext.getMessage(Dict.SEQUENCE_NAME), proteinDetails.getSequence()); - properties.put(viewContext.getMessage(Dict.PEPTIDE_COUNT), proteinDetails.getPeptides() - .size()); String markedSequence = markPeptides(proteinDetails.getSequence(), proteinDetails.getPeptides()); - properties.put(viewContext.getMessage(Dict.PEPTIDE), markedSequence); + properties.put(viewContext.getMessage(Dict.SEQUENCE_NAME), markedSequence); + properties.put(viewContext.getMessage(Dict.PEPTIDE_COUNT), proteinDetails.getPeptides() + .size()); properties.put(viewContext.getMessage(Dict.FDR), proteinDetails.getFalseDiscoveryRate()); @@ -241,7 +217,11 @@ public class ProteinViewer extends private static String markPeptides(String sequence, List<Peptide> peptides) { List<String> peptideSequences = extractSequences(peptides); - return OccurrencesMarker.markOccurrencesWithHtml(sequence, peptideSequences); + String markedSequence = + OccurrencesMarker.markOccurrencesWithHtml(sequence, peptideSequences, + AMINOACIDS_IN_SEQUENCE_PER_LINE); + // the letters should have fixed width + return "<font style=\"font-family:monospace\">" + markedSequence + "</font>"; } private static List<String> extractSequences(List<Peptide> peptides) diff --git a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java index 1f58f89ae6b..0986301308f 100644 --- a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java +++ b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java @@ -17,43 +17,77 @@ package ch.systemsx.cisd.openbis.plugin.phosphonetx.client.web.client.application; import java.util.Arrays; -import java.util.List; import org.testng.AssertJUnit; import org.testng.annotations.Test; +import ch.rinn.restrictions.Friend; + /** * @author Tomasz Pylak */ +@Friend(toClasses = OccurrencesMarker.class) public class OccurrencesMarkerTest extends AssertJUnit { @Test public void testFindNonOverlappingOccurrences() { - OccurrencesMarker marker = new OccurrencesMarker("<", ">"); - List<String> marked = - marker.mark("hello, my beautiful world!", Arrays.asList("hello", "world")); - assertEquals(1, marked.size()); - assertEquals("<hello>, my beautiful <world>!", marked.get(0)); + OccurrencesMarker marker = createMarker(); + String marked = marker.mark("hello, my beautiful world!", Arrays.asList("hello", "world")); + assertEquals("<hello>, my beautiful <world>!", marked); + } + + private OccurrencesMarker createMarker() + { + return new OccurrencesMarker('<', '>'); } @Test public void testFindOverlappingOccurrences() { - OccurrencesMarker marker = new OccurrencesMarker("<", ">"); - List<String> marked = marker.mark("aaaa", Arrays.asList("aa")); - assertEquals(2, marked.size()); - assertEquals("<aa><aa>", marked.get(0)); - assertEquals("a<aa>a", marked.get(1)); + OccurrencesMarker marker = createMarker(); + String marked = marker.mark("aaaa", Arrays.asList("aa")); + assertEquals("<aaaa>", marked); } @Test public void testFindNoOccurrences() { - OccurrencesMarker marker = new OccurrencesMarker("<", ">"); - List<String> marked = marker.mark("aaaa", Arrays.asList("x")); - assertEquals(1, marked.size()); - assertEquals("aaaa", marked.get(0)); + OccurrencesMarker marker = createMarker(); + String marked = marker.mark("aaaa", Arrays.asList("x")); + assertEquals("aaaa", marked); + } + + @Test + public void testFindContainingOccurrences() + { + OccurrencesMarker marker = createMarker(); + String marked = marker.mark("xabcx", Arrays.asList("abc", "b")); + assertEquals("x<abc>x", marked); + } + + @Test + public void testBreakLinesLastLineNotFull() + { + String lines = OccurrencesMarker.breakLines("1234567", 3, "x"); + assertEquals("123x456x7", lines); + } + + @Test + public void testBreakLinesLastLineFull() + { + String lines = OccurrencesMarker.breakLines("123456", 3, "x"); + assertEquals("123x456", lines); + } + + @Test + public void testReplaceTags() + { + String marked = new OccurrencesMarker('(', ')').replaceTags("a(a)a", "<", ">"); + assertEquals("a<a>a", marked); + + marked = new OccurrencesMarker('<', '>').replaceTags("a<a>a", "(", ")"); + assertEquals("a(a)a", marked); } } -- GitLab