From 9a004c9c9166ab52f58dcdd9463292956d98dce4 Mon Sep 17 00:00:00 2001 From: felmer <felmer> Date: Mon, 16 Nov 2009 15:38:27 +0000 Subject: [PATCH] SE-163 SVN: 13426 --- .../phosphonetx/BuildAndEnvironmentInfo.java | 46 ------ .../client/application/OccurrencesMarker.java | 148 +----------------- .../shared/basic/dto/Occurrence.java | 64 ++++++++ .../shared/basic/dto/OccurrenceUtil.java | 84 ++++++++++ .../application/OccurrencesMarkerTest.java | 4 +- .../shared/basic/dto/OccurrenceUtilTest.java | 46 ++++++ 6 files changed, 200 insertions(+), 192 deletions(-) delete mode 100644 rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/BuildAndEnvironmentInfo.java create mode 100644 rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/Occurrence.java create mode 100644 rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtil.java create mode 100644 rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtilTest.java diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/BuildAndEnvironmentInfo.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/BuildAndEnvironmentInfo.java deleted file mode 100644 index 3cfcbdfbfd9..00000000000 --- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/BuildAndEnvironmentInfo.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2009 ETH Zuerich, CISD - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ch.systemsx.cisd.openbis.plugin.phosphonetx; - -import ch.systemsx.cisd.base.utilities.AbstractBuildAndEnvironmentInfo; - - -/** - * The build and environment information for PhosphoNetX. - * - * @author Franz-Josef Elmer - */ -public class BuildAndEnvironmentInfo extends AbstractBuildAndEnvironmentInfo -{ - private final static String BASE = "rtd_phosphonetx"; - - public final static BuildAndEnvironmentInfo INSTANCE = new BuildAndEnvironmentInfo(); - - private BuildAndEnvironmentInfo() - { - super(BASE); - } - - /** - * Shows build and environment information on the console. - */ - public static void main(String[] args) - { - System.out.println(INSTANCE); - } - -} diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java index 006bd2f2cf2..87cd57b8d6c 100644 --- a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java +++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarker.java @@ -16,10 +16,11 @@ package ch.systemsx.cisd.openbis.plugin.phosphonetx.client.web.client.application; -import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto.Occurrence; +import ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto.OccurrenceUtil; + /** * This class is able to find all occurrences of the given set of words in the provided template and * mark the beginning and end of those words. Note that when occurrences overlap the overlaps are @@ -135,105 +136,7 @@ public class OccurrencesMarker */ public String mark(String template, List<String> words) { - List<Occurrence> occurrences = calcSortedOccurrences(template, words); - if (hasOverlapping(occurrences)) - { - return markOverlapping(template, occurrences); - } else - { - return markNonoverlapping(template, occurrences); - } - } - - // describes one occurence of the word in a template - private static class Occurrence implements Comparable<Occurrence> - { - private final String word; - - private final int startIndex; - - public Occurrence(String word, int startIndex) - { - this.word = word; - this.startIndex = startIndex; - } - - public String getWord() - { - return word; - } - - public int getStartIndex() - { - return startIndex; - } - - public int getEndIndex() - { - return startIndex + word.length() - 1; - } - - public int compareTo(Occurrence o) - { - return getStartIndex() - o.getStartIndex(); - } - - @Override - public String toString() - { - return "[" + word + "@" + startIndex + "]"; - } - } - - private String markOverlapping(String template, List<Occurrence> sortedOccurrences) - { - List<Occurrence> mergedOccurrences = mergeOverlaps(template, sortedOccurrences); - return markNonoverlapping(template, mergedOccurrences); - } - - private List<Occurrence> mergeOverlaps(String template, List<Occurrence> sortedOccurrences) - { - List<Occurrence> result = new ArrayList<Occurrence>(); - if (sortedOccurrences.size() == 0) - { - return result; - } - int startIndex = -1; - int endIndex = -1; - for (Occurrence occurrence : sortedOccurrences) - { - if (occurrence.getStartIndex() <= endIndex) - { // overlap - endIndex = Math.max(endIndex, occurrence.getEndIndex()); - } else - { // current word does not overlap with the words browsed before - if (startIndex != -1) - { // create a new word from the words browsed before - Occurrence newOccurrence = createOccurence(template, startIndex, endIndex); - result.add(newOccurrence); - } - startIndex = occurrence.getStartIndex(); - endIndex = occurrence.getEndIndex(); - } - } - Occurrence newOccurrence = createOccurence(template, startIndex, endIndex); - result.add(newOccurrence); - return result; - } - - private static Occurrence createOccurence(String template, int startIndex, int endIndex) - { - assert startIndex != -1 : "start index should be initialized"; - assert endIndex != -1 : "end index should be initialized"; - String mergedWord = template.substring(startIndex, endIndex + 1); - Occurrence newOccurrence = new Occurrence(mergedWord, startIndex); - return newOccurrence; - } - - // marks all occurrences in the template, assuming that all occurrences do not - // overlap with each other and are sorted by the start position - private String markNonoverlapping(String template, List<Occurrence> sortedOccurrences) - { + List<Occurrence> sortedOccurrences = OccurrenceUtil.getCoverage(template, words); StringBuffer sb = new StringBuffer(); int nextUnprocessedCharIndex = 0; for (Occurrence occurrence : sortedOccurrences) @@ -249,47 +152,4 @@ public class OccurrencesMarker return sb.toString(); } - // true if two occurrences of the same word in the template overlap (have a common part) - private static boolean hasOverlapping(List<Occurrence> occurrences) - { - int prevEndIndex = -1; - for (Occurrence occurrence : occurrences) - { - if (occurrence.getStartIndex() <= prevEndIndex) - { - return true; - } - prevEndIndex = Math.max(prevEndIndex, occurrence.getEndIndex()); - } - return false; - } - - // calculates a list of all words occurances, sorts it be starting position - private static List<Occurrence> calcSortedOccurrences(String template, List<String> words) - { - List<Occurrence> result = new ArrayList<Occurrence>(); - for (String word : words) - { - result.addAll(calcOccurrences(template, word)); - } - Collections.sort(result); - return result; - } - - private static List<Occurrence> calcOccurrences(String template, String word) - { - List<Occurrence> result = new ArrayList<Occurrence>(); - int startIndex = 0; - while (true) - { - int occurrenceIndex = template.indexOf(word, startIndex); - if (occurrenceIndex == -1) - { - break; - } - result.add(new Occurrence(word, occurrenceIndex)); - startIndex = occurrenceIndex + 1; // maybe the word overlaps with itself? - } - return result; - } } diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/Occurrence.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/Occurrence.java new file mode 100644 index 00000000000..0a79583f908 --- /dev/null +++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/Occurrence.java @@ -0,0 +1,64 @@ +/* + * Copyright 2009 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto; + +/** + * The occurrence of a word in a text. Occurrences are comparable by their start indexes. + * + * @author Tomasz Pylak + */ +public class Occurrence implements Comparable<Occurrence> +{ + private final String word; + + private final int startIndex; + + /** + * Creates a new instance of the specified word which appears at the specified start index. + */ + public Occurrence(String word, int startIndex) + { + this.word = word; + this.startIndex = startIndex; + } + + public String getWord() + { + return word; + } + + public int getStartIndex() + { + return startIndex; + } + + public int getEndIndex() + { + return startIndex + word.length() - 1; + } + + public int compareTo(Occurrence o) + { + return getStartIndex() - o.getStartIndex(); + } + + @Override + public String toString() + { + return "[" + word + "@" + startIndex + "]"; + } +} \ No newline at end of file diff --git a/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtil.java b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtil.java new file mode 100644 index 00000000000..a9564e33dee --- /dev/null +++ b/rtd_phosphonetx/source/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtil.java @@ -0,0 +1,84 @@ +/* + * Copyright 2009 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + + +/** + * Helper class for calculating peptide on protein coverage. + * + * @author Franz-Josef Elmer + */ +public class OccurrenceUtil +{ + /** + * Returns a list of non-overlapping sequences built by the specified peptides covering the + * specified protein. + */ + public static List<Occurrence> getCoverage(String protein, List<String> peptides) + { + ArrayList<Occurrence> list = new ArrayList<Occurrence>(); + List<Occurrence> sortedList = calcSortedOccurrences(protein, peptides); + if (sortedList.isEmpty() == false) + { + Occurrence current = sortedList.get(0); + for (int i = 1; i < sortedList.size(); i++) + { + String currentPeptide = current.getWord(); + Occurrence next = sortedList.get(i); + int diff = next.getStartIndex() - current.getStartIndex(); + if (current.getWord().length() < diff) + { + list.add(current); + current = next; + } else if (current.getEndIndex() < next.getEndIndex()) + { + String mergedPeptides = currentPeptide.substring(0, diff) + next.getWord(); + current = new Occurrence(mergedPeptides, current.getStartIndex()); + } + } + list.add(current); + } + return list; + } + + // calculates a list of all words occurences, sorts it be starting position + private static List<Occurrence> calcSortedOccurrences(String protein, List<String> peptides) + { + List<Occurrence> result = new ArrayList<Occurrence>(); + for (String word : peptides) + { + int startIndex = 0; + while (true) + { + int occurrenceIndex = protein.indexOf(word, startIndex); + if (occurrenceIndex == -1) + { + break; + } + result.add(new Occurrence(word, occurrenceIndex)); + startIndex = occurrenceIndex + 1; // maybe the word overlaps with itself? + } + } + Collections.sort(result); + return result; + } + +} diff --git a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java index 3679c81b71d..84aefd07bd0 100644 --- a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java +++ b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/client/web/client/application/OccurrencesMarkerTest.java @@ -46,8 +46,8 @@ public class OccurrencesMarkerTest extends AssertJUnit public void testFindOverlappingOccurrences() { OccurrencesMarker marker = createMarker(); - String marked = marker.mark("aaaa", Arrays.asList("aa")); - assertEquals("<aaaa>", marked); + assertEquals("<aaaa>", marker.mark("aaaa", Arrays.asList("aa"))); + assertEquals("h<ello> w<orld>, h<ello> universe!", marker.mark("hello world, hello universe!", Arrays.asList("ell", "o", "orld"))); } @Test diff --git a/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtilTest.java b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtilTest.java new file mode 100644 index 00000000000..0b14def68ad --- /dev/null +++ b/rtd_phosphonetx/sourceTest/java/ch/systemsx/cisd/openbis/plugin/phosphonetx/shared/basic/dto/OccurrenceUtilTest.java @@ -0,0 +1,46 @@ +/* + * Copyright 2009 ETH Zuerich, CISD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package ch.systemsx.cisd.openbis.plugin.phosphonetx.shared.basic.dto; + +import java.util.Arrays; + +import org.testng.AssertJUnit; +import org.testng.annotations.Test; + +/** + * + * + * @author Franz-Josef Elmer + */ +public class OccurrenceUtilTest extends AssertJUnit +{ + @Test + public void test() + { + check("[[ab@0], [abcd@4], [ab@13]]", "abc abcde hahab", "ab", "bcd"); + check("[[ab@0], [abcd@4], [ab@13]]", "abc abcde hahab", "ab", "cd"); + check("[[abc@0], [abc@4], [ab@13]]", "abc abcde hahab", "ab", "b", "c"); + check("[[abc@0], [abc@4], [b@14]]", "abc abcde hahab", "abc", "b"); + check("[[abcde@0]]", "abcdef", "abcd", "b", "de"); + check("[[haha@10]]", "abc abcde hahab", "haha", "h"); + } + + private void check(String expectedList, String sequence, String... words) + { + assertEquals(expectedList, OccurrenceUtil.getCoverage(sequence, Arrays.asList(words)).toString()); + } +} -- GitLab