From 49783fe28c6d3b6caafe3c53fcdfeae70999c6f5 Mon Sep 17 00:00:00 2001
From: juanf <juanf>
Date: Tue, 14 Apr 2015 11:49:51 +0000
Subject: [PATCH] SSDM-1728: YeastLab Data Curation - Report

SVN: 33820
---
 .../stellingconsistency/consistency.py        | 220 ++++++++++++++++++
 .../stellingconsistency/plugin.properties     |   9 +
 2 files changed, 229 insertions(+)
 create mode 100644 plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py
 create mode 100644 plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/plugin.properties

diff --git a/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py b/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py
new file mode 100644
index 00000000000..71f36a5165e
--- /dev/null
+++ b/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py
@@ -0,0 +1,220 @@
+# consistency.py
+#
+# Jython drop box script that verifies the ANNOTATIONS_STATE property of the
+# samples of the configured types and prints a report. It only searches and
+# reads samples; it does not modify them.
+from datetime import datetime
+from ch.systemsx.cisd.openbis.generic.shared.api.v1.dto import SearchCriteria, SearchSubCriteria
+import ch.systemsx.cisd.openbis.generic.server.jython.api.v1.DataType as DataType
+import sys
+import xml.etree.ElementTree as ET
+
+##
+## Definitions
+##
+
+# Sample types to verify; the value of each entry is passed to verify().
+definitions = {
+    "YEAST" : {},
+    "POMBE" : {}
+}
+
+# Only messages with one of these levels are printed.
+logLevelsToPrint = ["ERROR", "REPORT", "MANUAL-FIX"]
+
+##
+## Logging
+##
+
+# Number of MANUAL-FIX messages logged since process() started.
+numberOfManualFixes = 0
+
+def log(level, message):
+    """Print message prefixed with its level and count MANUAL-FIX messages.
+
+    The message is printed only when level is listed in logLevelsToPrint.
+    MANUAL-FIX messages are counted in numberOfManualFixes.
+    """
+    global numberOfManualFixes
+    if level == "MANUAL-FIX":
+        numberOfManualFixes = numberOfManualFixes + 1
+    # Exact membership test: a substring test would also print unknown levels
+    # such as "FIX" or "", because they are contained in a printable level.
+    if level in logLevelsToPrint:
+        print("[" + level + "] " + message)
+
+##
+## Cache
+##
+
+# Samples of the type currently being verified; assigned by process().
+currentCache = None
+
+def getSampleFromCache(identifier):
+    """Return the cached sample with the given identifier, or None."""
+    if currentCache is None:
+        return None
+    for sample in currentCache:
+        if sample.getSampleIdentifier() == identifier:
+            return sample
+    return None
+
+##
+## Search
+##
+
+def getSamplesByType(tr, sampleType):
+    """Return the samples found by searching for the type sampleType."""
+    criteria = SearchCriteria()
+    criteria.setOperator(criteria.SearchOperator.MATCH_ANY_CLAUSES)
+    criteria.addMatchClause(criteria.MatchClause.createAttributeMatch(criteria.MatchClauseAttribute.TYPE, sampleType))
+    return tr.getSearchService().searchForSamples(criteria)
+
+def getSampleByPermId(tr, permId):
+    """Return the sample with the given permId, or None unless exactly one is found."""
+    criteria = SearchCriteria()
+    criteria.setOperator(criteria.SearchOperator.MATCH_ANY_CLAUSES)
+    criteria.addMatchClause(criteria.MatchClause.createAttributeMatch(criteria.MatchClauseAttribute.PERM_ID, permId))
+    samples = tr.getSearchService().searchForSamples(criteria)
+    # Compare by value; "is" tests object identity, which is not guaranteed for integers.
+    if len(samples) == 1:
+        return samples[0]
+    return None
+
+##
+## Main Methods
+##
+
+def process(tr):
+    """Verify all samples of every type in definitions and print the report.
+
+    tr must provide getSearchService(); nothing else of it is used here.
+    """
+    global currentCache
+    global numberOfManualFixes
+    # Start from zero so that a repeated call does not add to an earlier count.
+    numberOfManualFixes = 0
+    log("REPORT", "START VERIFICATION REPORT!")
+
+    for sampleType in definitions:
+        properties = definitions[sampleType]
+        samples = getSamplesByType(tr, sampleType)
+        currentCache = samples
+        print(sampleType + ": " + str(len(samples)))
+        for sample in samples:
+            verify(tr, sample, properties)
+
+    log("REPORT", "REQUIRED " + str(numberOfManualFixes) + " MANUAL FIXES!")
+    log("REPORT", "FINISH VERIFICATION REPORT!")
+
+def verify(tr, sample, properties):
+    """Check every annotation stored on sample and log the outcome.
+
+    An annotation that refers to a direct parent of sample is accepted. Any
+    other annotation is looked up on the cached ancestors of sample, and every
+    case other than an identical repeated annotation is logged as MANUAL-FIX.
+    The parameters tr and properties are not used.
+    """
+    annotationsRoot = getAnnotationsRootNodeFromSample(sample)
+    if annotationsRoot is None:
+        return # No valid annotations found
+    for annotation in annotationsRoot:
+        try:
+            # Read inside the try block so that an annotation without an identifier
+            # is logged as an error instead of aborting the whole report.
+            annotatedSampleIdentifier = annotation.attrib["identifier"] # Identifier from annotated sample
+            if isChild(sample, annotatedSampleIdentifier):
+                # This is an annotation from a parent; it is correct by default and needs no further inspection.
+                log("INFO", "GOOD ANNOTATION AT SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier)
+            else:
+                foundAnnotationAndAncestor = getAnnotationAndAncestor(annotatedSampleIdentifier, sample.getParentSampleIdentifiers())
+                foundAnnotation = foundAnnotationAndAncestor[0]
+                foundAncestor = foundAnnotationAndAncestor[1]
+                if foundAncestor is None:
+                    log("MANUAL-FIX", "THE ANNOTATED SAMPLE IS NOT AN ANCESTOR - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION WITH MISSING ANCESTOR:" + annotatedSampleIdentifier)
+                elif foundAnnotation is None:
+                    log("MANUAL-FIX", "THE ANNOTATED SAMPLE IS NOT ANNOTATED WHERE IT SHOULD - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier +" NOT AT " + foundAncestor.getSampleIdentifier())
+                else:
+                    log("INFO", "BAD CHILD FOUND - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier)
+                    if areAnnotationsEqual(annotation, foundAnnotation):
+                        log("INFO", "GOOD REPEATED ANNOTATION THAT CAN BE DELETED - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier)
+                    else:
+                        log("MANUAL-FIX", "THE ANNOTATION: " + annotatedSampleIdentifier + " IS DIFFERENT AT SAMPLE: " + sample.getSampleIdentifier() + " AND ORIGINAL ANCESTOR:" + foundAncestor.getSampleIdentifier())
+        except Exception:
+            # Include the exception so that the cause is visible in the report.
+            log("ERROR", "PROCESSING ANNOTATIONS XML CHILD " + sample.getSampleIdentifier() + " - " + repr(sys.exc_info()[1]))
+
+def areAnnotationsEqual(annotationA, annotationB):
+    """Return True when every attribute of annotationA has the same value in annotationB.
+
+    The attribute CONTAINED is ignored, and attributes that only annotationB
+    has are not compared.
+    """
+    for key in annotationA.attrib:
+        if key == "CONTAINED":
+            continue
+        value = annotationA.attrib[key]
+        # get() makes an attribute missing from annotationB a difference instead of a KeyError.
+        otherValue = annotationB.attrib.get(key)
+        if otherValue is None:
+            log("INFO", "EQUALITY FAILED FOR " + key + ": - " + value + " <missing>")
+            return False
+        if value != otherValue:
+            log("INFO", "EQUALITY FAILED FOR " + key + ": - " + value + " " + otherValue)
+            return False
+    return True
+
+def getAnnotationAndAncestor(annotatedSampleIdentifier, sampleParentsIdentifiers):
+    """Find the cached ancestor that has the annotated sample as a direct parent.
+
+    The search starts at sampleParentsIdentifiers and continues with the
+    parents of every ancestor found in the cache.
+
+    Returns [annotation, ancestor] when that ancestor carries an annotation for
+    annotatedSampleIdentifier, [None, ancestor] when it does not, and
+    [None, None] when no such ancestor is found.
+    """
+    ancestorsIdentifiers = list(sampleParentsIdentifiers)
+    # Ancestors reachable over several paths are inspected only once.
+    visitedIdentifiers = set()
+    while len(ancestorsIdentifiers) > 0:
+        ancestorIdentifier = ancestorsIdentifiers.pop(0)
+        if ancestorIdentifier in visitedIdentifiers:
+            continue
+        visitedIdentifiers.add(ancestorIdentifier)
+        ancestor = getSampleFromCache(ancestorIdentifier)
+        if ancestor is None:
+            continue
+        if isChild(ancestor, annotatedSampleIdentifier): # We only accept annotations from the original sample to avoid test repetitions
+            ancestorAnnotationsRoot = getAnnotationsRootNodeFromSample(ancestor)
+            if ancestorAnnotationsRoot is not None:
+                for annotation in ancestorAnnotationsRoot:
+                    if annotation.attrib["identifier"] == annotatedSampleIdentifier:
+                        return [annotation, ancestor]
+            return [None, ancestor]
+        ancestorsIdentifiers.extend(ancestor.getParentSampleIdentifiers())
+    return [None, None] # No cached ancestor has the annotated sample as parent
+
+def isChild(sample, identifier):
+    """Return True when identifier is the identifier of a direct parent of sample."""
+    for parentIdentifier in sample.getParentSampleIdentifiers():
+        # Exact comparison: a substring test would accept "/S/A1" for the parent "/S/A10".
+        if parentIdentifier == identifier:
+            return True
+    return False
+
+def getAnnotationsRootNodeFromSample(sample):
+    """Parse the ANNOTATIONS_STATE property of sample.
+
+    Returns the root element, or None when the property has no value, does not
+    contain "<root>" or cannot be parsed.
+    """
+    annotations = sample.getPropertyValue("ANNOTATIONS_STATE")
+    # A missing value must not reach the "in" test, which raises TypeError for None.
+    if annotations is None or '<root>' not in annotations:
+        return None
+    try:
+        return ET.fromstring(annotations)
+    except Exception:
+        log("ERROR", "READING ANNOTATIONS XML FOR " + sample.getSampleIdentifier())
+        return None
diff --git a/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/plugin.properties b/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/plugin.properties
new file mode 100644
index 00000000000..0f6f1d90c48
--- /dev/null
+++ b/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/plugin.properties
@@ -0,0 +1,9 @@
+##
+## Defaults
+##
+incoming-data-completeness-condition = auto-detection
+top-level-data-set-handler = ch.systemsx.cisd.etlserver.registrator.api.v2.JythonTopLevelDataSetHandlerV2
+storage-processor = ch.systemsx.cisd.etlserver.DefaultStorageProcessor
+script-path = consistency.py
+dropbox-name = stellingconsistency
+incoming-dir = ${root-dir}/stellingconsistency
\ No newline at end of file
-- 
GitLab