From fa025f686a042bd5ea8eae698bdc84ec655a037a Mon Sep 17 00:00:00 2001 From: juanf <juanf> Date: Tue, 14 Apr 2015 12:30:14 +0000 Subject: [PATCH] SSDM-1728: YeastLab Data Curation - Report SVN: 33821 --- .../stellingconsistency/consistency.py | 43 ++++++++++++------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py b/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py index 71f36a5165e..5f9a373566a 100644 --- a/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py +++ b/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py @@ -8,11 +8,7 @@ import xml.etree.ElementTree as ET ## Definitions ## -definitions = { - "YEAST" : {}, - "POMBE" : {} -}; - +sampleTypesToVerify = ["YEAST","POMBE"]; logLevelsToPrint = ["ERROR", "REPORT", "MANUAL-FIX"]; ## @@ -20,10 +16,14 @@ logLevelsToPrint = ["ERROR", "REPORT", "MANUAL-FIX"]; ## numberOfManualFixes = 0; +numberOfAutoFixes = 0; def log(level, message): if level == "MANUAL-FIX": global numberOfManualFixes numberOfManualFixes = numberOfManualFixes + 1 + if level == "AUTO-FIX": + global numberOfAutoFixes + numberOfAutoFixes = numberOfAutoFixes + 1 if any(level in s for s in logLevelsToPrint): print "[" + level + "] " + message; @@ -66,24 +66,28 @@ def getSampleByPermId(tr, permId): def process(tr): log("REPORT", "START VERIFICATION REPORT!"); - for sampleType in definitions: - properties = definitions[sampleType] + for sampleType in sampleTypesToVerify: samples = getSamplesByType(tr, sampleType) global currentCache currentCache = samples print sampleType + ": "+ str(len(samples)) for sample in samples: - verify(tr, sample, properties) + verify(tr, sample) global numberOfManualFixes + global numberOfAutoFixes + log("REPORT", "FOUND " + str(numberOfAutoFixes) + " AUTOMATIC FIXES!"); log("REPORT", "REQUIRED " + str(numberOfManualFixes) + " MANUAL FIXES!"); log("REPORT", "FINISH VERIFICATION REPORT!"); -def verify(tr, sample, properties): +def verify(tr, sample): annotationsRoot = getAnnotationsRootNodeFromSample(sample) + requiredAnnotationsFound = getRequiredAnnotations(sample) + #1.Annotations hierarchy if annotationsRoot is not None: for annotation in annotationsRoot: annotatedSampleIdentifier = annotation.attrib["identifier"] #Identifier from annotated sample + requiredAnnotationsFound[annotatedSampleIdentifier] = True try: if isChild(sample, annotatedSampleIdentifier): #This is an annotation from a parent, this is by default correct and don't needs further inspection. @@ -95,17 +99,26 @@ def verify(tr, sample, properties): if foundAnnotation is not None and foundAncestor is not None: log("INFO", "BAD CHILD FOUND - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier); if areAnnotationsEqual(annotation, foundAnnotation): - log("INFO", "GOOD REPEATED ANNOTATION THAT CAN BE DELETED - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier); + log("AUTO-FIX", "CASE 1 - GOOD REPEATED ANNOTATION THAT CAN BE DELETED - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier); else: - log("MANUAL-FIX", "THE ANNOTATION: " + annotatedSampleIdentifier + " IS DIFFERENT AT SAMPLE: " + sample.getSampleIdentifier() + " AND ORIGINAL ANCESTOR:" + foundAncestor.getSampleIdentifier()); + log("MANUAL-FIX", "CASE 3 - THE ANNOTATION: " + annotatedSampleIdentifier + " IS DIFFERENT AT SAMPLE: " + sample.getSampleIdentifier() + " AND ORIGINAL ANCESTOR:" + foundAncestor.getSampleIdentifier()); elif foundAncestor is None: - log("MANUAL-FIX", "THE ANNOTATED SAMPLE IS NOT AN ANCESTOR - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION WITH MISSING ANCESTOR:" + annotatedSampleIdentifier); + log("MANUAL-FIX", "CASE 1 - THE ANNOTATED SAMPLE IS NOT AN ANCESTOR - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION WITH MISSING ANCESTOR:" + annotatedSampleIdentifier); elif foundAnnotation is None: - log("MANUAL-FIX", "THE ANNOTATED SAMPLE IS NOT ANNOTATED WHERE IT SHOULD - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier +" NOT AT " + foundAncestor.getSampleIdentifier()); + log("MANUAL-FIX", "CASE 2 - THE ANNOTATED SAMPLE IS NOT ANNOTATED WHERE IT SHOULD - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier +" NOT AT " + foundAncestor.getSampleIdentifier()); except Exception: log("ERROR", "PROCESSING ANNOTATIONS XML CHILD " + sample.getSampleIdentifier()); - else: - pass #No valid annotations found + #2.Missing Annotations + for parentIdentifier in requiredAnnotationsFound: + if not requiredAnnotationsFound[parentIdentifier]: + log("MANUAL-FIX", "CASE 4 - MISSING ANNOTATIONS ON SAMPLE: " + sample.getSampleIdentifier() + " FOR PARENT:" + parentIdentifier); + +def getRequiredAnnotations(sample): + requiredAnnotationsFound = {} + for parentIdentifier in sample.getParentSampleIdentifiers(): + if "/FRP" in parentIdentifier: #Only require Plasmids + requiredAnnotationsFound[parentIdentifier] = False; + return requiredAnnotationsFound; def areAnnotationsEqual(annotationA, annotationB): for key in annotationA.attrib: -- GitLab