From fa025f686a042bd5ea8eae698bdc84ec655a037a Mon Sep 17 00:00:00 2001
From: juanf <juanf>
Date: Tue, 14 Apr 2015 12:30:14 +0000
Subject: [PATCH] SSDM-1728: YeastLab Data Curation - Report

SVN: 33821
---
 .../stellingconsistency/consistency.py        | 43 ++++++++++++-------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py b/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py
index 71f36a5165e..5f9a373566a 100644
--- a/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py
+++ b/plasmid/source/core-plugins/stellingmigration/1/dss/drop-boxes/stellingconsistency/consistency.py
@@ -8,11 +8,7 @@ import xml.etree.ElementTree as ET
 ## Definitions
 ##
 
-definitions = {
-               "YEAST" : {},
-               "POMBE" : {}
-};
-
+sampleTypesToVerify = ["YEAST","POMBE"];
 logLevelsToPrint = ["ERROR", "REPORT", "MANUAL-FIX"];
 
 ##
@@ -20,10 +16,14 @@ logLevelsToPrint = ["ERROR", "REPORT", "MANUAL-FIX"];
 ##
 
 numberOfManualFixes = 0;
+numberOfAutoFixes = 0;
 def log(level, message):
     if level == "MANUAL-FIX":
         global numberOfManualFixes
         numberOfManualFixes = numberOfManualFixes + 1
+    if level == "AUTO-FIX": #Counted for the final report; "AUTO-FIX" matches no entry of logLevelsToPrint, so the message itself is not printed
+        global numberOfAutoFixes
+        numberOfAutoFixes = numberOfAutoFixes + 1
     if any(level in s for s in logLevelsToPrint):
         print "[" + level + "] " + message;
 
@@ -66,24 +66,28 @@ def getSampleByPermId(tr, permId):
 def process(tr):
     log("REPORT", "START VERIFICATION REPORT!");
     
-    for sampleType in definitions:
-        properties = definitions[sampleType]
+    for sampleType in sampleTypesToVerify: #Plain list of sample type codes; no per-type properties are passed to verify() anymore
         samples = getSamplesByType(tr, sampleType)
         global currentCache
         currentCache = samples
         print sampleType + ": "+ str(len(samples))
         for sample in samples:
-            verify(tr, sample, properties)
+            verify(tr, sample)
     
     global numberOfManualFixes
+    global numberOfAutoFixes #Incremented by log() for every "AUTO-FIX" entry
+    log("REPORT", "FOUND " + str(numberOfAutoFixes) + " AUTOMATIC FIXES!");
     log("REPORT", "REQUIRED " + str(numberOfManualFixes) + " MANUAL FIXES!");
     log("REPORT", "FINISH VERIFICATION REPORT!");
 
-def verify(tr, sample, properties):
+def verify(tr, sample):
     annotationsRoot = getAnnotationsRootNodeFromSample(sample)
+    requiredAnnotationsFound = getRequiredAnnotations(sample)
+    #1.Annotations hierarchy
     if annotationsRoot is not None:
         for annotation in annotationsRoot:
             annotatedSampleIdentifier = annotation.attrib["identifier"] #Identifier from annotated sample
+            requiredAnnotationsFound[annotatedSampleIdentifier] = True
             try:
                 if isChild(sample, annotatedSampleIdentifier):
                     #This is an annotation from a parent, this is by default correct and don't needs further inspection.
@@ -95,17 +99,26 @@ def verify(tr, sample, properties):
                     if foundAnnotation is not None and foundAncestor is not None:
                         log("INFO", "BAD CHILD FOUND - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier);
                         if areAnnotationsEqual(annotation, foundAnnotation):
-                            log("INFO", "GOOD REPEATED ANNOTATION THAT CAN BE DELETED - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier);
+                            log("AUTO-FIX", "CASE 1 - GOOD REPEATED ANNOTATION THAT CAN BE DELETED - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier);
                         else:
-                            log("MANUAL-FIX", "THE ANNOTATION: " + annotatedSampleIdentifier + " IS DIFFERENT AT SAMPLE: " + sample.getSampleIdentifier() + " AND ORIGINAL ANCESTOR:" + foundAncestor.getSampleIdentifier());
+                            log("MANUAL-FIX", "CASE 3 - THE ANNOTATION: " + annotatedSampleIdentifier + " IS DIFFERENT AT SAMPLE: " + sample.getSampleIdentifier() + " AND ORIGINAL ANCESTOR:" + foundAncestor.getSampleIdentifier());
                     elif foundAncestor is None:
-                        log("MANUAL-FIX", "THE ANNOTATED SAMPLE IS NOT AN ANCESTOR - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION WITH MISSING ANCESTOR:" + annotatedSampleIdentifier);
+                        log("MANUAL-FIX", "CASE 1 - THE ANNOTATED SAMPLE IS NOT AN ANCESTOR - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION WITH MISSING ANCESTOR:" + annotatedSampleIdentifier);
                     elif foundAnnotation is None:
-                        log("MANUAL-FIX", "THE ANNOTATED SAMPLE IS NOT ANNOTATED WHERE IT SHOULD - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier +" NOT AT " + foundAncestor.getSampleIdentifier());
+                        log("MANUAL-FIX", "CASE 2 - THE ANNOTATED SAMPLE IS NOT ANNOTATED WHERE IT SHOULD - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier +" NOT AT " + foundAncestor.getSampleIdentifier());
             except Exception:
                 log("ERROR", "PROCESSING ANNOTATIONS XML CHILD " + sample.getSampleIdentifier());
-    else:
-        pass #No valid annotations found
+    #2.Missing Annotations
+    for parentIdentifier in requiredAnnotationsFound:
+        if not requiredAnnotationsFound[parentIdentifier]:
+            log("MANUAL-FIX", "CASE 4 - MISSING ANNOTATIONS ON SAMPLE: " + sample.getSampleIdentifier() + " FOR PARENT:" + parentIdentifier);
+
+def getRequiredAnnotations(sample): #Builds the checklist of parent identifiers that must have an annotation on this sample
+    requiredAnnotationsFound = {} #parent identifier -> False; verify() sets an entry to True when it meets an annotation with that identifier
+    for parentIdentifier in sample.getParentSampleIdentifiers():
+        if "/FRP" in parentIdentifier: #Only require Plasmids, recognised here by "/FRP" appearing in the parent identifier
+            requiredAnnotationsFound[parentIdentifier] = False;
+    return requiredAnnotationsFound;
 
 def areAnnotationsEqual(annotationA, annotationB):
     for key in annotationA.attrib:
-- 
GitLab