Skip to content
Snippets Groups Projects
Commit d62311a6 authored by juanf
Browse files

SSDM-1728: YeastLab Data Curation - Report

SVN: 33831
parent de76558c
No related branches found
No related tags found
No related merge requests found
...@@ -9,22 +9,26 @@ import xml.etree.ElementTree as ET ...@@ -9,22 +9,26 @@ import xml.etree.ElementTree as ET
## ##
# Sample types that the verification pass inspects.
sampleTypesToVerify = [
    "YEAST",
    "POMBE",
]
# Log levels whose messages are actually printed by log(); other levels are
# still counted but stay silent.
logLevelsToPrint = [
    "ERROR",
    "REPORT",
    "MANUAL-FIX",
    "AUTO-FIX-2",
]
## ##
## Logging ## Logging
## ##
# Running totals maintained by log(); read by the final report.
numberOfManualFixes = 0
numberOfAutoFixesDeletes = 0
numberOfAutoFixesLost = 0


def log(level, message):
    """Count fix-type messages and print the ones whose level is enabled.

    level   -- category tag of the message, e.g. "ERROR", "REPORT",
               "MANUAL-FIX", "AUTO-FIX" or "AUTO-FIX-2".
    message -- text printed after the "[level] " prefix.

    The counters are updated for every call, whether or not the message is
    printed: "MANUAL-FIX" bumps numberOfManualFixes, "AUTO-FIX" bumps
    numberOfAutoFixesDeletes and "AUTO-FIX-2" bumps numberOfAutoFixesLost.
    """
    global numberOfManualFixes, numberOfAutoFixesDeletes, numberOfAutoFixesLost

    if level == "MANUAL-FIX":
        numberOfManualFixes += 1
    elif level == "AUTO-FIX":
        numberOfAutoFixesDeletes += 1
    elif level == "AUTO-FIX-2":
        numberOfAutoFixesLost += 1

    # Exact membership on purpose: a substring test would also print
    # "AUTO-FIX" because it is contained in the enabled level "AUTO-FIX-2".
    if level in logLevelsToPrint:
        # Parenthesised single argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("[" + level + "] " + message)
## ##
...@@ -75,21 +79,23 @@ def process(tr): ...@@ -75,21 +79,23 @@ def process(tr):
verify(tr, sample) verify(tr, sample)
global numberOfManualFixes global numberOfManualFixes
global numberOfAutoFixes global numberOfAutoFixesDeletes
log("REPORT", "FOUND " + str(numberOfAutoFixes) + " AUTOMATIC FIXES!"); global numberOfAutoFixesLost
log("REPORT", "FOUND " + str(numberOfAutoFixesDeletes) + " AUTOMATIC DELETE FIXES!");
log("REPORT", "FOUND " + str(numberOfAutoFixesLost) + " AUTOMATIC LOST FIXES!");
log("REPORT", "REQUIRED " + str(numberOfManualFixes) + " MANUAL FIXES!"); log("REPORT", "REQUIRED " + str(numberOfManualFixes) + " MANUAL FIXES!");
log("REPORT", "FINISH VERIFICATION REPORT!"); log("REPORT", "FINISH VERIFICATION REPORT!");
def verify(tr, sample): def verify(tr, sample):
annotationsRoot = getAnnotationsRootNodeFromSample(sample) annotationsRoot = getAnnotationsRootNodeFromSample(sample)
requiredAnnotationsFound = getRequiredAnnotations(sample)
#1. Delete annotations and parent to be added
#1.Annotations hierarchy #1.Annotations hierarchy
requiredAnnotationsFound = getRequiredAnnotations(sample) #To detect case 4
requiredAnnotationsFromParents = getRequiredAnnotationsFromParents(sample) #To detect case 5
if annotationsRoot is not None: if annotationsRoot is not None:
for annotation in annotationsRoot: for annotation in annotationsRoot:
annotatedSampleIdentifier = annotation.attrib["identifier"] #Identifier from annotated sample annotatedSampleIdentifier = annotation.attrib["identifier"] #Identifier from annotated sample
requiredAnnotationsFound[annotatedSampleIdentifier] = True requiredAnnotationsFound[annotatedSampleIdentifier] = True
requiredAnnotationsFromParents[annotatedSampleIdentifier] = True
try: try:
if isChild(sample, annotatedSampleIdentifier): if isChild(sample, annotatedSampleIdentifier):
#This is an annotation from a parent; it is correct by default and doesn't need further inspection. #This is an annotation from a parent; it is correct by default and doesn't need further inspection.
...@@ -110,10 +116,27 @@ def verify(tr, sample): ...@@ -110,10 +116,27 @@ def verify(tr, sample):
log("MANUAL-FIX", "CASE 2 - THE ANNOTATED SAMPLE IS NOT ANNOTATED WHERE IT SHOULD - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier +" NOT AT " + foundAncestor.getSampleIdentifier()); log("MANUAL-FIX", "CASE 2 - THE ANNOTATED SAMPLE IS NOT ANNOTATED WHERE IT SHOULD - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier +" NOT AT " + foundAncestor.getSampleIdentifier());
except Exception, err: except Exception, err:
log("ERROR", "PROCESSING ANNOTATIONS XML CHILD " + sample.getSampleIdentifier() + " ERR: " + str(err)); log("ERROR", "PROCESSING ANNOTATIONS XML CHILD " + sample.getSampleIdentifier() + " ERR: " + str(err));
#2.Missing Annotations #2.Missing Parents Annotations
for parentIdentifier in requiredAnnotationsFound: for parentIdentifier in requiredAnnotationsFound:
if not requiredAnnotationsFound[parentIdentifier]: if not requiredAnnotationsFound[parentIdentifier]:
log("MANUAL-FIX", "CASE 4 - MISSING ANNOTATIONS ON SAMPLE: " + sample.getSampleIdentifier() + " FOR PARENT:" + parentIdentifier); log("MANUAL-FIX", "CASE 4 - MISSING ANNOTATIONS ON SAMPLE: " + sample.getSampleIdentifier() + " FOR PARENT:" + parentIdentifier);
#3.Missing Annotations LOST
for parentAnnotationIdentifier in requiredAnnotationsFromParents:
if not requiredAnnotationsFromParents[parentAnnotationIdentifier]:
log("AUTO-FIX-2", "CASE 2 - MISSING LOST ANNOTATIONS ON SAMPLE: " + sample.getSampleIdentifier() + " FOR LOST:" + parentAnnotationIdentifier);
def getRequiredAnnotationsFromParents(sample):
    """Collect the plasmid annotations found on the sample's parents.

    Walks the parent identifiers of `sample`, keeps only Yeast and Pombe
    parents (identifiers containing "/FRY" or "/FRS"), and reads each such
    parent's annotations. Every annotation whose identifier contains "/FRP"
    (plasmids) is recorded.

    Returns a dict mapping each recorded annotation identifier to False;
    the caller flips entries to True as it finds them on the sample itself.
    """
    pendingByIdentifier = {}
    for parentId in sample.getParentSampleIdentifiers():
        # Only check Yeast and Pombe parents.
        if "/FRY" not in parentId and "/FRS" not in parentId:
            continue
        parentSample = getSampleFromCache(parentId)
        parentRoot = getAnnotationsRootNodeFromSample(parentSample)
        if parentRoot is None:
            continue
        for annotationNode in parentRoot:
            annotatedId = annotationNode.attrib["identifier"]
            # Only plasmids are required.
            if "/FRP" in annotatedId:
                pendingByIdentifier[annotatedId] = False
    return pendingByIdentifier
def getRequiredAnnotations(sample): def getRequiredAnnotations(sample):
requiredAnnotationsFound = {} requiredAnnotationsFound = {}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment