Skip to content
Snippets Groups Projects
Commit fa025f68 authored by juanf's avatar juanf
Browse files

SSDM-1728: YeastLab Data Curation - Report

SVN: 33821
parent 49783fe2
No related branches found
No related tags found
No related merge requests found
......@@ -8,11 +8,7 @@ import xml.etree.ElementTree as ET
## Definitions
##
# NOTE(review): `definitions` is keyed by sample type with empty per-type dicts.
# This diff view shows it next to `sampleTypesToVerify`, which looks like its
# replacement -- confirm which of the two the current file still keeps.
definitions = {
"YEAST" : {},
"POMBE" : {}
};
# Sample types whose samples process() fetches and verifies.
sampleTypesToVerify = ["YEAST","POMBE"];
# Levels that log() writes to stdout; messages at any other level are not printed.
logLevelsToPrint = ["ERROR", "REPORT", "MANUAL-FIX"];
##
......@@ -20,10 +16,14 @@ logLevelsToPrint = ["ERROR", "REPORT", "MANUAL-FIX"];
##
# Running tallies: log() adds one to the matching counter for every
# "MANUAL-FIX" / "AUTO-FIX" message it receives.
numberOfManualFixes = 0;
numberOfAutoFixes = 0;
def log(level, message):
    """Count fix-type messages and print the ones whose level is enabled.

    level   -- severity tag. "MANUAL-FIX" and "AUTO-FIX" each increment their
               module-level counter, whether or not the level is printed.
    message -- text written after the "[level] " prefix.

    A line is written to stdout only when `level` is an entry of
    logLevelsToPrint.
    """
    global numberOfManualFixes, numberOfAutoFixes
    if level == "MANUAL-FIX":
        numberOfManualFixes = numberOfManualFixes + 1
    elif level == "AUTO-FIX":
        numberOfAutoFixes = numberOfAutoFixes + 1
    # Exact membership: a per-entry substring test would also let through
    # fragments such as "FIX" or "" that are not configured levels.
    if level in logLevelsToPrint:
        # A single parenthesised argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("[" + level + "] " + message)
......@@ -66,24 +66,28 @@ def getSampleByPermId(tr, permId):
def process(tr):
    """Run the verification report over every type in sampleTypesToVerify.

    tr -- handle forwarded unchanged to getSamplesByType() and verify().

    For each sample type the fetched samples are published through the
    module-level `currentCache`, their count is printed, and every sample is
    passed to verify(). The automatic / manual fix tallies maintained by
    log() are reported at the end.
    """
    # currentCache is rebound below, so it must be declared global; the two
    # counters are only read here and need no declaration.
    global currentCache
    log("REPORT", "START VERIFICATION REPORT!")
    for sampleType in sampleTypesToVerify:
        samples = getSamplesByType(tr, sampleType)
        currentCache = samples
        print(sampleType + ": " + str(len(samples)))
        for sample in samples:
            # Single call using the two-argument verify(tr, sample) signature.
            verify(tr, sample)
    log("REPORT", "FOUND " + str(numberOfAutoFixes) + " AUTOMATIC FIXES!")
    log("REPORT", "REQUIRED " + str(numberOfManualFixes) + " MANUAL FIXES!")
    log("REPORT", "FINISH VERIFICATION REPORT!")
def verify(tr, sample, properties):
def verify(tr, sample):
annotationsRoot = getAnnotationsRootNodeFromSample(sample)
requiredAnnotationsFound = getRequiredAnnotations(sample)
#1.Annotations hierarchy
if annotationsRoot is not None:
for annotation in annotationsRoot:
annotatedSampleIdentifier = annotation.attrib["identifier"] #Identifier from annotated sample
requiredAnnotationsFound[annotatedSampleIdentifier] = True
try:
if isChild(sample, annotatedSampleIdentifier):
#This is an annotation from a parent; it is correct by default and doesn't need further inspection.
......@@ -95,17 +99,26 @@ def verify(tr, sample, properties):
if foundAnnotation is not None and foundAncestor is not None:
log("INFO", "BAD CHILD FOUND - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier);
if areAnnotationsEqual(annotation, foundAnnotation):
log("INFO", "GOOD REPEATED ANNOTATION THAT CAN BE DELETED - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier);
log("AUTO-FIX", "CASE 1 - GOOD REPEATED ANNOTATION THAT CAN BE DELETED - " + sample.getSampleIdentifier() + " " + annotatedSampleIdentifier);
else:
log("MANUAL-FIX", "THE ANNOTATION: " + annotatedSampleIdentifier + " IS DIFFERENT AT SAMPLE: " + sample.getSampleIdentifier() + " AND ORIGINAL ANCESTOR:" + foundAncestor.getSampleIdentifier());
log("MANUAL-FIX", "CASE 3 - THE ANNOTATION: " + annotatedSampleIdentifier + " IS DIFFERENT AT SAMPLE: " + sample.getSampleIdentifier() + " AND ORIGINAL ANCESTOR:" + foundAncestor.getSampleIdentifier());
elif foundAncestor is None:
log("MANUAL-FIX", "THE ANNOTATED SAMPLE IS NOT AN ANCESTOR - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION WITH MISSING ANCESTOR:" + annotatedSampleIdentifier);
log("MANUAL-FIX", "CASE 1 - THE ANNOTATED SAMPLE IS NOT AN ANCESTOR - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION WITH MISSING ANCESTOR:" + annotatedSampleIdentifier);
elif foundAnnotation is None:
log("MANUAL-FIX", "THE ANNOTATED SAMPLE IS NOT ANNOTATED WHERE IT SHOULD - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier +" NOT AT " + foundAncestor.getSampleIdentifier());
log("MANUAL-FIX", "CASE 2 - THE ANNOTATED SAMPLE IS NOT ANNOTATED WHERE IT SHOULD - FOR SAMPLE: " + sample.getSampleIdentifier() + " ANNOTATION: " + annotatedSampleIdentifier +" NOT AT " + foundAncestor.getSampleIdentifier());
except Exception:
log("ERROR", "PROCESSING ANNOTATIONS XML CHILD " + sample.getSampleIdentifier());
else:
pass #No valid annotations found
#2.Missing Annotations
for parentIdentifier in requiredAnnotationsFound:
if not requiredAnnotationsFound[parentIdentifier]:
log("MANUAL-FIX", "CASE 4 - MISSING ANNOTATIONS ON SAMPLE: " + sample.getSampleIdentifier() + " FOR PARENT:" + parentIdentifier);
def getRequiredAnnotations(sample):
    """Return a dict flagging each plasmid parent of `sample` as not yet found.

    sample -- object exposing getParentSampleIdentifiers().

    The keys are the parent identifiers that contain "/FRP"; every value
    starts as False so the caller can switch an entry to True once it meets
    an annotation for that parent.
    """
    plasmidParents = [
        parentIdentifier
        for parentIdentifier in sample.getParentSampleIdentifiers()
        if "/FRP" in parentIdentifier  # Only require Plasmids
    ]
    return dict.fromkeys(plasmidParents, False)
def areAnnotationsEqual(annotationA, annotationB):
for key in annotationA.attrib:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment