diff --git a/eu_basynthec/dist/etc/growth-profiles/data-set-handler.py b/eu_basynthec/dist/etc/growth-profiles/data-set-handler.py index 1694491cf26b05d93e8c75e158bc4bfc67bc40f6..8bdccd87bb344e829f56e0d37dac4976ee437c7f 100644 --- a/eu_basynthec/dist/etc/growth-profiles/data-set-handler.py +++ b/eu_basynthec/dist/etc/growth-profiles/data-set-handler.py @@ -20,6 +20,13 @@ def extract_strains(): line = lines[i] strains.append(line[0].upper()) return ",".join(strains) + +def strain_canonical(strainId): + """Return the canonical form of the strainId""" + if strainId.lower().startswith('jjs-din'): + return "JJS-DIn" + strainId[7:] + else: + return strainId.upper() def assign_properties(dataset, metadata): """Assign properties to the data set from information in the data.""" @@ -38,7 +45,7 @@ def assign_properties(dataset, metadata): value = metadata.get(prop) if (key == "STRAIN"): value = value + " (STRAIN)" - dataset.setPropertyValue(key, value.upper()) + dataset.setPropertyValue(key, strain_canonical(value)) def convert_data_to_tsv(tr, dataset, location): """Create a tsv file containing the data and add it to the data set.""" diff --git a/eu_basynthec/dist/etc/proteomics/data-set-handler.py b/eu_basynthec/dist/etc/proteomics/data-set-handler.py index 81da79a7eda66c1182a2b31d2be4b13436f2918a..6473491df43604c6732f3f7e9883d58d059e596f 100644 --- a/eu_basynthec/dist/etc/proteomics/data-set-handler.py +++ b/eu_basynthec/dist/etc/proteomics/data-set-handler.py @@ -12,6 +12,13 @@ def retrieve_experiment(tr, exp_id): exp = tr.getExperiment(exp_id) return exp +def strain_canonical(strainId): + """Return the canonical form of the strainId""" + if strainId.lower().startswith('jjs-din'): + return "JJS-DIn" + strainId[7:] + else: + return strainId.upper() + def assign_properties(dataset, metadata): """Assign properties to the data set from information in the data.""" propertyNameMap = { @@ -28,8 +35,8 @@ def assign_properties(dataset, metadata): value = metadata.get(prop) if (key == "STRAIN"): value = value + " (STRAIN)" - dataset.setPropertyValue(key, value.upper()) - + dataset.setPropertyValue(key, strain_canonical(value)) + def convert_data_to_tsv(tr, dataset, location): """Create a tsv file containing the data and add it to the data set.""" tr.createNewDirectory(dataset, location) diff --git a/eu_basynthec/dist/etc/shared/shared-classes.py b/eu_basynthec/dist/etc/shared/shared-classes.py index 85911070ab2460befe13e521846122e1b7abd846..b502be6edf4f4d435c2d704959722e8f39abcf33 100644 --- a/eu_basynthec/dist/etc/shared/shared-classes.py +++ b/eu_basynthec/dist/etc/shared/shared-classes.py @@ -139,14 +139,30 @@ class ValidationHelper: if match is None: self.errors.append(createFileValidationError("The Start Data Col must be a letter between A and Z (not " + value + ").")) -strainIdRegex = re.compile("^JJS-MGP[0-9]{1,3}|^JJS-DIN[0-9]{1,3}|^MS|CHASSIS\s*[1-3]|WT 168 TRP\+") -def isStrainIdValid(strainId): - """Return true if the strain id passes validation (has the form sepecified in the regex)""" - strainId = strainId.strip().upper() - match = strainIdRegex.match(strainId) +# +# Strain validation stuff +# +strainIdRegex = re.compile("^ms|chassis\s*[1-3]|wt 168 trp\+") +strainIdRegexFull = re.compile("^jjs-mgp[0-9]{1,3}|^jjs-din[0-9]{1,3}|^ms|chassis\s*[1-3]|wt 168 trp\+") +strainIds = {} +home_dir = os.environ.get('HOME', '') +if os.path.exists(home_dir + '/var/strainids.txt'): + for sid in open(home_dir + '/var/strainids.txt').readlines(): + strainIds[sid.strip().lower()] = 1 + +def _match(regex, strainId): + match = regex.match(strainId) if match is None: return False - return match.end() == len(strainId) + return len(match.group(0)) == len(strainId) + +def isStrainIdValid(strainId): + """Return true if the strain id passes validation (has the form specified in the regex and is in Chris' strain db)""" + strainIdLower = strainId.lower() + if len(strainIds) > 0: + return strainIds.has_key(strainIdLower) or _match(strainIdRegex, strainIdLower) + else: + return _match(strainIdRegexFull, strainIdLower) def strainValidationErrorMessageFragment(strain): """Return a sentence fragment describing the strain validation error.""" diff --git a/eu_basynthec/dist/etc/transcriptomics/data-set-handler.py b/eu_basynthec/dist/etc/transcriptomics/data-set-handler.py index df05c0e01b490a91de6ddc5e4ebd0b59e2a19a22..1b7eb9a5f794eb5370cabcdcbb7f323742f2a807 100644 --- a/eu_basynthec/dist/etc/transcriptomics/data-set-handler.py +++ b/eu_basynthec/dist/etc/transcriptomics/data-set-handler.py @@ -37,6 +37,13 @@ def retrieve_experiment(tr, exp_id): exp = tr.getExperiment(exp_id) return exp +def strain_canonical(strainId): + """Return the canonical form of the strainId""" + if strainId.lower().startswith('jjs-din'): + return "JJS-DIn" + strainId[7:] + else: + return strainId.upper() + def assign_properties(dataset, metadata): """Assign properties to the data set from information in the data.""" propertyNameMap = { @@ -54,7 +61,7 @@ def assign_properties(dataset, metadata): value = metadata.get(prop) if (key == "STRAIN"): value = value + " (STRAIN)" - dataset.setPropertyValue(key, value.upper()) + dataset.setPropertyValue(key, strain_canonical(value)) def convert_data_to_tsv(tr, start_row, start_col, dataset, location): """Create a tsv file containing the data and add it to the data set.""" diff --git a/eu_basynthec/sourceTest/examples/Transcriptomics-Example.xlsx b/eu_basynthec/sourceTest/examples/Transcriptomics-Example.xlsx index 583ce54ebc47b44dbd9e0974cb26be60018d7ab8..8c93558fdd8c202048cbcdc515dd60efce7db3b1 100644 Binary files a/eu_basynthec/sourceTest/examples/Transcriptomics-Example.xlsx and b/eu_basynthec/sourceTest/examples/Transcriptomics-Example.xlsx differ diff --git a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/transcriptomics/TranscriptomicsDataSetRegistratorTest.java b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/transcriptomics/TranscriptomicsDataSetRegistratorTest.java index 11ea320a123db6cc6bddb150b0fb5d3770d646bd..e1019cba5e5fe14b404a7fdfdecffa95b3ea2c8c 100644 --- a/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/transcriptomics/TranscriptomicsDataSetRegistratorTest.java +++ b/eu_basynthec/sourceTest/java/eu/basynthec/cisd/dss/transcriptomics/TranscriptomicsDataSetRegistratorTest.java @@ -74,7 +74,7 @@ public class TranscriptomicsDataSetRegistratorTest extends AbstractBaSynthecData assertNotNull(strainProperty); assert null != strainProperty; - assertEquals("MGP253,MGP776", strainProperty.getValue()); + assertEquals("JJS-MGP253,JJS-MGP776", strainProperty.getValue()); NewExternalData tsvDataSet = atomicOperationDetails.recordedObject().getDataSetRegistrations().get(2); @@ -91,10 +91,10 @@ public class TranscriptomicsDataSetRegistratorTest extends AbstractBaSynthecData String[] contents = tsvSplitFolder.list(); Arrays.sort(contents); String[] expectedContents = - { "Transcriptomics-Example.xlsx_MGP253.tsv", - "Transcriptomics-Example.xlsx_MGP776.tsv" }; + { "Transcriptomics-Example.xlsx_JJS-MGP253.tsv", + "Transcriptomics-Example.xlsx_JJS-MGP776.tsv" }; assertEquals(Arrays.asList(expectedContents), Arrays.asList(contents)); - File tsvSplitFile = new File(tsvSplitFolder, "Transcriptomics-Example.xlsx_MGP253.tsv"); + File tsvSplitFile = new File(tsvSplitFolder, "Transcriptomics-Example.xlsx_JJS-MGP253.tsv"); checkSplitTsvContent(tsvSplitFile); context.assertIsSatisfied(); } @@ -108,7 +108,7 @@ public class TranscriptomicsDataSetRegistratorTest extends AbstractBaSynthecData private void checkTsvContent(File tsvFile) throws IOException { String content = FileUtils.readFileToString(tsvFile); - assertEquals("Locustag\tMGP253-1 66687802\tMGP776-2 66730002\n" + assertEquals("Locustag\tJJS-MGP253-1 66687802\tJJS-MGP776-2 66730002\n" + "BSU00010\t13.7953\t13.5517\n" + "BSU00020\t13.5907\t13.3277\n" + "BSU00030\t13.8489\t13.6306\n" + "BSU00040\t14.3564\t14.1073\n" + "BSU00050\t14.5239\t14.1992\n" + "BSU00060\t14.3293\t13.933\n"