diff --git a/eu_basynthec/dist/etc/transcriptomics/data-set-handler.py b/eu_basynthec/dist/etc/transcriptomics/data-set-handler.py
index c3a30de547991465af77051e43cdf979bc753284..df05c0e01b490a91de6ddc5e4ebd0b59e2a19a22 100644
--- a/eu_basynthec/dist/etc/transcriptomics/data-set-handler.py
+++ b/eu_basynthec/dist/etc/transcriptomics/data-set-handler.py
@@ -2,6 +2,9 @@ from datetime import datetime
 from eu.basynthec.cisd.dss import TimeSeriesDataExcel
 import re
 
+# Regex matching column headers of the form "[STRAIN]-[BIOLOGICAL REPLICATE] [HYBRIDIZATION NUMBER]"; group 1 is the strain name
+header_regex = re.compile("^(.+)-([0-9]) ([0-9]+)")
+
 def set_data_type(data_set):
   data_set.setPropertyValue("DATA_TYPE", "TRANSCRIPTOMICS")
 
@@ -96,7 +99,6 @@ def convert_data_to_split_tsv(tr, start_row, start_col, dataset, location):
   
   # Extract the column / strain mapping
   header_line = raw_data[start_row]
-  header_regex = re.compile("^(MGP[0-9]{1,3})-([0-9]) ([0-9]+)")
   for i in range(start_col, len(header_line)):
     match = header_regex.match(header_line[i])
     strain_name = match.group(1)
@@ -149,10 +151,9 @@ def store_original_data(tr, dataset, location):
   tr.moveFile(incoming.getAbsolutePath(), dataset, location + "/" + incoming.getName())
 
 def extract_strains(start_row, start_col):
-  """Extract the strain names from the header."""
+  """Extract the strain names from the header. These have already been validated by the validator."""
   strains = []
   line = timeSeriesData.getRawDataLines()[start_row]
-  header_regex = re.compile("^(MGP[0-9]{1,3})-([0-9]) ([0-9]+)")
   for i in range(start_col, len(line)):
     match = header_regex.match(line[i])
     strains.append(match.group(1))
diff --git a/eu_basynthec/dist/etc/transcriptomics/data-set-validator.py b/eu_basynthec/dist/etc/transcriptomics/data-set-validator.py
index 0b7095673b65d9d2a6508ea7c8a558a771662018..ad62da996232b75a1e8323011bc24a7c3c8eca0b 100644
--- a/eu_basynthec/dist/etc/transcriptomics/data-set-validator.py
+++ b/eu_basynthec/dist/etc/transcriptomics/data-set-validator.py
@@ -3,11 +3,17 @@ def validate_header(line, first_data_col, errors):
   if line[0] != "Locustag":
     errors.append(createFileValidationError("The first data column must be 'Locustag' (not " + line[0] + ")."))
     return False
-  header_regex = re.compile("^.+-[0-9] [0-9]+")
+  header_regex = re.compile("^(.+)-([0-9]) ([0-9]+)")
   for i in range(first_data_col, len(line)):
     match = header_regex.match(line[i])
     if match is None:
-      errors.append(createFileValidationError("The column header + " + str(i) + " must be of the form [STRAIN]-[BIOLOGICAL REPLICATE] [HYBRIDIZATION NUMBER]"))
+      errors.append(createFileValidationError("The column header + " + str(i) + " must be of the form [STRAIN]-[BIOLOGICAL REPLICATE] [HYBRIDIZATION NUMBER]. " + line[i] + " is not."))
+      continue
+    strainName = match.group(1)
+    if isStrainIdValid(strainName) is False:
+      errors.append(createFileValidationError("The column header + " + str(i) + " must be of the form [STRAIN]-[BIOLOGICAL REPLICATE] [HYBRIDIZATION NUMBER]. " + strainName + " is not a recognized strain."))
+      continue      
+    
 
 
 def validate_data(time_series_data, first_data_row, first_data_col, errors):