Skip to content
Snippets Groups Projects
Commit 7f579b6f authored by barillac's avatar barillac
Browse files

dropbox script for importing gene cluster images

SVN: 30057
parent b37603fd
No related branches found
No related tags found
No related merge requests found
#! /usr/bin/env python
"""
Script for uploading cluster data. Data is organized in directories named after the cluster numbers (from 1 to 19). Each cluster
directory has 1 pdf file and 10 TIF image files. Each directory also has several subdirectories with gene names. In each of these
a pdf file and 10 mp4 files are contained.
In openBIS, samples with the names of the clusters will be created, with the files contained in them uploaded as datasets.
The gene directories will be uploaded as contained samples in the corresponding cluster, with the respective files uploaded as datasets.
Script for uploading cluster data. Data is organized in directories named after the cluster numbers (from 1 to 17). Each cluster
directory has 1 pdf file and 10 TIF image files. There is also a text file that contains the list of genes contained in each cluster.
"""
......@@ -38,10 +36,12 @@ def process(transaction):
exp = transaction.createNewExperiment("/SINERGIA/CLUSTERS/CLUSTERS", 'SIRNA_HCS')
exp.setPropertyValue("DESCRIPTION", "gene clusters")
#create samples called Cluster1 to 19. Take the name from the directory (in the incoming folder there are 19 directories called cluster1-19)
newClusterSample=transaction.createNewSample("/SINERGIA/" + clusterName,'CLUSTER')
newClusterSample.setExperiment(exp)
#create samples called Cluster1 to 17. Take the name from the directory (in the incoming folder there are 17 directories called cluster1-17)
newClusterSample = transaction.getSample("/SINERGIA/" + clusterName)
if not newClusterSample:
newClusterSample=transaction.createNewSample("/SINERGIA/" + clusterName,'CLUSTER')
newClusterSample.setExperiment(exp)
#upload the pdf image of each cluster as dataset in the corresponding Cluster sample
for pdf in glob.glob(os.path.join(incoming.getPath(), '*.pdf')):
dataSetPDF = transaction.createNewDataSet()
......@@ -53,36 +53,72 @@ def process(transaction):
tifDir = incoming.getPath() + "/tiffs"
if not os.path.exists(tifDir):
os.makedirs(tifDir)
if glob.glob(os.path.join(incoming.getPath(), '*.TIF')):
for tif in glob.glob(os.path.join(incoming.getPath(), '*.TIF')):
if glob.glob(os.path.join(incoming.getPath(), '*.tif')):
for tif in glob.glob(os.path.join(incoming.getPath(), '*.tif')):
shutil.move(tif, tifDir)
dataSetTIF = transaction.createNewDataSet()
dataSetTIF.setDataSetType("MICROSCOPY_IMG")
dataSetTIF.setDataSetType("TIF_IMAGES")
dataSetTIF.setSample(newClusterSample)
transaction.moveFile(tifDir, dataSetTIF)
#in each Cluster directory there are subdirectories for each gene. Now we create a sample for each gene and set the cluster it belongs to as a container sample.
#Each gene has a pdf file and 10 movies, so they will be uploaded as datasets
if not glob.glob(os.path.join(incoming.getPath(), 'tiffs')):
for genes, pdfGene in zip(glob.glob(os.path.join(incoming.getPath(), '*')), glob.glob(os.path.join(incoming.getPath(), '*/*.pdf'))):
geneName = os.path.basename(genes)
newGeneSample = transaction.createNewSample("/SINERGIA/" + geneName,'GENE')
newGeneSample.setContainer(newClusterSample)
newGeneSample.setExperiment(exp)
videoDir = genes + "/videos"
if not os.path.exists(videoDir):
os.makedirs(videoDir)
dataSetpdfGene = transaction.createNewDataSet()
dataSetpdfGene.setDataSetType("PDF")
dataSetpdfGene.setSample(newGeneSample)
transaction.moveFile(pdfGene, dataSetpdfGene)
for genes, mp4 in zip(glob.glob(os.path.join(incoming.getPath(), '*')), glob.glob(os.path.join(incoming.getPath(), '*/*.mp4'))):
shutil.move(mp4, videoDir)
dataSetMP4Gene = transaction.createNewDataSet()
dataSetMP4Gene.setDataSetType("VIDEOS")
dataSetMP4Gene.setSample(newGeneSample)
transaction.moveFile(videoDir, dataSetMP4Gene)
# upload the 10 png images for each cluster as a dataset in the corresponding Cluster sample
pngDir = incoming.getPath() + "/pngs"
if not os.path.exists(pngDir):
os.makedirs(pngDir)
if glob.glob(os.path.join(incoming.getPath(), '*.png')):
for png in glob.glob(os.path.join(incoming.getPath(), '*.png')):
shutil.move(png, pngDir)
dataSetPNG = transaction.createNewDataSet()
dataSetPNG.setDataSetType("PNG_IMAGES")
dataSetPNG.setSample(newClusterSample)
transaction.moveFile(pngDir, dataSetPNG)
# Open the geneList text file and create samples with the name of the genes. These samples have to be contained in the corresponding cluster.
for textfile in glob.glob(os.path.join(incoming.getPath(), 'geneList.txt')):
text = open(textfile, "r")
lineIndex =0
for line in text:
lineIndex=lineIndex+1
gene_list = re.split(r"[,]",line)
gene_list = [ item.strip() for item in gene_list ]
gene_list = filter(lambda x: len(x) > 0, gene_list)
for gene in gene_list:
print gene
newGeneSample = transaction.createNewSample("/SINERGIA/" + gene,'GENE')
newGeneSample.setContainer(newClusterSample)
newGeneSample.setExperiment(exp)
###################################################################################################################################
#This part of the script assumes that the gene directories are inside the cluster directories.
# #in each Cluster directory there are subdirectories for each gene. Now we create a sample for each gene and set the cluster it belongs to as a container sample.
# #Each gene has a pdf file and 10 movies, so they will be uploaded as datasets
# if not glob.glob(os.path.join(incoming.getPath(), 'tiffs')):
# for genes, pdfGene in zip(glob.glob(os.path.join(incoming.getPath(), '*')), glob.glob(os.path.join(incoming.getPath(), '*/*.pdf'))):
# geneName = os.path.basename(genes)
# newGeneSample = transaction.createNewSample("/SINERGIA/" + geneName,'GENE')
# newGeneSample.setContainer(newClusterSample)
# newGeneSample.setExperiment(exp)
# videoDir = genes + "/videos"
# if not os.path.exists(videoDir):
# os.makedirs(videoDir)
# dataSetpdfGene = transaction.createNewDataSet()
# dataSetpdfGene.setDataSetType("PDF")
# dataSetpdfGene.setSample(newGeneSample)
# transaction.moveFile(pdfGene, dataSetpdfGene)
#
# for genes, mp4 in zip(glob.glob(os.path.join(incoming.getPath(), '*')), glob.glob(os.path.join(incoming.getPath(), '*/*.mp4'))):
# shutil.move(mp4, videoDir)
# dataSetMP4Gene = transaction.createNewDataSet()
# dataSetMP4Gene.setDataSetType("VIDEOS")
# dataSetMP4Gene.setSample(newGeneSample)
# transaction.moveFile(videoDir, dataSetMP4Gene)
#
#
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment