From 91a2e8dde5a7db49b3f13f71793764c267edae52 Mon Sep 17 00:00:00 2001 From: mattminder <myfiles@Mattus-MacBook-Pro.local> Date: Thu, 14 May 2020 13:49:03 +0200 Subject: [PATCH] extract entire movie, new extraction output --- GUI_main.py | 349 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 213 insertions(+), 136 deletions(-) diff --git a/GUI_main.py b/GUI_main.py index 612f110..6aba107 100644 --- a/GUI_main.py +++ b/GUI_main.py @@ -52,10 +52,12 @@ usual buttons (because the Cell Correspondance makes also mistakes). """ import sys import numpy as np +import pandas as pd +import h5py # For writing excel files -from openpyxl import load_workbook -from openpyxl import Workbook +#from openpyxl import load_workbook +#from openpyxl import Workbook # Import everything for the Graphical User Interface from the PyQt5 library. from PyQt5.QtWidgets import (QApplication, QMainWindow, QDialog, @@ -66,6 +68,8 @@ from PyQt5 import QtGui from matplotlib.backends.qt_compat import QtWidgets from matplotlib.backends.backend_qt5agg import NavigationToolbar2QT as NavigationToolbar +from sklearn.decomposition import PCA + #append all the paths where the modules are stored. Such that this script #looks into all of these folders when importing modules. sys.path.append("./unet") @@ -500,9 +504,8 @@ class App(QMainWindow): def DialogXls(self): - """This function creates a dialog window which gives two options to the - user either to load an existing xls file or to give a new name in order - to create a new xls file. + """This function creates a dialog window which asks for the user to + enter a csv name. """ # creates the window dwind = ddb.FileBrowser() @@ -531,69 +534,144 @@ class App(QMainWindow): xlsname = xlsname + templist[k] + '/' # this is the new path/filename - xlsname = xlsname + newxlsname + '.xlsx' + xlsname = xlsname + newxlsname + '.csv' self.xlsfilename = xlsname - self.CreateXls(xlsname) +# self.CreateXls(xlsname) self.ExtractFluo(xlsname) # this flag is set to true, for the current field of view each # time extract fluorescence is clicked it writes in the file located # at self.xlsfilename. - self.FlagFluoExtraction = True +# self.FlagFluoExtraction = True else: return - def CreateXls(self, xlsfilename): - """In case there is no xls file existing, here a new one is created - and prepared. For each channel a new sheet is created. - In the first row for each sheet, the time indices are written t = 0, - t = 1, etc... but only every third column. Because in the row below, - three values are extracted 'Total intensity', 'Area' and 'Variance' - at each time index. So three columns for each time index are needed, - for the three data points. - The first column is left empty (starting from third row) because - the cell numbers will be written in there. - """ - - book = Workbook() - nbrchannels = self.reader.sizec - - for i in range(0,nbrchannels): - sheetname = self.reader.channel_names[i] - # creates a sheet with the name of the corresponding channel. - if i == 0: - sheet = book.active - sheet.title = sheetname - else: - sheet = book.create_sheet(sheetname) - sheet.cell(1,1, 'Cell Number / Time axis') - sheet.cell(2,1, 'labels') - timeaxissize = self.reader.sizet - # start writing the time index at column 1, column 0 is reserved for - # cell numbers. - timecolindex = 2 - - for t in range(1,timeaxissize+1): - sheet.cell(1,timecolindex).value = 't = {}'.format(t-1) - sheet.cell(2,timecolindex).value = 'Total Intensity' - sheet.cell(2,timecolindex+1).value = 'Total Area' - sheet.cell(2,timecolindex+2).value = 'Mean Intensity' - sheet.cell(2,timecolindex+3).value = 'Variance' - # updates the index, where the next time index should be written - timecolindex = timecolindex + 4 - - try: - book.save(xlsfilename) - except TypeError: - QMessageBox.critical(self, "Error", "TypeError encountered. \ - Make sure you have openpyxl version 3.0.1 \ - installed. If the problem persists contact \ - the developers.") +# def ExtractFluo(self, xlsfilename): +# """This is the function that takes as argument the filepath to the xls +# file and writes in the file. +# It iterates over the different channels (or the sheets of the file, +# each channel has one sheet.), and reads the image corresponding +# to the time, field of view and channel index. It reads the already +# existing file and makes a copy in which the data will be written in it. +# +# The first step of calculating the data is to iterate through each +# cell/segment of the mask (so each cell is a submatrix of one value +# in the matrix of the mask). +# For each of these value /cell, the area is extracted as being +# the number of pixels corresponding to this cell/value. +# (it is known from the microscope settings how to convert +# the pixel in area). +# The total intensity is just the value of the pixel and it is added over +# all the pixels corresonding to the cell/value. +# The mean is then calculated as being the total intensity divided by +# the number of pixels (which here is equal to the area also). +# With the mean it is then possible to calculate the variance of the +# signal for one cell/value. +# +# Then, it is checked if the value of the cell (cell number) already +# exists in the first column, if it already exists it continues to +# find the column corresponding to the time index where the values +# should be written. It sets the flag to True such that it does not +# write the cell as new one and adds it at the end of the column +# +# If the value is not found in the cell number column (new cell or +# first time writing in the file), the flag is False, thus it adds the +# cell number at the end of the column. +# It then saves the xls file. +# """ +# self.Disable(self.button_extractfluorescence) +# self.WriteStatusBar('Extracting the fluorescence...') +# +# # opens the file to read it. +# book = load_workbook(self.xlsfilename) +# +# +# # iterate over all the channels, so over all the sheets in the file +# for channel in range(0, self.reader.sizec): +# # loads the picture corresponding to the channel, time index and fov +# image = self.reader.LoadImageChannel(self.Tindex, self.FOVindex, channel) +# +# # loads the sheet to read out corresponding to the current channel +# sheet = book.worksheets[channel] +# +# +# # this index contains the value of the maximum number of rows in the +# # file, it is used to append at the end the cell number column a new +# # cell/value, and it is updated each time a new cell is added. +# tempidx = sheet.max_row +# +# # np.unique(array) returns an array which contains all the value +# # that appear in self.m.plotmask, so it returns every cell value +# # including the background (value 0) present in self.m.plotmask +# for val in np.unique(self.m.plotmask): +# +# # Skip background +# if val == 0: +# continue +# +# # Calculate stats +# area = (self.m.plotmask == val).sum() +# tot_intensity = image[self.m.plotmask == val].sum() +# mean = tot_intensity/area +# var = np.var(image[self.m.plotmask==val]) +# +# # if flag is false it means that the cell number +# # corresponding to val is not present in the xls file, first +# # column. +# flag = False +# +# # iterate over all the rows +# for row in range(sheet.max_row+1): +# # test if in the first column 0, the number of the cell +# # is already present +# # if sheet.cell_value(row,0) == str(val): +# if sheet.cell(row = row+1, column = 1).value == str(val): +# +# # if is present, the column corresponding to the +# # current time index is by iterating over the cols. +# for col in range(sheet.max_column+1): +# # test if it is the right column +# # if sheet.cell_value(0, col) == 't = {}'.format(self.Tindex): +# if sheet.cell(row = 1, column = col+1).value == 't = {}'.format(self.Tindex): +# # write in the xls file at the row, col coord +# sheet.cell(row+1, col+1, str(tot_intensity)) +# sheet.cell(row+1, col+2, str(area)) +# sheet.cell(row+1,col+3, str(mean)) +# sheet.cell(row+1, col+4, str(var)) +# book.save(xlsfilename) +# +# # the flag is set to True so that it does +# # not execute the code where the cell is +# # added in the xls file in a new row. +# flag = True +# +# if not flag: +# # this lines are executed if a new cell is detected or if +# # if it is the first time to write in the file. +# for col in range(sheet.max_column+1): +# if sheet.cell(row = 1, column = col+1).value == 't = {}'.format(self.Tindex): +# # it write the cell value/cell number in the +# # column +# sheet.cell(tempidx+1,1, str(val)) +# +# # writes the data extracted before +# sheet.cell(tempidx+1,col+1,str(tot_intensity)) +# sheet.cell(tempidx+1, col+2, str(area)) +# sheet.cell(tempidx+1, col+3, str(mean)) +# sheet.cell(tempidx+1, col+4, str(var)) +# # it updates the number of rows as a new cell +# # has been added, so there is one more row. +# tempidx = tempidx + 1 +# book.save(xlsfilename) +# +# self.Enable(self.button_extractfluorescence) +# self.ClearStatusBar() + def ExtractFluo(self, xlsfilename): """This is the function that takes as argument the filepath to the xls file and writes in the file. @@ -626,96 +704,95 @@ class App(QMainWindow): first time writing in the file), the flag is False, thus it adds the cell number at the end of the column. It then saves the xls file. + """ + self.Disable(self.button_extractfluorescence) self.WriteStatusBar('Extracting the fluorescence...') - # opens the file to read it. - book = load_workbook(self.xlsfilename) - - - # iterate over all the channels, so over all the sheets in the file - for channel in range(0, self.reader.sizec): - # loads the picture corresponding to the channel, time index and fov - image = self.reader.LoadImageChannel(self.Tindex, self.FOVindex, channel) - - # loads the sheet to read out corresponding to the current channel - sheet = book.worksheets[channel] - - - # this index contains the value of the maximum number of rows in the - # file, it is used to append at the end the cell number column a new - # cell/value, and it is updated each time a new cell is added. - tempidx = sheet.max_row - - # np.unique(array) returns an array which contains all the value - # that appear in self.m.plotmask, so it returns every cell value - # including the background (value 0) present in self.m.plotmask - for val in np.unique(self.m.plotmask): - - # Skip background - if val == 0: - continue - - # Calculate stats - area = (self.m.plotmask == val).sum() - tot_intensity = image[self.m.plotmask == val].sum() - mean = tot_intensity/area - var = np.var(image[self.m.plotmask==val]) - - # if flag is false it means that the cell number - # corresponding to val is not present in the xls file, first - # column. - flag = False - - # iterate over all the rows - for row in range(sheet.max_row+1): - # test if in the first column 0, the number of the cell - # is already present - # if sheet.cell_value(row,0) == str(val): - if sheet.cell(row = row+1, column = 1).value == str(val): - - # if is present, the column corresponding to the - # current time index is by iterating over the cols. - for col in range(sheet.max_column+1): - # test if it is the right column - # if sheet.cell_value(0, col) == 't = {}'.format(self.Tindex): - if sheet.cell(row = 1, column = col+1).value == 't = {}'.format(self.Tindex): - # write in the xls file at the row, col coord - sheet.cell(row+1, col+1, str(tot_intensity)) - sheet.cell(row+1, col+2, str(area)) - sheet.cell(row+1,col+3, str(mean)) - sheet.cell(row+1, col+4, str(var)) - book.save(xlsfilename) - - # the flag is set to True so that it does - # not execute the code where the cell is - # added in the xls file in a new row. - flag = True - - if not flag: - # this lines are executed if a new cell is detected or if - # if it is the first time to write in the file. - for col in range(sheet.max_column+1): - if sheet.cell(row = 1, column = col+1).value == 't = {}'.format(self.Tindex): - # it write the cell value/cell number in the - # column - sheet.cell(tempidx+1,1, str(val)) - - # writes the data extracted before - sheet.cell(tempidx+1,col+1,str(tot_intensity)) - sheet.cell(tempidx+1, col+2, str(area)) - sheet.cell(tempidx+1, col+3, str(mean)) - sheet.cell(tempidx+1, col+4, str(var)) - # it updates the number of rows as a new cell - # has been added, so there is one more row. - tempidx = tempidx + 1 - book.save(xlsfilename) - + # List of cell properties + cell_list = [] + + for time_index in range(0, self.reader.sizet): + + # Test if time has a mask + file = h5py.File(self.reader.hdfpath, 'r+') + time_exist = self.reader.TestTimeExist(time_index, self.FOVindex, file) + file.close() + + if not time_exist: + continue + + # Extract statistics for all channels and all cells + for channel in range(0, self.reader.sizec): + # load picture and sheet + image = self.reader.LoadImageChannel(time_index, self.FOVindex, channel) + mask = self.reader.LoadMask(time_index, self.FOVindex) + + for val in np.unique(mask): + # bg is not cell + if val == 0: + continue + + # Calculate stats + stats = self.cell_statistics(image, mask == val) + stats['Time'] = time_index + stats['Channel Index'] = channel + stats['Channel'] = self.reader.channel_names[channel] + cell_list.append(stats) + + # Use Pandas to write csv + df = pd.DataFrame(cell_list) + df.to_csv(self.xlsfilename, index=False) + self.Enable(self.button_extractfluorescence) self.ClearStatusBar() + def cell_statistics(self, image, mask): + """Calculate statistics about cells. Passing None to image will + create dictionary to zeros, which allows to extract dictionary keys""" + if image is not None: + cell_vals = image[mask] + area = mask.sum() + tot_intensity = cell_vals.sum() + mean = tot_intensity/area if area > 0 else 0 + var = np.var(cell_vals) + + # Center of mass + y,x = mask.nonzero() + # sample = np.random.choice(len(x), size=50, replace=True) + com_x = np.mean(x) + com_y = np.mean(y) + + # PCA only works for multiple points + if area > 1: + pca = PCA().fit(np.array([y,x]).T) + pc1_x, pc1_y = pca.components_[0,:] + angle = np.arctan(pc1_y / pc1_x) / np.pi * 360 + v1, v2 = pca.explained_variance_ + roundness = v2 / v1 + else: + angle = 0 + roundness = 1 + + else: + mean = 0 + var = 0 + tot_intensity = 0 + com_x = 0 + com_y = 0 + angle = 0 + roundness = 0 + + return {'Mean': mean, + 'Variance': var, + 'Total Intensity': tot_intensity, + 'Center of Mass X': com_x, + 'Center of Mass Y': com_y, + 'Cell Angle': angle, + 'Roundness': roundness} + # ----------------------------------------------------------------------------- # NEURAL NETWORK def ShowHideCNNbuttons(self): -- GitLab