diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/AbstractFileTableReportingPlugin.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/AbstractFileTableReportingPlugin.java index e8c6edc47d6112a210ed421f148a0b49fad6c81d..405d997d1953b6abcf92bc87eaa4fbfdb7c9851c 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/AbstractFileTableReportingPlugin.java +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/server/plugins/standard/AbstractFileTableReportingPlugin.java @@ -24,12 +24,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Properties; -import org.apache.commons.io.FilenameUtils; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; -import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; import com.csvreader.CsvReader; @@ -40,7 +35,7 @@ import ch.systemsx.cisd.common.parser.ParsingException; import ch.systemsx.cisd.common.utilities.PropertyUtils; import ch.systemsx.cisd.openbis.dss.generic.shared.utils.CodeAndLabelUtil; import ch.systemsx.cisd.openbis.dss.generic.shared.utils.DatasetFileLines; -import ch.systemsx.cisd.openbis.dss.generic.shared.utils.ExcelFileReaderHelper; +import ch.systemsx.cisd.openbis.dss.generic.shared.utils.ExcelFileReader; import ch.systemsx.cisd.openbis.generic.shared.basic.TableCellUtil; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.CodeAndLabel; import ch.systemsx.cisd.openbis.generic.shared.basic.dto.ISerializableComparable; @@ -110,12 +105,13 @@ abstract public class AbstractFileTableReportingPlugin extends AbstractTableMode assert file != null : "Given file must not be null"; assert file.isFile() : "Given file '" + file.getAbsolutePath() + "' is not a file."; - if (isExcelFile(file)) + if (ExcelFileReader.isExcelFile(file)) { try { - Sheet sheet = getExcelSheet(file); - return load(dataset, sheet, file); + Workbook workbook = ExcelFileReader.getExcelWorkbook(file); + ExcelFileReader reader = new ExcelFileReader(workbook, ignoreComments); + return load(dataset, reader, file); } catch (final IOException ex) { throw new IOExceptionUnchecked(ex); @@ -140,47 +136,24 @@ abstract public class AbstractFileTableReportingPlugin extends AbstractTableMode } } - private static boolean isExcelFile(File file) + /** + * Loads data from the specified reader. + */ + private DatasetFileLines load(DatasetDescription dataset, ExcelFileReader reader, + File file) throws IOException { - return FilenameUtils.isExtension(file.getName().toLowerCase(), new String[] - { "xls", "xlsx" }); - } + assert reader != null : "Unspecified reader"; - private Sheet getExcelSheet(File file) throws IOException - { - Workbook wb = getExcelWorkbook(file); + List<String[]> lines = null; try { int index = Integer.parseInt(excelSheet); - return wb.getSheetAt(index); // will throw exception if index is out of range + lines = reader.readLines(index); // will throw exception if index is out of range } catch (NumberFormatException ex) { - Sheet sheet = wb.getSheet(excelSheet); - if (sheet == null) - { - throw new UserFailureException(file.getName() + " doesn't contain sheet named " - + excelSheet); - } - return sheet; - } - } - - private Workbook getExcelWorkbook(File file) throws IOException - { - final String extension = FilenameUtils.getExtension(file.getName()).toLowerCase(); - final FileInputStream stream = new FileInputStream(file); - if ("xls".equals(extension)) - { - POIFSFileSystem poifsFileSystem = new POIFSFileSystem(stream); - return new HSSFWorkbook(poifsFileSystem); - } else if ("xlsx".equals(extension)) - { - return new XSSFWorkbook(stream); - } else - { - throw new IllegalArgumentException( - "Expected an Excel file with 'xls' or 'xlsx' extension, got " + file.getName()); + lines = reader.readLines(excelSheet); } + return new DatasetFileLines(file, dataset.getDataSetCode(), lines, ignoreTrailingEmptyCells); } private static CsvReader readFile(File file, boolean ignoreComments, char separator) @@ -203,20 +176,6 @@ abstract public class AbstractFileTableReportingPlugin extends AbstractTableMode return csvReader; } - /** - * Loads data from the specified sheet. - * - * @throws IOException - */ - private DatasetFileLines load(DatasetDescription dataset, Sheet sheet, File file) - throws IOException - { - assert sheet != null : "Unspecified sheet"; - - List<String[]> lines = ExcelFileReaderHelper.loadLines(sheet, ignoreComments); - return new DatasetFileLines(file, dataset.getDataSetCode(), lines, ignoreTrailingEmptyCells); - } - /** * Loads data from the specified reader. */ diff --git a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/utils/ExcelFileReaderHelper.java b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/utils/ExcelFileReader.java similarity index 64% rename from datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/utils/ExcelFileReaderHelper.java rename to datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/utils/ExcelFileReader.java index 0cb1069d27936c65dada22d235bd5e1c958afadd..3639f1dd844462e55ae3233a4c922f1ea3dcdeec 100644 --- a/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/utils/ExcelFileReaderHelper.java +++ b/datastore_server/source/java/ch/systemsx/cisd/openbis/dss/generic/shared/utils/ExcelFileReader.java @@ -24,7 +24,7 @@ import java.util.Arrays; import java.util.List; import org.apache.commons.io.FilenameUtils; -import org.apache.poi.hssf.extractor.ExcelExtractor; +import org.apache.log4j.Logger; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.ss.usermodel.Cell; @@ -32,85 +32,102 @@ import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.util.CellReference; -import org.apache.poi.xssf.extractor.XSSFExcelExtractor; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import ch.systemsx.cisd.common.exceptions.UserFailureException; +import ch.systemsx.cisd.common.logging.LogCategory; +import ch.systemsx.cisd.common.logging.LogFactory; /** + * Reader of tabular data from excel files. + * * @author Piotr Buczek */ -public class ExcelFileReaderHelper +public class ExcelFileReader { - public static void main(String[] args) throws Exception + private static final Logger operationLog = LogFactory.getLogger(LogCategory.OPERATION, + ExcelFileReader.class); + + private final boolean ignoreComments; + + private final Workbook workbook; + + public ExcelFileReader(Workbook workbook, boolean ignoreComments) { - String filename = args[0]; - File file = new File(filename); - List<String[]> lines = loadExcelFile(file, true); - for (String[] line : lines) + this.ignoreComments = ignoreComments; + this.workbook = workbook; + } + + public List<String[]> readLines() throws IOException + { + Sheet sheet = workbook.getSheetAt(0); + return loadLines(sheet, ignoreComments); + } + + public List<String[]> readLines(int sheetIndex) throws IOException + { + Sheet sheet = workbook.getSheetAt(sheetIndex); + return loadLines(sheet, ignoreComments); + } + + public List<String[]> readLines(String sheetName) throws IOException + { + Sheet sheet = workbook.getSheet(sheetName); + if (sheet == null) { - System.err.println(Arrays.toString(line)); + throw new UserFailureException("Couldn't find sheet named " + sheetName); } + return loadLines(sheet, ignoreComments); + } + + // if the line starts with this character and comments should be ignored, the line is ignored + private static final String COMMENT = "#"; + + /** + * Uses file extension to figure out if given <var>file</var> is supported Excel file. + * + * @return <code>true</code> if the <var>file</var> is an XSL or XLSX Excel file + */ + public static boolean isExcelFile(File file) + { + return FilenameUtils.isExtension(file.getName().toLowerCase(), new String[] + { "xls", "xlsx" }); } - public static List<String[]> loadExcelFile(File file, boolean ignoreComments) - throws IOException + /** + * @return {@link Workbook} of an Excel <var>file</var> + * @throws IOException if an I/O problem occurs + * @throws IllegalArgumentException if the <var>file</var> is a format that is not supported + */ + public static Workbook getExcelWorkbook(File file) throws IOException, IllegalArgumentException { final String extension = FilenameUtils.getExtension(file.getName()).toLowerCase(); final FileInputStream stream = new FileInputStream(file); - Workbook wb = null; if ("xls".equals(extension)) { POIFSFileSystem poifsFileSystem = new POIFSFileSystem(stream); - HSSFWorkbook hssfWorkbook = new HSSFWorkbook(poifsFileSystem); - wb = hssfWorkbook; - // HSSFWorkbook wb = new HSSFWorkbook(poifsFileSystem); - - ExcelExtractor extractor = new ExcelExtractor(hssfWorkbook); - // HSSFFormulaEvaluator.evaluateAllFormulaCells(hssfWorkbook); - // - extractor.setIncludeBlankCells(true); - extractor.setIncludeSheetNames(false); - String text = extractor.getText(); - System.out.println(text);// returns TSV file - + return new HSSFWorkbook(poifsFileSystem); } else if ("xlsx".equals(extension)) { - // spaces are not ignored - XSSFWorkbook xssfWorkbook = new XSSFWorkbook(stream); - wb = xssfWorkbook; - - // ExcelExtractor extractor = new ExcelExtractor(xssfWorkbook); - // - XSSFExcelExtractor extractor = new XSSFExcelExtractor(xssfWorkbook); - // XSSFFormulaEvaluator evaluator = new XSSFFormulaEvaluator(xssfWorkbook); - // XSSFFormulaEvaluator.evaluateAllFormulaCells(xssfWorkbook); - - extractor.setIncludeSheetNames(false); - String text = extractor.getText(); - System.out.println(text);// returns TSV file + return new XSSFWorkbook(stream); } else { - System.err.println(extension); throw new IllegalArgumentException( "Expected an Excel file with 'xls' or 'xlsx' extension, got " + file.getName()); } - - final Sheet sheet = wb.getSheetAt(0); - - return loadLines(sheet, ignoreComments); } - // if the line starts with this character and comments should be ignored, the line is ignored - private static final String COMMENT = "#"; + // + // helper methods + // - public static List<String[]> loadLines(Sheet sheet, boolean ignoreComments) throws IOException + private static List<String[]> loadLines(Sheet sheet, boolean ignoreComments) throws IOException { final List<String[]> lines = new ArrayList<String[]>(); boolean firstLine = true; - // NOTE: the following code is pretty ugly - it is because poi API is very limited + // NOTE: the following code is pretty ugly because poi API is very limited int headerSize = 0; for (Row row : sheet) { @@ -126,7 +143,10 @@ public class ExcelFileReaderHelper for (Cell cell : row) { String value = extractCellValue(cell); - System.out.println("cell " + extractCellPosition(cell) + ":" + value); + if (operationLog.isDebugEnabled()) + { + operationLog.debug(extractCellPosition(cell) + ": " + value); + } header[cell.getColumnIndex()] = value; } lines.add(header); @@ -137,7 +157,10 @@ public class ExcelFileReaderHelper for (Cell cell : row) { String value = extractCellValue(cell); - System.out.println("cell " + extractCellPosition(cell) + ":" + value); + if (operationLog.isDebugEnabled()) + { + operationLog.debug(extractCellPosition(cell) + ": " + value); + } if (cell.getColumnIndex() >= line.length) { continue; // ignore for now @@ -148,10 +171,14 @@ public class ExcelFileReaderHelper } } - for (String[] line : lines) + if (operationLog.isDebugEnabled()) { - System.err.println(Arrays.toString(line)); + for (String[] line : lines) + { + operationLog.debug(Arrays.toString(line)); + } } + return lines; } @@ -197,4 +224,21 @@ public class ExcelFileReaderHelper String row = "" + (cell.getRowIndex() + 1); return col + row; } + + // + // for testing + // + + public static void main(String[] args) throws Exception + { + String filename = args[0]; + File file = new File(filename); + Workbook wb = getExcelWorkbook(file); + ExcelFileReader helper = new ExcelFileReader(wb, true); + List<String[]> lines = helper.readLines(); + for (String[] line : lines) + { + System.err.println(Arrays.toString(line)); + } + } }