/* * File : $Source: /alkacon/cvs/alkacon/com.alkacon.opencms.excelimport/src/com/alkacon/opencms/excelimport/CmsExcelContent.java,v $ * Date : $Date: 2010/09/07 11:03:14 $ * Version: $Revision: 1.2 $ * * This library is part of OpenCms - * the Open Source Content Management System * * Copyright (C) 2002 - 2009 Alkacon Software (http://www.alkacon.com) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * For further information about Alkacon Software, please see the * company website: http://www.alkacon.com * * For further information about OpenCms, please see the * project website: http://www.opencms.org * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package com.alkacon.opencms.excelimport; import org.opencms.file.CmsObject; import org.opencms.main.CmsLog; import org.opencms.search.extractors.CmsExtractorMsOfficeOLE2; import org.opencms.search.extractors.I_CmsExtractionResult; import org.opencms.search.extractors.I_CmsTextExtractor; import org.opencms.util.CmsStringUtil; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.DateUtil; /** * Includes contents from an excel file. Here are saved the full content and property values. * Also the on the content column wise sorted can become accessed.<p> * * On read content items can become accessed.<p> * * @author Mario Jaeger * * @version $Revision: 1.2 $ * * @since 7.5.0 */ public class CmsExcelContent { /** Key to access the category property. */ public static String PROPERTY_CATEGORY = "category"; /** Key to access the keywords property. */ public static String PROPERTY_KEYWORDS = "keywords"; /** The log object for this class. */ private static final Log LOG = CmsLog.getLog(CmsExcelContent.class); /** The column contents. */ private Map m_colContents = new HashMap(); /** The column names. */ private Map m_colNames = new HashMap(); /** The extracted individual content items. */ private Map m_contentItems; /** The name from excel file. */ private String m_excelName; /** The number of rows without headline. */ private int m_rowNumber; /** * Gets the category property value from excel file.<p> * * @return the folder property value from excel file */ public String getCategoryProperty() { String categoryProperty = ""; if (m_contentItems != null) { categoryProperty = (String)m_contentItems.get(CmsExcelContent.PROPERTY_CATEGORY); } return categoryProperty; } /** * Get cell content from given column and row.<p> * * @param userColName name from column set by user where cell is * @param row number of row where cell is * * @return cell content from given column and raw */ public String getCellContent(String userColName, int row) { String cellContent = ""; // get list with all raw entries from given raw name if ((m_colNames != null) && (m_colContents != null) && m_colNames.containsKey(userColName)) { Integer excelColName = (Integer)m_colNames.get(userColName); CmsExcelColumn cmsExcelRow = (CmsExcelColumn)m_colContents.get(excelColName); if ((cmsExcelRow != null)) { // get from row list given number entry cellContent = cmsExcelRow.getCellStringValue(row); } } return cellContent; } /** * Gets map with column names with key as column name set by user and value as column name by excel internal.<p> * * @return map with column names with key as column name set by user and value as column name by excel internal */ public Map getColumnNames() { return m_colNames; } /** * Gets full content from excel file.<p> * * @return full content from excel file */ public String getContent() { String content = ""; if (m_contentItems != null) { content = (String)m_contentItems.get(I_CmsExtractionResult.ITEM_CONTENT); } return content; } /** * Gets name from excel file.<p> * * @return name from excel file */ public String getExcelName() { if (CmsStringUtil.isNotEmpty(m_excelName)) { return m_excelName; } else { return ""; } } /** * Gets the folder property value from excel file.<p> * * @return the folder property value from excel file */ public String getFolderProperty() { String folderProperty = ""; if (m_contentItems != null) { folderProperty = (String)m_contentItems.get(CmsExcelContent.PROPERTY_KEYWORDS); } return folderProperty; } /** * Get number of rows in excel file.<p> * * @return number of rows in excel file */ public int getNumberOfRecords() { return m_rowNumber; } /** * Get one row from excel file with values as map with column name as key and value as value.<p> * * @param row number from row to get values to * * @return one row with values as map with column name as key and value as value */ public Map getRecord(int row) { HashMap record = new HashMap(); Iterator iter = m_colNames.keySet().iterator(); while (iter.hasNext()) { String userRowName = (String)iter.next(); String value = getCellContent(userRowName, row); record.put(userRowName, value); } return record; } /** * Checks if excel record is empty.<p> * * @param row Number of current row * * @return True, if excel current is empty, otherwise false */ public boolean isEmptyRecord(int row) { boolean isEmpty = true; Map contents = getRecord(row); Iterator iter = contents.values().iterator(); while (iter.hasNext()) { String content = (String)iter.next(); if (CmsStringUtil.isNotEmpty(content)) { isEmpty = false; } } return isEmpty; } /** * Reads content from excel file. Reads the category property value and also reads contents per column.<p> * * @param cmsObject current CmsObject * @param excelName name from excel file * @param excelContent content from excel file to read */ public void readExcelFile(CmsObject cmsObject, String excelName, byte[] excelContent) { if ((cmsObject != null) && (excelContent != null)) { m_excelName = excelName; String encoding = cmsObject.getRequestContext().getEncoding(); I_CmsTextExtractor cmsTextExtractorMsExcel = CmsExtractorMsOfficeOLE2.getExtractor(); try { // read content per rows readExcelRows(excelContent); // read content // this is necessary because in search input stream is buffered byte[] emptyByte = "".getBytes(); I_CmsExtractionResult cmsExtractionsResult = null; try { cmsExtractionsResult = cmsTextExtractorMsExcel.extractText(emptyByte); } catch (Exception e) { if (LOG.isErrorEnabled()) { LOG.error(e.toString()); } } ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(excelContent); cmsExtractionsResult = cmsTextExtractorMsExcel.extractText(byteArrayInputStream); m_contentItems = cmsExtractionsResult.getContentItems(); } catch (Exception e) { if (LOG.isErrorEnabled()) { LOG.error(e.toString()); } } } } /** * Reads the excel file row by row. Fills the excel import internal structure which is column wise.<p> * * @param in the document input stream * * @throws IOException if something goes wring */ private void readExcelColumnContents(InputStream in) throws IOException { HSSFWorkbook excelWb = new HSSFWorkbook(in); HSSFSheet sheet = excelWb.getSheetAt(0); int rowsNumber = sheet.getPhysicalNumberOfRows(); if (rowsNumber > 0) { // loop over all rows from excel // do not read first column, because here are only user raw names for (int rowCounter = 1; rowCounter < sheet.getPhysicalNumberOfRows(); rowCounter++) { HSSFRow row = sheet.getRow(rowCounter); if ((row != null)) { // get number of rows in excel if ((rowCounter) > m_rowNumber) { m_rowNumber = rowCounter; } // loop over all columns in this row for (int columnCounter = 0; columnCounter < row.getLastCellNum(); columnCounter++) { CmsExcelColumn cmsExcelCol = (CmsExcelColumn)m_colContents.get(new Integer(columnCounter)); if (cmsExcelCol != null) { // read cell HSSFCell cell = row.getCell((short)columnCounter); if (cell != null) { String text = null; try { // read cell content from excel switch (cell.getCellType()) { case Cell.CELL_TYPE_BLANK: case Cell.CELL_TYPE_ERROR: // ignore all blank or error cells break; case Cell.CELL_TYPE_NUMERIC: // check for date double d = cell.getNumericCellValue(); if (DateUtil.isCellDateFormatted(cell) && DateUtil.isValidExcelDate(d)) { // valid date Date date = DateUtil.getJavaDate(d); text = new Long(date.getTime()).toString(); } else { // no valid date text = "" + d; if (text.endsWith(".0")) { text = text.substring(0, text.lastIndexOf(".0")); } } break; case Cell.CELL_TYPE_BOOLEAN: text = Boolean.toString(cell.getBooleanCellValue()); break; case Cell.CELL_TYPE_STRING: default: text = cell.getStringCellValue(); break; } // add to column list cmsExcelCol.addNewCellValue(rowCounter, text); m_colContents.put(new Integer(columnCounter), cmsExcelCol); } catch (Exception e) { if (LOG.isErrorEnabled()) { LOG.error(e.toString()); } } } else { // add to column list cmsExcelCol.addNewCellValue(rowCounter, ""); m_colContents.put(new Integer(columnCounter), cmsExcelCol); } } } } } } } /** * Creates mapping between excel column names set by user and excel column names from excel internal.<p> * * @param in the document input stream * * @throws IOException if something goes wring */ private void readExcelColumnMappings(InputStream in) throws IOException { HSSFWorkbook excelWb = new HSSFWorkbook(in); HSSFSheet sheet = excelWb.getSheetAt(0); int numberOfRows = sheet.getPhysicalNumberOfRows(); if (numberOfRows > 0) { HSSFRow firstRow = sheet.getRow(0); // loop over all columns in first excel row Iterator rowIter = firstRow.cellIterator(); while (rowIter.hasNext()) { // get cell HSSFCell cell = (HSSFCell)rowIter.next(); if (cell != null) { // get user column name String userColName = cell.getStringCellValue(); // get excel column name int excelColName = cell.getCellNum(); CmsExcelColumn excelCol = new CmsExcelColumn(userColName, excelColName); m_colNames.put(userColName, new Integer(excelColName)); m_colContents.put(new Integer(excelColName), excelCol); } } } } /** * Reads the column names set by user and from excel internal and the content from excel file.<p> * * @param fileBytes excel file as file bytes * * @throws IOException if something goes wring */ private void readExcelRows(byte[] fileBytes) throws IOException { readExcelColumnMappings(new ByteArrayInputStream(fileBytes)); readExcelColumnContents(new ByteArrayInputStream(fileBytes)); } }