/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.nio.model;
import static com.rapidminer.operator.nio.model.AbstractDataResultSetReader.ANNOTATION_NAME;
import static com.rapidminer.operator.nio.model.AbstractDataResultSetReader.PARAMETER_ANNOTATIONS;
import static com.rapidminer.operator.nio.model.AbstractDataResultSetReader.PARAMETER_DATE_FORMAT;
import static com.rapidminer.operator.nio.model.AbstractDataResultSetReader.PARAMETER_FIRST_ROW_AS_NAMES;
import static com.rapidminer.operator.nio.model.AbstractDataResultSetReader.PARAMETER_LOCALE;
import static com.rapidminer.operator.nio.model.AbstractDataResultSetReader.PARAMETER_META_DATA;
import java.text.DateFormat;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.operator.Annotations;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.io.ExampleSource;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDInteger;
import com.rapidminer.operator.preprocessing.filter.AbstractDateDataProcessing;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeTupel;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
/**
* This class holds the information describing how a DataResultSet is translated into an ExampleSet. For each column
* it stores the final name, the value type, the role and whether the column is selected at all.
*
* @author Sebastian Land, Simon Fischer
*/
public class DataResultSetTranslationConfiguration {

    /**
     * Per-column translation settings. Stays {@code null} until one of the {@code reconfigure} methods is called
     * with a non-null argument.
     */
    private ColumnMetaData[] columnMetaData;

    /** Locale used when a {@link SimpleDateFormat} is built from {@link #datePattern}. */
    private Locale locale = Locale.getDefault();

    /** Date pattern; an empty or {@code null} pattern makes {@link #getDateFormat()} fall back to the default format. */
    private String datePattern = "";

    /** Maps row indices to the annotation assigned to that row, ordered by row index. */
    private final SortedMap<Integer, String> annotationsMap = new TreeMap<Integer, String>();

    private boolean faultTolerant = true;

    /** Lazily created by {@link #getDateFormat()}; reset to {@code null} whenever the date pattern changes. */
    private DateFormat dateFormat;

    private NumberFormat numberFormat;

    private int dataManagementType = DataRowFactory.TYPE_DOUBLE_ARRAY;

    /**
     * Creates a configuration from the parameter values stored in the given reader operator.
     *
     * @param readerOperator
     *            the operator whose parameters are read. If {@code null}, only the name annotation for row 0 is
     *            registered and no column meta data is created.
     */
    public DataResultSetTranslationConfiguration(AbstractDataResultSetReader readerOperator) {
        this(readerOperator, null);
    }

    /**
     * Creates the configuration by first applying the given data result set (if any) and then the parameter values
     * stored in the given reader (if any). When derived from the result set, names are taken as delivered and the
     * value types are those reported by the result set.
     *
     * NOTE(review): a non-null reader always replaces the column meta data derived from the result set, because
     * {@link #reconfigure(AbstractDataResultSetReader)} runs second and rebuilds the array.
     */
    private DataResultSetTranslationConfiguration(AbstractDataResultSetReader readerOperator, DataResultSet dataResultSet) {
        reconfigure(dataResultSet);
        reconfigure(readerOperator);
    }

    /**
     * Re-reads this configuration from the parameters of the given operator: data management type, annotations,
     * date pattern, locale, column meta data and the error tolerance flag (in this order).
     *
     * If the operator is {@code null}, the only effect is that row 0 is annotated as the name row.
     *
     * NOTE(review): existing entries of the annotations map are not removed before the operator's annotations are
     * added, so repeated calls accumulate annotations. Confirm whether callers rely on this.
     *
     * @param readerOperator
     *            the operator to read the parameters from, may be {@code null}
     * @throws NumberFormatException
     *             if a row index, a column index or a legacy value type index stored in the parameters is not a
     *             valid integer
     */
    public void reconfigure(AbstractDataResultSetReader readerOperator) {
        if (readerOperator == null) {
            annotationsMap.put(0, ANNOTATION_NAME);
            return;
        }
        readDataManagementType(readerOperator);
        readAnnotations(readerOperator);
        readDateSettings(readerOperator);
        readColumnMetaData(readerOperator);
        setFaultTolerant(readerOperator.getParameterAsBoolean(AbstractDataResultSetReader.PARAMETER_ERROR_TOLERANT));
    }

    /** Reads the data management type, falling back to {@link DataRowFactory#TYPE_DOUBLE_ARRAY} if undefined. */
    private void readDataManagementType(AbstractDataResultSetReader readerOperator) {
        try {
            dataManagementType = readerOperator.getParameterAsInt(ExampleSource.PARAMETER_DATAMANAGEMENT);
        } catch (UndefinedParameterError e) {
            dataManagementType = DataRowFactory.TYPE_DOUBLE_ARRAY;
        }
    }

    /** Adds the operator's row annotations to the annotations map and applies the "first row as names" flag. */
    private void readAnnotations(AbstractDataResultSetReader readerOperator) {
        List<String[]> annotations;
        try {
            annotations = readerOperator.getParameterList(PARAMETER_ANNOTATIONS);
        } catch (UndefinedParameterError e) {
            annotations = Collections.emptyList();
        }
        // each entry is a pair {row index, annotation}
        for (String[] annotation : annotations) {
            annotationsMap.put(Integer.parseInt(annotation[0]), annotation[1]);
        }
        // "first row as names" is represented as a name annotation on row 0 and overrides any other annotation there
        if (readerOperator.getParameterAsBoolean(PARAMETER_FIRST_ROW_AS_NAMES)) {
            annotationsMap.put(0, ANNOTATION_NAME);
        }
    }

    /** Reads the date pattern and the locale from the operator. */
    private void readDateSettings(AbstractDataResultSetReader readerOperator) {
        try {
            setDatePattern(readerOperator.getParameterAsString(PARAMETER_DATE_FORMAT));
        } catch (UndefinedParameterError e) {
            setDatePattern("");
        }
        try {
            int localeIndex = readerOperator.getParameterAsInt(PARAMETER_LOCALE);
            // an index outside the list of available locales leaves the current locale untouched
            if (localeIndex >= 0 && localeIndex < AbstractDateDataProcessing.availableLocales.size()) {
                locale = AbstractDateDataProcessing.availableLocales.get(localeIndex);
            }
        } catch (UndefinedParameterError e) {
            locale = Locale.getDefault();
        }
    }

    /** Rebuilds the column meta data array from the operator's meta data parameter list. */
    private void readColumnMetaData(AbstractDataResultSetReader readerOperator) {
        List<String[]> metaDataSettings;
        if (readerOperator.isParameterSet(PARAMETER_META_DATA)) {
            try {
                metaDataSettings = readerOperator.getParameterList(PARAMETER_META_DATA);
            } catch (UndefinedParameterError e) {
                metaDataSettings = Collections.emptyList();
            }
        } else {
            metaDataSettings = Collections.emptyList();
        }

        // the array must be large enough for the largest column index that is mentioned in the settings
        int maxUsedColumnIndex = -1;
        for (String[] metaDataDefinition : metaDataSettings) {
            maxUsedColumnIndex = Math.max(maxUsedColumnIndex, Integer.parseInt(metaDataDefinition[0]));
        }
        columnMetaData = new ColumnMetaData[maxUsedColumnIndex + 1];

        // each entry is a pair {column index, encoded tuple (name, selected, value type, role)}
        for (String[] metaDataDefinition : metaDataSettings) {
            int currentColumn = Integer.parseInt(metaDataDefinition[0]);
            String[] values = ParameterTypeTupel.transformString2Tupel(metaDataDefinition[1]);
            ColumnMetaData cmd = new ColumnMetaData();
            columnMetaData[currentColumn] = cmd;
            cmd.setSelected(Boolean.parseBoolean(values[1]));
            if (cmd.isSelected()) { // otherwise details don't matter
                cmd.setRole(values[3].trim());
                cmd.setUserDefinedAttributeName(values[0].trim());
                int valueType = Ontology.ATTRIBUTE_VALUE_TYPE.mapName(values[2]);
                if (valueType == -1) {
                    // fallback for old processes where the value type was saved as index rather than as string
                    valueType = Integer.parseInt(values[2]);
                }
                cmd.setAttributeValueType(valueType);
            }
        }

        // columns that were not mentioned in the list get an empty ColumnMetaData so that no entry is null
        for (int i = 0; i < columnMetaData.length; i++) {
            if (columnMetaData[i] == null) {
                columnMetaData[i] = new ColumnMetaData();
            }
        }
    }

    /**
     * Rebuilds the column meta data from the given result set: one entry per column, using the column name
     * delivered by the result set for both name arguments and the value type reported by the result set. The
     * remaining constructor arguments are {@link Attributes#ATTRIBUTE_NAME} and {@code true} (presumably the role
     * and the selected flag; confirm against {@code ColumnMetaData}).
     *
     * Does nothing if the result set is {@code null}.
     *
     * @param dataResultSet
     *            the result set to derive the column meta data from, may be {@code null}
     */
    public void reconfigure(DataResultSet dataResultSet) {
        if (dataResultSet == null) {
            return;
        }
        int numberOfColumns = dataResultSet.getNumberOfColumns();
        String[] originalColumnNames = dataResultSet.getColumnNames();
        int[] attributeValueTypes = dataResultSet.getValueTypes();
        ColumnMetaData[] derived = new ColumnMetaData[numberOfColumns];
        for (int i = 0; i < numberOfColumns; i++) {
            derived[i] = new ColumnMetaData(originalColumnNames[i],
                    originalColumnNames[i],
                    attributeValueTypes[i],
                    Attributes.ATTRIBUTE_NAME,
                    true);
        }
        columnMetaData = derived;
    }

    /**
     * Sets the parameters in the given operator to describe this configuration: the date pattern, the column meta
     * data, the annotations and the error tolerance flag. The "first row as names" parameter is always set to
     * {@code "false"}; presumably the name row is carried by the annotations list instead.
     *
     * NOTE(review): throws a {@link NullPointerException} if no column meta data has been configured yet.
     *
     * @param operator
     *            the operator whose parameters are written
     */
    public void setParameters(AbstractDataResultSetReader operator) {
        operator.getParameters().setParameter(PARAMETER_DATE_FORMAT, getDatePattern());

        // meta data: one entry {column index, encoded tuple (name, selected, value type name, role)} per column
        List<String[]> metaDataList = new LinkedList<String[]>();
        int index = 0;
        for (ColumnMetaData cmd : getColumnMetaData()) {
            String[] tupel = new String[] {
                    cmd.getUserDefinedAttributeName(),
                    String.valueOf(cmd.isSelected()),
                    Ontology.ATTRIBUTE_VALUE_TYPE.mapIndex(cmd.getAttributeValueType()),
                    cmd.getRole() };
            metaDataList.add(new String[] { String.valueOf(index), ParameterTypeTupel.transformTupel2String(tupel) });
            index++;
        }
        operator.getParameters().setParameter(PARAMETER_META_DATA, ParameterTypeList.transformList2String(metaDataList));

        // annotations: one entry {row index, annotation} per annotated row
        List<String[]> annotationList = new LinkedList<String[]>();
        for (Entry<Integer, String> annotation : annotationsMap.entrySet()) {
            annotationList.add(new String[] { annotation.getKey().toString(), annotation.getValue() });
        }
        operator.setParameter(PARAMETER_ANNOTATIONS, ParameterTypeList.transformList2String(annotationList));

        operator.getParameters().setParameter(AbstractDataResultSetReader.PARAMETER_ERROR_TOLERANT, String.valueOf(isFaultTolerant()));
        operator.getParameters().setParameter(PARAMETER_FIRST_ROW_AS_NAMES, "false");
    }

    /**
     * Returns the meta data of the given column.
     *
     * @param col
     *            the column index
     * @return the column's meta data, or {@code null} if no column meta data is configured or the index is negative
     *         or not smaller than the number of columns
     */
    public ColumnMetaData getColumnMetaData(int col) {
        if (columnMetaData == null || col < 0 || col >= columnMetaData.length) {
            return null;
        }
        return columnMetaData[col];
    }

    /**
     * Returns the indices of all selected columns in ascending order.
     *
     * @return the selected column indices; an empty array if no column is selected or no column meta data is
     *         configured
     */
    public int[] getSelectedIndices() {
        if (columnMetaData == null) {
            return new int[0];
        }
        int[] selectedIndices = new int[columnMetaData.length];
        int numberOfSelected = 0;
        for (int i = 0; i < columnMetaData.length; i++) {
            if (columnMetaData[i].isSelected()) {
                selectedIndices[numberOfSelected++] = i;
            }
        }
        // trim to the number of selected columns
        return Arrays.copyOf(selectedIndices, numberOfSelected);
    }

    /**
     * Returns the annotation of the given row.
     *
     * @param line
     *            the row index
     * @return the annotation, or {@code null} if the row has none
     */
    public String getAnnotation(int line) {
        return annotationsMap.get(line);
    }

    /**
     * Returns the indices of all rows that have an entry in the annotations map.
     *
     * @return a new sorted set; modifying it does not affect this configuration
     */
    public SortedSet<Integer> getAnnotatedRowIndices() {
        return new TreeSet<Integer>(annotationsMap.keySet());
    }

    /**
     * Returns the map from row index to annotation. This is the internal map itself, not a copy, so changes made by
     * the caller change this configuration.
     */
    public Map<Integer, String> getAnnotationsMap() {
        return annotationsMap;
    }

    /**
     * Returns the row annotated to be used as the name of the attribute or -1 if no such row was selected. If several
     * rows carry the name annotation, the smallest row index is returned.
     *
     * NOTE(review): the comparison uses {@code Annotations.ANNOTATION_NAME} while
     * {@link #reconfigure(AbstractDataResultSetReader)} writes {@code AbstractDataResultSetReader.ANNOTATION_NAME};
     * confirm that both constants have the same value.
     */
    public int getNameRow() {
        for (Entry<Integer, String> entry : annotationsMap.entrySet()) {
            if (Annotations.ANNOTATION_NAME.equals(entry.getValue())) {
                return entry.getKey();
            }
        }
        return -1;
    }

    /**
     * Returns the number of columns for which meta data is held.
     *
     * @return the length of the column meta data array, or 0 if no column meta data is configured
     */
    public int getNumerOfColumns() {
        return columnMetaData == null ? 0 : columnMetaData.length;
    }

    /**
     * Returns the column meta data array. This is the internal array itself, not a copy.
     *
     * @return the array, or {@code null} if no column meta data is configured
     */
    public ColumnMetaData[] getColumnMetaData() {
        return columnMetaData;
    }

    public void setFaultTolerant(boolean faultTolerant) {
        this.faultTolerant = faultTolerant;
    }

    public boolean isFaultTolerant() {
        return faultTolerant;
    }

    /**
     * Returns the largest row index that has an entry in the annotations map.
     *
     * @return the largest annotated row index, or -1 if no row is annotated
     */
    public int getLastAnnotatedRowIndex() {
        if (annotationsMap.isEmpty()) {
            return -1;
        }
        return annotationsMap.lastKey();
    }

    /**
     * Sets the value type of every column to {@link Ontology#ATTRIBUTE_VALUE}. Does nothing if no column meta data
     * is configured.
     */
    public void resetValueTypes() {
        if (columnMetaData == null) {
            return;
        }
        for (ColumnMetaData cmd : columnMetaData) {
            cmd.setAttributeValueType(Ontology.ATTRIBUTE_VALUE);
        }
    }

    /**
     * Returns the date format, creating and caching it on first use. A non-empty date pattern yields a
     * {@link SimpleDateFormat} for that pattern and the configured locale; otherwise
     * {@link DateFormat#getDateTimeInstance()} is used.
     *
     * @return the cached date format; the same instance is returned until the date pattern is set again
     */
    public DateFormat getDateFormat() {
        if (dateFormat == null) {
            String pattern = getDatePattern();
            if (pattern != null && !pattern.isEmpty()) {
                dateFormat = new SimpleDateFormat(pattern, locale);
            } else {
                dateFormat = DateFormat.getDateTimeInstance();
            }
        }
        return dateFormat;
    }

    public String getDatePattern() {
        return datePattern;
    }

    /**
     * Sets the date pattern and discards the cached date format so that it is rebuilt on the next call of
     * {@link #getDateFormat()}.
     *
     * @param datePattern
     *            the new pattern; {@code null} or empty selects the default date/time format
     */
    public void setDatePattern(String datePattern) {
        this.datePattern = datePattern;
        this.dateFormat = null;
    }

    @Override
    public String toString() {
        return "Annotations: " + annotationsMap + "; columns: " + Arrays.toString(columnMetaData);
    }

    /**
     * Subtracts the number of annotated rows from the number of examples reported by the given meta data and adds
     * the attribute meta data of every selected column to it. If no column meta data is configured, only the
     * subtraction is performed.
     *
     * NOTE(review): this relies on {@code MDInteger.subtract} changing the object returned by
     * {@code getNumberOfExamples()} in place; confirm against {@code MDInteger}.
     *
     * @param emd
     *            the example set meta data to update
     */
    public void addColumnMetaData(ExampleSetMetaData emd) {
        MDInteger numberOfExamples = emd.getNumberOfExamples();
        numberOfExamples.subtract(annotationsMap.size());
        if (columnMetaData == null) {
            return;
        }
        for (ColumnMetaData cmd : columnMetaData) {
            if (cmd.isSelected()) {
                emd.addAttribute(cmd.getAttributeMetaData());
            }
        }
    }

    /** Returns true if column meta data is present, i.e. the array exists and has at least one entry. */
    public boolean isComplete() {
        return columnMetaData != null && columnMetaData.length > 0;
    }

    public void setNumberFormat(NumberFormat numberFormat) {
        this.numberFormat = numberFormat;
    }

    public NumberFormat getNumberFormat() {
        return numberFormat;
    }

    public int getDataManagementType() {
        return dataManagementType;
    }
}