/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.io;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.sql.Date;
import java.text.DateFormat;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ports.Port;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.PortProvider;
import com.rapidminer.parameter.conditions.PortConnectedCondition;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.io.Encoding;
/**
* <p>
* This operator can be used to write data into CSV files (Comma Separated
* Values). The columns are separated by a configurable column separator,
* which is ";" by default. Missing data values are indicated by empty cells.
* </p>
*
* @author Ingo Mierswa
*/
public class CSVExampleSetWriter extends AbstractStreamWriter {
/** The parameter name for "The CSV file which should be written." */
public static final String PARAMETER_CSV_FILE = "csv_file";
/** The parameter name for the column separator parameter. */
public static final String PARAMETER_COLUMN_SEPARATOR = "column_separator";
/** Indicates if the attribute names should be written as first row. */
public static final String PARAMETER_WRITE_ATTRIBUTE_NAMES = "write_attribute_names";
/**
* Indicates if nominal values should be quoted with double quotes. Quotes
* inside of nominal values will be escaped by a backslash.
*/
public static final String PARAMETER_QUOTE_NOMINAL_VALUES = "quote_nominal_values";
public static final String PARAMETER_APPEND_FILE = "append_to_file";
/**
* Indicates if date attributes are written as a formated string or as
* milliseconds past since January 1, 1970, 00:00:00 GMT
*/
// TODO introduce parameter which allows to determine the written format see
// Nominal2Date operator
public static final String PARAMETER_FORMAT_DATE = "format_date_attributes";
public CSVExampleSetWriter(OperatorDescription description) {
super(description);
}
public static void writeCSV(ExampleSet exampleSet, PrintWriter out, String colSeparator, boolean quoteNomValues, boolean writeAttribNames, boolean formatDate) {
String columnSeparator = colSeparator;
boolean quoteNominalValues = quoteNomValues;
// write column names
if (writeAttribNames) {
Iterator<Attribute> a = exampleSet.getAttributes().allAttributes();
boolean first = true;
while (a.hasNext()) {
if (!first)
out.print(columnSeparator);
Attribute attribute = a.next();
String name = attribute.getName();
if (quoteNominalValues) {
name = name.replaceAll("\"", "'");
name = "\"" + name + "\"";
}
out.print(name);
first = false;
}
out.println();
}
// write data
for (Example example : exampleSet) {
Iterator<Attribute> a = exampleSet.getAttributes().allAttributes();
boolean first = true;
while (a.hasNext()) {
Attribute attribute = a.next();
if (!first)
out.print(columnSeparator);
if (!Double.isNaN(example.getValue(attribute))) {
if (attribute.isNominal()) {
String stringValue = example.getValueAsString(attribute);
if (quoteNominalValues) {
stringValue = stringValue.replaceAll("\"", "'");
stringValue = "\"" + stringValue + "\"";
}
out.print(stringValue);
} else {
Double value = example.getValue(attribute);
if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
if (formatDate) {
Date date = new Date(value.longValue());
String s = DateFormat.getInstance().format(date);
out.print(s);
} else {
out.print(value);
}
} else {
out.print(value);
}
}
}
first = false;
}
out.println();
}
}
@Override
public void writeStream(ExampleSet exampleSet, java.io.OutputStream outputStream) throws OperatorException {
String columnSeparator = getParameterAsString(PARAMETER_COLUMN_SEPARATOR);
boolean quoteNominalValues = getParameterAsBoolean(PARAMETER_QUOTE_NOMINAL_VALUES);
boolean writeAttribNames = getParameterAsBoolean(PARAMETER_WRITE_ATTRIBUTE_NAMES);
boolean formatDate = getParameterAsBoolean(PARAMETER_FORMAT_DATE);
PrintWriter out = null;
try {
out = new PrintWriter(new OutputStreamWriter(outputStream, Encoding.getEncoding(this)));
writeCSV(exampleSet, out, columnSeparator, quoteNominalValues, writeAttribNames, formatDate);
out.flush();
} finally {
if (out != null) {
out.close();
}
}
}
@Override
protected boolean supportsEncoding() {
return true;
}
@Override
protected boolean shouldAppend() {
return getParameterAsBoolean(PARAMETER_APPEND_FILE);
}
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = new LinkedList<ParameterType>();
types.add(makeFileParameterType());
// types.add(new ParameterTypeFile(PARAMETER_CSV_FILE,
// "The CSV file which should be written.", "csv", false));
types.add(new ParameterTypeString(PARAMETER_COLUMN_SEPARATOR, "The column separator.", ";", false));
types.add(new ParameterTypeBoolean(PARAMETER_WRITE_ATTRIBUTE_NAMES, "Indicates if the attribute names should be written as first row.", true, false));
types.add(new ParameterTypeBoolean(PARAMETER_QUOTE_NOMINAL_VALUES, "Indicates if nominal values should be quoted with double quotes.", true, false));
types.add(new ParameterTypeBoolean(PARAMETER_FORMAT_DATE,
"Indicates if date attributes are written as a formated string or as milliseconds past since January 1, 1970, 00:00:00 GMT", true, true));
ParameterType type = new ParameterTypeBoolean(PARAMETER_APPEND_FILE,
"Indicates if new content should be appended to the file or if the pre-existing file content should be overwritten.", false, false);
type.registerDependencyCondition(new PortConnectedCondition(this, new PortProvider() {
@Override
public Port getPort() {
return fileOutputPort;
}
}, true, false));
types.add(type);
types.addAll(super.getParameterTypes());
return types;
}
@Override
String getFileParameterName() {
return PARAMETER_CSV_FILE;
}
@Override
String[] getFileExtensions() {
return new String[] { "csv" };
}
}