/*
* RapidMiner
*
* Copyright (C) 2001-2014 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.nio;
import java.text.NumberFormat;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.io.AbstractReader;
import com.rapidminer.operator.nio.model.AbstractDataResultSetReader;
import com.rapidminer.operator.nio.model.CSVResultSetConfiguration;
import com.rapidminer.operator.nio.model.DataResultSetFactory;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeChar;
import com.rapidminer.parameter.ParameterTypeConfiguration;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.tools.DateParser;
import com.rapidminer.tools.LineParser;
import com.rapidminer.tools.StrictDecimalFormat;
/**
 * <p>
 * Operator for loading data from .csv files. The delimiter as well as quoting, escaping, comment
 * and number/date format handling can be configured through the operator's parameters.
 * </p>
 *
 * @author Ingo Mierswa, Tobias Malbrecht, Sebastian Loh, Sebastian Land, Simon Fischer
 */
public class CSVExampleSource extends AbstractDataResultSetReader {

    /** Parameter key of the file the data is read from. */
    public static final String PARAMETER_CSV_FILE = "csv_file";

    /** Parameter key: trim each line before the column split is performed. */
    public static final String PARAMETER_TRIM_LINES = "trim_lines";

    /** Parameter key: whether a comment character should be used. */
    public static final String PARAMETER_SKIP_COMMENTS = "skip_comments";

    /** Parameter key: characters that mark the beginning of a comment line. */
    public static final String PARAMETER_COMMENT_CHARS = "comment_characters";

    /** Parameter key: whether quotes should be regarded. */
    public static final String PARAMETER_USE_QUOTES = "use_quotes";

    /** Parameter key: the quotes character. */
    public static final String PARAMETER_QUOTES_CHARACTER = "quotes_character";

    /** Parameter key: column separators (regular expression). */
    public static final String PARAMETER_COLUMN_SEPARATORS = "column_separators";

    /** Parameter key: character used to escape quotes and column separators. */
    public static final String PARAMETER_ESCAPE_CHARACTER = "escape_character";

    // Registers this operator class as the reader for "csv", together with its file parameter key.
    static {
        AbstractReader.registerReaderDescription(
                new ReaderDescription("csv", CSVExampleSource.class, PARAMETER_CSV_FILE));
    }

    /**
     * Creates the operator; the description is passed on to the superclass unchanged.
     *
     * @param description the operator description handed to the superclass constructor
     */
    public CSVExampleSource(OperatorDescription description) {
        super(description);
    }

    /**
     * Supplies the result set factory for this reader.
     *
     * @return a new {@link CSVResultSetConfiguration} created from this operator
     */
    @Override
    protected DataResultSetFactory getDataResultSetFactory() throws OperatorException {
        return new CSVResultSetConfiguration(this);
    }

    /**
     * Supplies the number format for this reader.
     *
     * @return the format obtained from {@link StrictDecimalFormat#getInstance} for this operator
     */
    @Override
    protected NumberFormat getNumberFormat() throws OperatorException {
        return StrictDecimalFormat.getInstance(this, true);
    }

    /**
     * @return always {@code true}
     */
    @Override
    protected boolean supportsEncoding() {
        return true;
    }

    /**
     * @return {@link #PARAMETER_CSV_FILE}
     */
    @Override
    protected String getFileParameterName() {
        return PARAMETER_CSV_FILE;
    }

    /**
     * @return the extension {@code "csv"}
     */
    @Override
    protected String getFileExtension() {
        return "csv";
    }

    /**
     * Assembles the parameter list of this operator. The CSV specific entries (configuration
     * wizard, file, separators, trimming, quoting, escaping, comments, number and date formats)
     * come first, in that order, followed by everything the superclass contributes.
     *
     * @return the complete, ordered list of parameter types
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> result = new LinkedList<ParameterType>();

        // Configuration wizard entry, followed by the file parameter itself.
        ParameterType wizard =
                new ParameterTypeConfiguration(CSVExampleSourceConfigurationWizardCreator.class, this);
        wizard.setExpert(false);
        result.add(wizard);
        result.add(makeFileParameterType());

        // Column separators and line trimming.
        ParameterType separators = new ParameterTypeString(
                PARAMETER_COLUMN_SEPARATORS,
                "Column separators for data files (regular expression)",
                ";",
                false);
        result.add(separators);

        ParameterType trimLines = new ParameterTypeBoolean(
                PARAMETER_TRIM_LINES,
                "Indicates if lines should be trimmed (empty spaces are removed at the beginning and the end) before the column split is performed. This option might be problematic if TABs are used as a seperator.",
                false);
        result.add(trimLines);

        // Quoting: the quote character carries a dependency condition on the use_quotes switch.
        ParameterType useQuotes = new ParameterTypeBoolean(
                PARAMETER_USE_QUOTES,
                "Indicates if quotes should be regarded.",
                true,
                false);
        result.add(useQuotes);

        ParameterType quoteCharacter = new ParameterTypeChar(
                PARAMETER_QUOTES_CHARACTER,
                "The quotes character.",
                LineParser.DEFAULT_QUOTE_CHARACTER,
                false);
        quoteCharacter.registerDependencyCondition(
                new BooleanParameterCondition(this, PARAMETER_USE_QUOTES, false, true));
        result.add(quoteCharacter);

        // Escape character; no dependency condition is registered for it.
        ParameterType escapeCharacter = new ParameterTypeChar(
                PARAMETER_ESCAPE_CHARACTER,
                "The character that is used to escape quotes and column seperators",
                LineParser.DEFAULT_QUOTE_ESCAPE_CHARACTER,
                true);
        result.add(escapeCharacter);

        // Comments: the comment characters carry a dependency condition on the skip_comments switch.
        ParameterType skipComments = new ParameterTypeBoolean(
                PARAMETER_SKIP_COMMENTS,
                "Indicates if a comment character should be used.",
                false,
                false);
        result.add(skipComments);

        ParameterType commentCharacters = new ParameterTypeString(
                PARAMETER_COMMENT_CHARS,
                "Lines beginning with these characters are ignored.",
                "#",
                false);
        commentCharacters.registerDependencyCondition(
                new BooleanParameterCondition(this, PARAMETER_SKIP_COMMENTS, false, true));
        result.add(commentCharacters);

        // Number and date format parameters, then whatever the superclass defines.
        result.addAll(StrictDecimalFormat.getParameterTypes(this, true));
        result.addAll(DateParser.getParameterTypes(this));
        result.addAll(super.getParameterTypes());

        return result;
    }
}