/*******************************************************************************
*
* Pentaho Big Data
*
* Copyright (C) 2002-2012 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.di.trans.steps.cassandrasstableoutput;
import java.util.HashMap;
import java.util.Map;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
/**
* Output step for writing Cassandra SSTables (sorted-string tables).
*
* @author Rob Turner (robert{[at]}robertturner{[dot]}com{[dot]}au)
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
*/
public class SSTableOutput extends BaseStep implements StepInterface {

  /** Step metadata; assigned when the first row is processed */
  protected SSTableOutputMeta m_meta;

  /** Step data; assigned when the first row is processed */
  protected SSTableOutputData m_data;

  public SSTableOutput(StepMeta stepMeta, StepDataInterface stepDataInterface,
      int copyNr, TransMeta transMeta, Trans trans) {
    super(stepMeta, stepDataInterface, copyNr, transMeta, trans);
  }

  /** The number of rows seen so far for this batch */
  protected int rowsSeen;

  /** The directory to output to */
  protected String directory;

  /** The keyspace to use */
  protected String keyspace;

  /** The name of the column family (table) to write to */
  protected String columnFamily;

  /** The key field used to determine unique keys (IDs) for rows */
  protected String keyField;

  /** Size (MB) of write buffer */
  protected String bufferSize;

  /** Writes the SSTable output; null until initialization succeeds and after closing */
  protected SSTableWriter writer;

  /** Used to determine input fields */
  protected RowMetaInterface inputMetadata;

  /** List of field names (optimization) */
  private String[] fieldNames;

  /** List of field indices (optimization) */
  private int[] fieldValueIndices;

  /**
   * Resolves the step settings (with variable substitution), caches the
   * incoming field layout and creates the SSTable writer.
   * <p>
   * The {@code first} flag is cleared and the {@code writer} field is
   * assigned only once everything has succeeded. A failed initialization
   * therefore leaves the step in its "not yet initialized" state, so the next
   * row reports the real configuration problem again instead of failing on
   * half-initialized fields.
   *
   * @param smi the step metadata, expected to be a {@link SSTableOutputMeta}
   * @param sdi the step data, expected to be a {@link SSTableOutputData}
   * @throws KettleException if the YAML path, the column family or the key
   *           field is not specified
   * @throws Exception if the buffer size is not a valid integer
   *           ({@link NumberFormatException}) or the writer fails while being
   *           closed or initialized
   */
  private void initialize(StepMetaInterface smi, StepDataInterface sdi)
      throws Exception {
    rowsSeen = 0;
    m_meta = (SSTableOutputMeta) smi;
    m_data = (SSTableOutputData) sdi;
    inputMetadata = getInputRowMeta();

    String yamlPath = environmentSubstitute(m_meta.getYamlPath());
    if (Const.isEmpty(yamlPath)) {
      // KettleException rather than a raw Exception, matching the checks below
      throw new KettleException(BaseMessages.getString(SSTableOutputMeta.PKG,
          "SSTableOutput.Error.NoPathToYAML"));
    }
    logBasic(BaseMessages.getString(SSTableOutputMeta.PKG,
        "SSTableOutput.Message.YAMLPath", yamlPath));
    System.setProperty("cassandra.config", "file:" + yamlPath);

    directory = environmentSubstitute(m_meta.getDirectory());
    keyspace = environmentSubstitute(m_meta.getCassandraKeyspace());
    columnFamily = environmentSubstitute(m_meta.getColumnFamilyName());
    keyField = environmentSubstitute(m_meta.getKeyField());
    bufferSize = environmentSubstitute(m_meta.getBufferSize());

    if (Const.isEmpty(columnFamily)) {
      throw new KettleException(BaseMessages.getString(SSTableOutputMeta.PKG,
          "SSTableOutput.Error.NoColumnFamilySpecified"));
    }
    if (Const.isEmpty(keyField)) {
      throw new KettleException(BaseMessages.getString(SSTableOutputMeta.PKG,
          "SSTableOutput.Error.NoKeySpecified"));
    }

    // what are the fields? where are they?
    fieldNames = inputMetadata.getFieldNames();
    fieldValueIndices = new int[fieldNames.length];
    for (int i = 0; i < fieldNames.length; i++) {
      fieldValueIndices[i] = inputMetadata.indexOfValue(fieldNames[i]);
    }

    // release a writer left over from an earlier initialization; drop the
    // reference even if close() fails so it is never closed twice
    if (writer != null) {
      try {
        writer.close();
      } finally {
        writer = null;
      }
    }

    // create/init writer; only publish it to the field once init() succeeded
    SSTableWriter newWriter = new SSTableWriter();
    newWriter.setDirectory(directory);
    newWriter.setKeyspace(keyspace);
    newWriter.setColumnFamily(columnFamily);
    newWriter.setKeyField(keyField);
    newWriter.setBufferSize(Integer.parseInt(bufferSize));
    newWriter.init();
    writer = newWriter;

    // initialization complete - do not run it again for subsequent rows
    first = false;
  }

  /**
   * Reads one row from the input and hands it to the SSTable writer as a map
   * of field name to value; values the writer considers null are left out.
   * <p>
   * When a row cannot be processed and error handling is enabled for this
   * step, the row is sent to the error stream with code
   * {@code ERR_SSTABLE_OUTPUT_01} and processing continues. Without error
   * handling the failure is rethrown so the step fails.
   *
   * @param smi the step metadata
   * @param sdi the step data
   * @return {@code false} when the step is stopped or the input is exhausted,
   *         {@code true} when more rows may follow
   * @throws KettleException if a row fails and error handling is not enabled,
   *           or if the row cannot be passed to the error stream
   */
  @Override
  public boolean processRow(StepMetaInterface smi, StepDataInterface sdi)
      throws KettleException {
    // still processing?
    if (isStopped()) {
      return false;
    }

    Object[] r = getRow();
    if (r == null) {
      // no more output - clean up/close connections. Kept outside the
      // per-row try block so a problem here is never reported as a failed row.
      setOutputDone();
      closeWriter();
      return false;
    }

    try {
      if (first) {
        initialize(smi, sdi);
      }

      // create record
      Map<String, Object> record = new HashMap<String, Object>();
      for (int i = 0; i < fieldNames.length; i++) {
        Object value = r[fieldValueIndices[i]];
        if (SSTableWriter.isNull(value)) {
          continue;
        }
        record.put(fieldNames[i], value);
      }

      // write it
      writer.processRow(record);
    } catch (Exception e) {
      String message = BaseMessages.getString(SSTableOutputMeta.PKG,
          "SSTableOutput.Error.FailedToProcessRow");
      if (!getStepMeta().isDoingErrorHandling()) {
        // no error stream to route the row to - fail the step, keeping the cause
        throw new KettleException(message, e);
      }
      logError(message, e);
      // single error row - found it!
      putError(getInputRowMeta(), r, 1L, e.getMessage(), null,
          "ERR_SSTABLE_OUTPUT_01");
    }

    return true;
  }

  /**
   * Forwards the stop flag to the base step and closes the writer when the
   * step is being stopped.
   *
   * @param stopped whether the step is being stopped
   */
  @Override
  public void setStopped(boolean stopped) {
    super.setStopped(stopped);
    if (stopped) {
      closeWriter();
    }
  }

  /**
   * Closes the SSTable writer, if there is one. A failure to close is logged
   * rather than thrown. The writer reference is cleared in either case, so a
   * writer whose {@code close()} failed is not closed a second time by a
   * later call.
   */
  public void closeWriter() {
    if (writer == null) {
      return;
    }
    try {
      writer.close();
    } catch (Exception e) {
      // best effort: report the failure but do not propagate it
      logError(BaseMessages.getString(SSTableOutputMeta.PKG,
          "SSTableOutput.Error.FailedToCloseWriter"), e);
    } finally {
      writer = null;
    }
  }
}