/******************************************************************************* * * Pentaho Big Data * * Copyright (C) 2002-2012 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.di.trans.steps.cassandrasstableoutput; /* * Adapted from DataStax DataImportExample * http://www.datastax.com/wp-content/uploads/2011/08/DataImportExample.java * * Original Disclaimer: * This file is an example on how to use the Cassandra SSTableSimpleUnsortedWriter class to create * sstables from a csv input file. * While this has been tested to work, this program is provided "as is" with no guarantee. Moreover, * it's primary aim is toward simplicity rather than completness. In partical, don't use this as an * example to parse csv files at home. * */ import static org.apache.cassandra.utils.ByteBufferUtil.bytes; import java.io.File; import java.nio.ByteBuffer; import java.text.DateFormat; import java.util.Date; import java.util.Map; import java.util.Map.Entry; import org.apache.cassandra.db.marshal.AsciiType; import org.apache.cassandra.io.sstable.SSTableSimpleUnsortedWriter; import org.apache.log4j.helpers.ISO8601DateFormat; import org.pentaho.di.core.exception.KettleException; /** * Outputs Cassandra SSTables (sorted-string tables) to a directory. * * Adapted from DataStax DataImportExample * http://www.datastax.com/wp-content/uploads/2011/08/DataImportExample.java * * @author Rob Turner (robert{[at]}robertturner{[dot]}com{[dot]}au) */ public class SSTableWriter { private static final DateFormat ISO8601 = ISO8601DateFormat.getInstance(); private static final int DEFAULT_BUFFER_SIZE_MB = 16; private String directory = System.getProperty("java.io.tmpdir"); private String keyspace; private String columnFamily; private String keyField; private int bufferSize = DEFAULT_BUFFER_SIZE_MB; private SSTableSimpleUnsortedWriter writer; /** * Set the directory to read the sstables from * * @param directory the directory to read the sstables from */ public void setDirectory(String directory) { this.directory = directory; } /** * Set the target keyspace * * @param keyspace the keyspace to use */ public void setKeyspace(String keyspace) { this.keyspace = keyspace; } /** * Set the column family (table) to load to. Note: it is assumed that * this column family exists in the keyspace apriori. * * @param columnFamily the column family to load to. */ public void setColumnFamily(String columnFamily) { this.columnFamily = columnFamily; } /** * Set the key field name * * @param keyField the key field name */ public void setKeyField(String keyField) { this.keyField = keyField; } /** * Set the buffer size (Mb) to use. A new table file is written * every time the buffer is full. * * @param bufferSize the size of the buffer to use */ public void setBufferSize(int bufferSize) { this.bufferSize = bufferSize; } /** * Initialization. Creates target directory if needed and establishes * the writer * * @throws Exception if a problem occurs */ public void init() throws Exception { File directory = new File(this.directory); if (!directory.exists()) { directory.mkdir(); } try { //TODO set parameter for null writer = new SSTableSimpleUnsortedWriter(directory, null, keyspace, columnFamily, AsciiType.instance, null, bufferSize); } catch (Throwable t) { throw new KettleException( "Failed to create SSTableSimpleUnsortedWriter", t); } } /** * Process a row of data * * @param record a row of data as a Map of column names to values * @throws Exception if a problem occurs */ public void processRow(Map<String, Object> record) throws Exception { // get UUID ByteBuffer uuid = valueToBytes(record.get(keyField)); // write record writer.newRow(uuid); long timestamp = System.currentTimeMillis() * 1000; for (Entry<String, Object> entry : record.entrySet()) { // get value Object value = entry.getValue(); if (isNull(value)) { continue; } // don't write the key as a column! if (entry.getKey().equals(keyField)) { continue; } // write writer.addColumn(bytes(entry.getKey()), valueToBytes(value), timestamp); } } private static final ByteBuffer valueToBytes(Object val) throws Exception { if (val instanceof String) { return bytes((String) val); } if (val instanceof Integer) { return bytes(((Integer) val).intValue()); } if (val instanceof Float) { return bytes(((Float) val).floatValue()); } if (val instanceof Boolean) { // will return "true" or "false" return bytes(val.toString()); } if (val instanceof Date) { // use ISO 8601 date format try { return bytes(ISO8601.format((Date) val)); } catch (ArrayIndexOutOfBoundsException e) { // something wrong with the date... just convert to string return bytes(val.toString()); } } if (val instanceof Long) { return bytes(((Long) val).longValue()); } if (val instanceof Double) { return bytes(((Double) val).doubleValue()); } if (val instanceof byte[]) { return ByteBuffer.wrap((byte[]) val); } // reduce to string return bytes(val.toString()); } static final boolean isNull(Object val) { if (val == null) { return true; } // empty strings are considered null in this context if (val instanceof String) { return "".equals(val); } return false; } /** * Close the writer * * @throws Exception if a problem occurs */ public void close() throws Exception { if (writer != null) { writer.close(); } } }