/******************************************************************************* * * Pentaho Big Data * * Copyright (C) 2002-2012 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.cassandra; import java.math.BigDecimal; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.UUID; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.AsciiType; import org.apache.cassandra.db.marshal.BooleanType; import org.apache.cassandra.db.marshal.CompositeType; import org.apache.cassandra.db.marshal.DateType; import org.apache.cassandra.db.marshal.DecimalType; import org.apache.cassandra.db.marshal.DoubleType; import org.apache.cassandra.db.marshal.DynamicCompositeType; import org.apache.cassandra.db.marshal.FloatType; import org.apache.cassandra.db.marshal.Int32Type; import org.apache.cassandra.db.marshal.IntegerType; import org.apache.cassandra.db.marshal.LexicalUUIDType; import org.apache.cassandra.db.marshal.LongType; import org.apache.cassandra.db.marshal.TypeParser; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.exceptions.SyntaxException; import org.apache.cassandra.thrift.CfDef; import org.apache.cassandra.thrift.Column; import org.apache.cassandra.thrift.ColumnDef; import org.apache.cassandra.thrift.CqlRow; import org.apache.cassandra.thrift.KeySlice; import org.apache.cassandra.thrift.KsDef; import org.pentaho.di.core.Const; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.exception.KettleValueException; import org.pentaho.di.core.row.ValueMeta; import org.pentaho.di.core.row.ValueMetaInterface; import org.pentaho.di.i18n.BaseMessages; /** * Class encapsulating read-only schema information for a column family. Has * utility routines for converting between Cassandra meta data and Kettle meta * data, and for deserializing values. * * @author Mark Hall (mhall{[at]}pentaho{[dot]}com) * @version $Revision$ */ public class CassandraColumnMetaData { protected static final Class<?> PKG = CassandraColumnMetaData.class; public static final String UTF8 = "UTF-8"; /** Name of the column family this meta data refers to */ protected String m_columnFamilyName; // can be used as the key name /** Type of the key */ protected String m_keyValidator; // name of the class for key validation /** Type of the column names (used for sorting columns) */ protected String m_columnComparator; // name of the class for sorting column // names /** m_columnComparator converted to Charset encoding string */ protected String m_columnNameEncoding; /** * Default validator for the column family (table) - we can use this as the * type for any columns specified in a SELECT clause which *arent* in the meta * data */ protected String m_defaultValidationClass; /** Map of column names/types */ protected Map<String, String> m_columnMeta; /** Map of column names to indexed values (if any) */ protected Map<String, HashSet<Object>> m_indexedVals; /** Holds the schema textual description */ protected StringBuffer m_schemaDescription; /** * Constructor. * * @param conn connection to cassandra * @param columnFamily the name of the column family to maintain meta data * for. * @throws Exception if a problem occurs during connection or when fetching * meta data */ public CassandraColumnMetaData(CassandraConnection conn, String columnFamily) throws Exception { m_columnFamilyName = columnFamily; refresh(conn); } public String getDefaultValidationClass() { return m_defaultValidationClass; } /** * Refreshes the encapsulated meta data for the column family. * * @param conn the connection to cassandra to use for refreshing the meta data * @throws Exception if a problem occurs during connection or when fetching * meta data */ public void refresh(CassandraConnection conn) throws Exception { m_schemaDescription = new StringBuffer(); // column families KsDef keySpace = conn.describeKeyspace(); List<CfDef> colFams = null; if (keySpace != null) { colFams = keySpace.getCf_defs(); } else { throw new Exception(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.UnableToGetMetaDataForKeyspace", conn.m_keyspaceName)); } // look for the requested column family CfDef colDefs = null; for (CfDef fam : colFams) { String columnFamilyName = fam.getName(); // table name if (columnFamilyName.equals(m_columnFamilyName)) { m_schemaDescription.append("Column family: " + m_columnFamilyName); m_keyValidator = fam.getKey_validation_class(); // key type m_columnComparator = fam.getComparator_type(); // column names encoded // as m_defaultValidationClass = fam.getDefault_validation_class(); // default // column // type m_schemaDescription.append("\n\tKey validator: " + m_keyValidator); m_schemaDescription.append("\n\tColumn comparator: " + m_columnComparator); m_schemaDescription.append("\n\tDefault column validator: " + m_defaultValidationClass); /* * m_schemaDescription.append("\n\tDefault column validator: " + * m_defaultValidationClass * .substring(m_defaultValidationClass.lastIndexOf(".")+1, * m_defaultValidationClass.length())); */ // these seem to have disappeared between 0.8.6 and 1.0.0! /* * m_schemaDescription.append("\n\tMemtable operations: " + * fam.getMemtable_operations_in_millions()); * m_schemaDescription.append("\n\tMemtable throughput: " + * fam.getMemtable_throughput_in_mb()); * m_schemaDescription.append("\n\tMemtable flush after: " + * fam.getMemtable_flush_after_mins()); */ // these have disappeared between 1.0.8 and 1.1.0!! // m_schemaDescription.append("\n\tRows cached: " + // fam.getRow_cache_size()); // m_schemaDescription.append("\n\tRow cache save period: " + // fam.getRow_cache_save_period_in_seconds()); // m_schemaDescription.append("\n\tKeys cached: " + // fam.getKey_cache_size()); // m_schemaDescription.append("\n\tKey cached save period: " + // fam.getKey_cache_save_period_in_seconds()); m_schemaDescription.append("\n\tRead repair chance: " + fam.getRead_repair_chance()); m_schemaDescription .append("\n\tGC grace: " + fam.getGc_grace_seconds()); m_schemaDescription.append("\n\tMin compaction threshold: " + fam.getMin_compaction_threshold()); m_schemaDescription.append("\n\tMax compaction threshold: " + fam.getMax_compaction_threshold()); m_schemaDescription.append("\n\tReplicate on write: " + fam.replicate_on_write); // String rowCacheP = fam.getRow_cache_provider(); m_schemaDescription.append("\n\n\tColumn metadata:"); colDefs = fam; break; } } if (colDefs == null) { throw new Exception(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.UnableToFindRequestedColumnFamily", m_columnFamilyName, conn.m_keyspaceName)); } m_columnNameEncoding = m_columnComparator; // set up our meta data map m_columnMeta = new TreeMap<String, String>(); m_indexedVals = new HashMap<String, HashSet<Object>>(); String comment = colDefs.getComment(); if (comment != null && comment.length() > 0) { extractIndexedMeta(comment, m_indexedVals); } Iterator<ColumnDef> colMetaData = colDefs.getColumn_metadataIterator(); if (colMetaData != null) { while (colMetaData.hasNext()) { ColumnDef currentDef = colMetaData.next(); ByteBuffer b = ByteBuffer.wrap(currentDef.getName()); String colName = getColumnValue(b, m_columnComparator).toString(); String colType = currentDef.getValidation_class(); m_columnMeta.put(colName, colType); m_schemaDescription.append("\n\tColumn name: " + colName); m_schemaDescription.append("\n\t\tColumn validator: " + colType); String indexName = currentDef.getIndex_name(); if (!Const.isEmpty(indexName)) { m_schemaDescription.append("\n\t\tIndex name: " + currentDef.getIndex_name()); } if (m_indexedVals.containsKey(colName)) { HashSet<Object> indexedVals = m_indexedVals.get(colName); m_schemaDescription.append("\n\t\tLegal values: {"); int count = 0; for (Object val : indexedVals) { m_schemaDescription.append(val.toString()); count++; if (count != indexedVals.size()) { m_schemaDescription.append(","); } else { m_schemaDescription.append("}"); } } } } } } protected void extractIndexedMeta(String comment, Map<String, HashSet<Object>> indexedVals) { if (comment.indexOf("@@@") < 0) { return; } String meta = comment.substring(comment.indexOf("@@@"), comment.lastIndexOf("@@@")); meta = meta.replace("@@@", ""); String[] fields = meta.split(";"); for (String field : fields) { field = field.trim(); String[] parts = field.split(":"); if (parts.length != 2) { continue; } String fieldName = parts[0].trim(); String valsS = parts[1]; valsS = valsS.replace("{", ""); valsS = valsS.replace("}", ""); String[] vals = valsS.split(","); if (vals.length > 0) { HashSet<Object> valsSet = new HashSet<Object>(); for (String aVal : vals) { valsSet.add(aVal.trim()); } indexedVals.put(fieldName, valsSet); } } // } } /** * Static utility routine for checking for the existence of a column family * (table) * * @param conn the connection to use * @param columnFamily the column family to check for * @return true if the supplied column family name exists in the keyspace * @throws Exception if a problem occurs */ public static boolean columnFamilyExists(CassandraConnection conn, String columnFamily) throws Exception { boolean found = false; // column families KsDef keySpace = conn.describeKeyspace(); List<CfDef> colFams = null; if (keySpace != null) { colFams = keySpace.getCf_defs(); } else { throw new Exception(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.UnableToGetMetaDataForKeyspace", conn.m_keyspaceName)); } // look for the requested column family for (CfDef fam : colFams) { String columnFamilyName = fam.getName(); // table name if (columnFamilyName.equals(columnFamily)) { found = true; break; } } return found; } /** * Static utility routine that returns a list of column families that exist in * the keyspace encapsulated in the supplied connection * * @param conn the connection to use * @return a list of column families (tables) * @throws Exception if a problem occurs */ public static List<String> getColumnFamilyNames(CassandraConnection conn) throws Exception { KsDef keySpace = conn.describeKeyspace(); List<CfDef> colFams = null; if (keySpace != null) { colFams = keySpace.getCf_defs(); } else { throw new Exception(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.UnableToGetMetaDataForKeyspace", conn.m_keyspaceName)); } List<String> colFamNames = new ArrayList<String>(); for (CfDef fam : colFams) { colFamNames.add(fam.getName()); } return colFamNames; } /** * Return the schema overview information * * @return the textual description of the schema */ public String getSchemaDescription() { return m_schemaDescription.toString(); } /** * Return the Cassandra column type (internal cassandra class name relative to * org.apache.cassandra.db.marshal) for the given Kettle column. * * @param vm the ValueMetaInterface for the Kettle column * @return the corresponding internal cassandra type. */ public static String getCassandraTypeForValueMeta(ValueMetaInterface vm) { switch (vm.getType()) { case ValueMetaInterface.TYPE_STRING: return "UTF8Type"; case ValueMetaInterface.TYPE_BIGNUMBER: return "DecimalType"; case ValueMetaInterface.TYPE_BOOLEAN: return "BooleanType"; case ValueMetaInterface.TYPE_INTEGER: return "LongType"; case ValueMetaInterface.TYPE_NUMBER: return "DoubleType"; case ValueMetaInterface.TYPE_DATE: return "DateType"; case ValueMetaInterface.TYPE_BINARY: case ValueMetaInterface.TYPE_SERIALIZABLE: return "BytesType"; } return "UTF8Type"; } /** * Return the Cassandra CQL column/key type for the given Kettle column. We * use this type for CQL create column family statements since, for some * reason, the internal type isn't recognized for the key. Internal types * *are* recognized for column definitions. The CQL reference guide states * that fully qualified (or relative to org.apache.cassandra.db.marshal) class * names can be used instead of CQL types - however, using these when defining * the key type always results in BytesType getting set for the key for some * reason. * * @param vm the ValueMetaInterface for the Kettle column * @return the corresponding CQL type */ public static String getCQLTypeForValueMeta(ValueMetaInterface vm) { switch (vm.getType()) { case ValueMetaInterface.TYPE_STRING: return "varchar"; case ValueMetaInterface.TYPE_BIGNUMBER: return "decimal"; case ValueMetaInterface.TYPE_BOOLEAN: return "boolean"; case ValueMetaInterface.TYPE_INTEGER: return "bigint"; case ValueMetaInterface.TYPE_NUMBER: return "double"; case ValueMetaInterface.TYPE_DATE: return "timestamp"; case ValueMetaInterface.TYPE_BINARY: case ValueMetaInterface.TYPE_SERIALIZABLE: return "blob"; } return "blob"; } /** * Static utility method that converts a Kettle value into an appropriately * encoded CQL string. * * @param vm the ValueMeta for the Kettle value * @param value the actual Kettle value * @return an appropriately encoded CQL string representation of the value, * suitable for using in an CQL query. * @throws KettleValueException if there is an error converting. */ public static String kettleValueToCQL(ValueMetaInterface vm, Object value) throws KettleValueException { switch (vm.getType()) { case ValueMetaInterface.TYPE_STRING: { UTF8Type u = UTF8Type.instance; String toConvert = vm.getString(value); ByteBuffer decomposed = u.decompose(toConvert); String cassandraString = u.getString(decomposed); return escapeSingleQuotes(cassandraString); } case ValueMetaInterface.TYPE_BIGNUMBER: { DecimalType dt = DecimalType.instance; BigDecimal toConvert = vm.getBigNumber(value); ByteBuffer decomposed = dt.decompose(toConvert); String cassandraString = dt.getString(decomposed); return cassandraString; } case ValueMetaInterface.TYPE_BOOLEAN: { BooleanType bt = BooleanType.instance; Boolean toConvert = vm.getBoolean(value); ByteBuffer decomposed = bt.decompose(toConvert); String cassandraString = bt.getString(decomposed); return escapeSingleQuotes(cassandraString); } case ValueMetaInterface.TYPE_INTEGER: { LongType lt = LongType.instance; Long toConvert = vm.getInteger(value); ByteBuffer decomposed = lt.decompose(toConvert); String cassandraString = lt.getString(decomposed); return cassandraString; } case ValueMetaInterface.TYPE_NUMBER: { DoubleType dt = DoubleType.instance; Double toConvert = vm.getNumber(value); ByteBuffer decomposed = dt.decompose(toConvert); String cassandraString = dt.getString(decomposed); return cassandraString; } case ValueMetaInterface.TYPE_DATE: DateType d = DateType.instance; Date toConvert = vm.getDate(value); ByteBuffer decomposed = d.decompose(toConvert); String cassandraFormattedDateString = d.getString(decomposed); return escapeSingleQuotes(cassandraFormattedDateString); case ValueMetaInterface.TYPE_BINARY: case ValueMetaInterface.TYPE_SERIALIZABLE: throw new KettleValueException(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.CantConvertBinaryToCQL")); } throw new KettleValueException(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.CantConvertType", vm.getName(), vm.getTypeDesc())); } /** * Static utility to decompose a Kettle value to a ByteBuffer. Note - does not * check if the kettle value is null. * * @param vm the ValueMeta for the Kettle value * @param value the actual Kettle value * @return a ByteBuffer encapsulating the bytes for the decomposed value * @throws KettleException if a problem occurs */ public ByteBuffer kettleValueToByteBuffer(ValueMetaInterface vm, Object value, boolean isKey) throws KettleException { String fullTransCoder = m_defaultValidationClass; // check the key first if (isKey) { fullTransCoder = m_keyValidator; } else { fullTransCoder = m_columnMeta.get(vm.getName()); if (fullTransCoder == null) { // use default if not in column meta data fullTransCoder = m_defaultValidationClass; } } String transCoder = fullTransCoder; // if it's a composite type make sure that we check only against the // primary type if (transCoder.indexOf('(') > 0) { transCoder = transCoder.substring(0, transCoder.indexOf('(')); } ByteBuffer decomposed = null; if (transCoder.indexOf("UTF8Type") > 0) { UTF8Type u = UTF8Type.instance; decomposed = u.decompose(vm.getString(value)); } else if (transCoder.indexOf("AsciiType") > 0) { AsciiType at = AsciiType.instance; decomposed = at.decompose(vm.getString(value)); } else if (transCoder.indexOf("LongType") > 0) { LongType lt = LongType.instance; decomposed = lt.decompose(vm.getInteger(value)); } else if (transCoder.indexOf("DoubleType") > 0) { DoubleType dt = DoubleType.instance; decomposed = dt.decompose(vm.getNumber(value)); } else if (transCoder.indexOf("DateType") > 0) { DateType dt = DateType.instance; decomposed = dt.decompose(vm.getDate(value)); } else if (transCoder.indexOf("IntegerType") > 0) { IntegerType it = IntegerType.instance; decomposed = it.decompose(vm.getBigNumber(value).toBigInteger()); } else if (transCoder.indexOf("FloatType") > 0) { FloatType ft = FloatType.instance; decomposed = ft.decompose(vm.getNumber(value).floatValue()); } else if (transCoder.indexOf("LexicalUUIDType") > 0) { LexicalUUIDType lt = LexicalUUIDType.instance; UUID uuid = UUID.fromString((vm.getString(value))); decomposed = lt.decompose(uuid); } else if (transCoder.indexOf("UUIDType") > 0) { UUIDType ut = UUIDType.instance; UUID uuid = UUID.fromString((vm.getString(value))); decomposed = ut.decompose(uuid); } else if (transCoder.indexOf("BooleanType") > 0) { BooleanType bt = BooleanType.instance; decomposed = bt.decompose(vm.getBoolean(value)); } else if (transCoder.indexOf("Int32Type") > 0) { Int32Type it = Int32Type.instance; decomposed = it.decompose(vm.getInteger(value).intValue()); } else if (transCoder.indexOf("DecimalType") > 0) { DecimalType dt = DecimalType.instance; decomposed = dt.decompose(vm.getBigNumber(value)); } else if (transCoder.indexOf("DynamicCompositeType") > 0) { AbstractType serializer = null; if (vm.isString()) { try { serializer = TypeParser.parse(fullTransCoder); decomposed = ((DynamicCompositeType) serializer).fromString(vm .getString(value)); } catch (ConfigurationException e) { throw new KettleException(e.getMessage(), e); } catch (SyntaxException e) { throw new KettleException(e.getMessage(), e); } } else { throw new KettleException(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.CantConvertTypeThrift", vm.getTypeDesc(), fullTransCoder)); } } else if (transCoder.indexOf("CompositeType") > 0) { AbstractType serializer = null; if (vm.isString()) { try { serializer = TypeParser.parse(fullTransCoder); decomposed = ((CompositeType) serializer).fromString(vm.toString()); } catch (ConfigurationException e) { throw new KettleException(e.getMessage(), e); } catch (SyntaxException e) { throw new KettleException(e.getMessage(), e); } } else { throw new KettleException(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.CantConvertTypeThrift", vm.getTypeDesc(), fullTransCoder)); } } if (decomposed == null) { throw new KettleException(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.UnableToConvertValue", vm.getName())); } return decomposed; } protected static String escapeSingleQuotes(String source) { // escaped by doubling (as in SQL) return source.replace("'", "''"); } /** * Encode a string representation of a column name using the serializer for * the default comparator. * * @param colName the textual column name to serialze * @return a ByteBuffer encapsulating the serialized column name * @throws KettleException if a problem occurs during serialization */ public ByteBuffer columnNameToByteBuffer(String colName) throws KettleException { AbstractType serializer = null; String fullEncoder = m_columnComparator; String encoder = fullEncoder; // if it's a composite type make sure that we check only against the // primary type if (encoder.indexOf('(') > 0) { encoder = encoder.substring(0, encoder.indexOf('(')); } if (encoder.indexOf("UTF8Type") > 0) { serializer = UTF8Type.instance; } else if (encoder.indexOf("AsciiType") > 0) { serializer = AsciiType.instance; } else if (encoder.indexOf("LongType") > 0) { serializer = LongType.instance; } else if (encoder.indexOf("DoubleType") > 0) { serializer = DoubleType.instance; } else if (encoder.indexOf("DateType") > 0) { serializer = DateType.instance; } else if (encoder.indexOf("IntegerType") > 0) { serializer = IntegerType.instance; } else if (encoder.indexOf("FloatType") > 0) { serializer = FloatType.instance; } else if (encoder.indexOf("LexicalUUIDType") > 0) { serializer = LexicalUUIDType.instance; } else if (encoder.indexOf("UUIDType") > 0) { serializer = UUIDType.instance; } else if (encoder.indexOf("BooleanType") > 0) { serializer = BooleanType.instance; } else if (encoder.indexOf("Int32Type") > 0) { serializer = Int32Type.instance; } else if (encoder.indexOf("DecimalType") > 0) { serializer = DecimalType.instance; } else if (encoder.indexOf("DynamicCompositeType") > 0) { try { serializer = TypeParser.parse(fullEncoder); } catch (ConfigurationException e) { throw new KettleException(e.getMessage(), e); } catch (SyntaxException e) { throw new KettleException(e.getMessage(), e); } } else if (encoder.indexOf("CompositeType") > 0) { try { serializer = TypeParser.parse(fullEncoder); } catch (ConfigurationException e) { throw new KettleException(e.getMessage(), e); } catch (SyntaxException e) { throw new KettleException(e.getMessage(), e); } } ByteBuffer result = serializer.fromString(colName); return result; } /** * Encodes and object via serialization * * @param obj the object to encode * @return an array of bytes containing the serialized object * @throws IOException if serialization fails * * public static byte[] encodeObject(Object obj) throws IOException * { ByteArrayOutputStream bos = new ByteArrayOutputStream(); * BufferedOutputStream buf = new BufferedOutputStream(bos); * ObjectOutputStream oos = new ObjectOutputStream(buf); * oos.writeObject(obj); buf.flush(); * * return bos.toByteArray(); } */ /** * Get the Kettle ValueMeta the corresponds to the type of the key for this * column family. * * @return the key's ValueMeta */ public ValueMetaInterface getValueMetaForKey() { return getValueMetaForColumn(getKeyName()); } /** * Get the Kettle ValueMeta that corresponds to the type of the supplied * cassandra column. * * @param colName the name of the column to get a ValueMeta for * @return the ValueMeta that is appropriate for the type of the supplied * column. */ public ValueMetaInterface getValueMetaForColumn(String colName) { String type = null; // check the key first if (colName.equals(getKeyName())) { type = m_keyValidator; } else { type = m_columnMeta.get(colName); if (type == null) { type = m_defaultValidationClass; } } int kettleType = 0; if (type.indexOf("UTF8Type") > 0 || type.indexOf("AsciiType") > 0 || type.indexOf("UUIDType") > 0 || type.indexOf("CompositeType") > 0) { kettleType = ValueMetaInterface.TYPE_STRING; } else if (type.indexOf("LongType") > 0 || type.indexOf("IntegerType") > 0 || type.indexOf("Int32Type") > 0) { kettleType = ValueMetaInterface.TYPE_INTEGER; } else if (type.indexOf("DoubleType") > 0 || type.indexOf("FloatType") > 0) { kettleType = ValueMetaInterface.TYPE_NUMBER; } else if (type.indexOf("DateType") > 0) { kettleType = ValueMetaInterface.TYPE_DATE; } else if (type.indexOf("DecimalType") > 0) { kettleType = ValueMetaInterface.TYPE_BIGNUMBER; } else if (type.indexOf("BytesType") > 0) { kettleType = ValueMetaInterface.TYPE_BINARY; } else if (type.indexOf("BooleanType") > 0) { kettleType = ValueMetaInterface.TYPE_BOOLEAN; } ValueMetaInterface newVM = new ValueMeta(colName, kettleType); if (m_indexedVals.containsKey(colName)) { // make it indexed! newVM.setStorageType(ValueMetaInterface.STORAGE_TYPE_INDEXED); HashSet<Object> indexedV = m_indexedVals.get(colName); Object[] iv = indexedV.toArray(); newVM.setIndex(iv); } return newVM; } /** * Get a list of ValueMetas corresponding to the columns in this schema * * @return a list of ValueMetas */ public List<ValueMetaInterface> getValueMetasForSchema() { List<ValueMetaInterface> newL = new ArrayList<ValueMetaInterface>(); for (String colName : m_columnMeta.keySet()) { ValueMetaInterface colVM = getValueMetaForColumn(colName); newL.add(colVM); } return newL; } /** * Get a Set of column names that are defined in the meta data for this schema * * @return a set of column names. */ public Set<String> getColumnNames() { // only returns those column names that are defined in the schema! return m_columnMeta.keySet(); } /** * Returns true if the supplied column name exists in this schema. * * @param colName the name of the column to check. * @return true if the column exists in the meta data for this column family. */ public boolean columnExistsInSchema(String colName) { return (m_columnMeta.get(colName) != null); } /** * Get the name of the key for this column family (equals the name of the * column family). * * @return the name of the key */ public String getKeyName() { // we use the column family/table name as the key return getColumnFamilyName(); } /** * Return the name of this column family. * * @return the name of this column family. */ public String getColumnFamilyName() { return m_columnFamilyName; } /** * Return the decoded key value of a row. Assumes that the supplied row comes * from the column family that this meta data represents!! * * @param row a Cassandra row * @return the decoded key value * @throws KettleException if a deserializer can't be determined */ public Object getKeyValue(CqlRow row) throws KettleException { /* * byte[] key = row.getKey(); * * return getColumnValue(key, m_keyValidator); */ ByteBuffer key = row.bufferForKey(); if (m_keyValidator.indexOf("BytesType") > 0) { return row.getKey(); } return getColumnValue(key, m_keyValidator); } /** * Return the decoded key value of a row. Assumes that the supplied row comes * from the column family that this meta data represents!! * * @param row a Cassandra row * @return the decoded key value * @throws KettleException if a deserializer can't be determined */ public Object getKeyValue(KeySlice row) throws KettleException { ByteBuffer key = row.bufferForKey(); if (m_keyValidator.indexOf("BytesType") > 0) { return row.getKey(); } return getColumnValue(key, m_keyValidator); } public String getColumnName(Column aCol) throws KettleException { ByteBuffer b = aCol.bufferForName(); String decodedColName = getColumnValue(b, m_columnComparator).toString(); return decodedColName; } private Object getColumnValue(ByteBuffer valueBuff, String decoder) throws KettleException { if (valueBuff == null) { return null; } Object result = null; AbstractType deserializer = null; String fullDecoder = decoder; // if it's a composite type make sure that we check only against the // primary type if (decoder.indexOf('(') > 0) { decoder = decoder.substring(0, decoder.indexOf('(')); } if (decoder.indexOf("UTF8Type") > 0) { deserializer = UTF8Type.instance; } else if (decoder.indexOf("AsciiType") > 0) { deserializer = AsciiType.instance; } else if (decoder.indexOf("LongType") > 0) { deserializer = LongType.instance; } else if (decoder.indexOf("DoubleType") > 0) { deserializer = DoubleType.instance; } else if (decoder.indexOf("DateType") > 0) { deserializer = DateType.instance; } else if (decoder.indexOf("IntegerType") > 0) { deserializer = IntegerType.instance; result = new Long(((IntegerType) deserializer).compose(valueBuff) .longValue()); return result; } else if (decoder.indexOf("FloatType") > 0) { deserializer = FloatType.instance; result = new Double(((FloatType) deserializer).compose(valueBuff)) .doubleValue(); return result; } else if (decoder.indexOf("LexicalUUIDType") > 0) { deserializer = LexicalUUIDType.instance; result = new String(((LexicalUUIDType) deserializer).compose(valueBuff) .toString()); return result; } else if (decoder.indexOf("UUIDType") > 0) { deserializer = UUIDType.instance; result = new String(((UUIDType) deserializer).compose(valueBuff) .toString()); return result; } else if (decoder.indexOf("BooleanType") > 0) { deserializer = BooleanType.instance; } else if (decoder.indexOf("Int32Type") > 0) { deserializer = Int32Type.instance; result = new Long(((Int32Type) deserializer).compose(valueBuff)) .longValue(); return result; } else if (decoder.indexOf("DecimalType") > 0) { deserializer = DecimalType.instance; } else if (decoder.indexOf("DynamicCompositeType") > 0) { try { deserializer = TypeParser.parse(fullDecoder); // now return the string representation of the composite value result = ((DynamicCompositeType) deserializer).getString(valueBuff); return result; } catch (ConfigurationException e) { throw new KettleException(e.getMessage(), e); } catch (SyntaxException e) { throw new KettleException(e.getMessage(), e); } } else if (decoder.indexOf("CompositeType") > 0) { try { deserializer = TypeParser.parse(fullDecoder); // now return the string representation of the composite value result = ((CompositeType) deserializer).getString(valueBuff); return result; } catch (ConfigurationException e) { throw new KettleException(e.getMessage(), e); } catch (SyntaxException e) { throw new KettleException(e.getMessage(), e); } } if (deserializer == null) { throw new KettleException(BaseMessages.getString(PKG, "CassandraColumnMetaData.Error.CantFindADeserializerForType", fullDecoder)); } result = deserializer.compose(valueBuff); return result; } /** * Decode the supplied column value. Uses the default validation class to * decode the value if the column is not explicitly defined in the schema. * * @param aCol * @return * @throws KettleException */ public Object getColumnValue(Column aCol) throws KettleException { String colName = getColumnName(aCol); // Clients should use getKey() for getting the key if (colName.equals("KEY")) { return null; } String decoder = m_columnMeta.get(colName); if (decoder == null) { // column is not in schema so use default validator decoder = m_defaultValidationClass; } String fullDecoder = decoder; if (decoder.indexOf('(') > 0) { decoder = decoder.substring(0, decoder.indexOf('(')); } if (decoder.indexOf("BytesType") > 0) { return aCol.getValue(); // raw bytes } ByteBuffer valueBuff = aCol.bufferForValue(); Object result = getColumnValue(valueBuff, fullDecoder); // check for indexed values if (m_indexedVals.containsKey(colName)) { HashSet<Object> vals = m_indexedVals.get(colName); // look for the correct index int foundIndex = -1; Object[] indexedV = vals.toArray(); for (int i = 0; i < indexedV.length; i++) { if (indexedV[i].equals(result)) { foundIndex = i; break; } } if (foundIndex >= 0) { result = new Integer(foundIndex); } else { result = null; // any values that are not indexed are unknown... } } return result; } }