package com.kryptnostic.rhizome.pods;

import java.util.function.Supplier;
import java.util.stream.Collectors;

import javax.inject.Inject;

import org.apache.commons.lang3.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Profile;

import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.CodecRegistry;
import com.google.common.base.Optional;
import com.kryptnostic.rhizome.configuration.RhizomeConfiguration;
import com.kryptnostic.rhizome.configuration.cassandra.CassandraConfiguration;
import com.kryptnostic.rhizome.configuration.spark.SparkConfiguration;

/**
 * Spring configuration, active under the {@value #SPARK_PROFILE} profile, that exposes a {@link SparkConf} and a
 * {@link SparkSession} built from the Spark and Cassandra sections of the {@link RhizomeConfiguration}.
 * <p>
 * Building the {@link SparkConf} also records a static {@link Cluster} supplier, retrievable through
 * {@link #getCluster()}.
 */
@Configuration
@Profile( SparkPod.SPARK_PROFILE )
@Import( CassandraPod.class )
public class SparkPod {
    public static final String     SPARK_PROFILE                         = "spark";

    private static final Logger    logger                                = LoggerFactory
            .getLogger( SparkPod.class );
    private static final String    CASSANDRA_CONNECTION_FACTORY_PROPERTY = "spark.cassandra.connection.factory";
    /** Port written to {@code spark.cassandra.connection.port}; it is hard-coded rather than read from configuration. */
    private static final int       CASSANDRA_CONNECTION_PORT             = 9042;

    /** Set as a side effect of {@link #sparkConf()}; stays {@code null} until both configurations are found. */
    private static Supplier<Cluster> CLUSTER_FACTORY                     = null;

    /**
     * Optional class name that {@link #sparkSession()} copies into the {@code spark.cassandra.connection.factory}
     * property when it is not blank.
     */
    public static String           CASSANDRA_CONNECTION_FACTORY_CLASS    = null;

    @Inject
    private RhizomeConfiguration   rhizomeConfiguration;

    /**
     * Builds the Spark configuration from the Spark and Cassandra configuration sections.
     * <p>
     * As a side effect this stores the supplier returned by {@link #getCluster()}.
     *
     * @return The populated {@link SparkConf}, or {@code null} when either the Spark or the Cassandra configuration
     *         is absent.
     */
    @Bean
    public SparkConf sparkConf() {
        Optional<SparkConfiguration> maybeSparkConfiguration = rhizomeConfiguration.getSparkConfiguration();
        Optional<CassandraConfiguration> maybeCassandraConfiguration = rhizomeConfiguration
                .getCassandraConfiguration();

        if ( !maybeSparkConfiguration.isPresent() || !maybeCassandraConfiguration.isPresent() ) {
            return null;
        }

        SparkConfiguration sparkConfiguration = maybeSparkConfiguration.get();
        CassandraConfiguration cassandraConfiguration = maybeCassandraConfiguration.get();

        // The supplier is lazy: a Cluster is only built when a caller invokes get() on it.
        CLUSTER_FACTORY = () -> CassandraPod.clusterBuilder( cassandraConfiguration )
                .withCodecRegistry( CodecRegistry.DEFAULT_INSTANCE )
                .build();

        String cassandraHosts = cassandraConfiguration.getCassandraSeedNodes().stream()
                .map( host -> host.getHostAddress() )
                .collect( Collectors.joining( "," ) );

        return new SparkConf()
                .setMaster( buildSparkMasterUrl( sparkConfiguration ) )
                .setAppName( sparkConfiguration.getAppName() )
                .set( "spark.sql.warehouse.dir", "file:///" + sparkConfiguration.getWorkingDirectory() )
                .set( "spark.cassandra.connection.host", cassandraHosts )
                .set( "spark.cassandra.connection.port", Integer.toString( CASSANDRA_CONNECTION_PORT ) )
                .set( "spark.cassandra.connection.ssl.enabled",
                        String.valueOf( cassandraConfiguration.isSslEnabled() ) )
                .setJars( sparkConfiguration.getJarLocations() );
    }

    /**
     * Creates (or reuses) the Spark session for the configuration produced by {@link #sparkConf()}.
     * <p>
     * When {@link #CASSANDRA_CONNECTION_FACTORY_CLASS} is not blank it is set on the configuration before the
     * session is obtained.
     *
     * @return The {@link SparkSession}, or {@code null} when {@link #sparkConf()} yields no configuration.
     */
    @Bean
    public SparkSession sparkSession() {
        // NOTE(review): presumably this call is routed through Spring's @Configuration proxy so the existing
        // SparkConf bean is reused -- confirm.
        SparkConf sc = sparkConf();

        // Check for a missing configuration before touching it; setting the factory property first would throw a
        // NullPointerException whenever a factory class is configured but Spark/Cassandra settings are absent.
        if ( sc == null ) {
            return null;
        }

        if ( StringUtils.isNotBlank( CASSANDRA_CONNECTION_FACTORY_CLASS ) ) {
            logger.info( "Cassandra connector factory class: {}", CASSANDRA_CONNECTION_FACTORY_CLASS );
            sc.set( CASSANDRA_CONNECTION_FACTORY_PROPERTY, CASSANDRA_CONNECTION_FACTORY_CLASS );
        }

        return SparkSession.builder().config( sc ).getOrCreate();
    }

    /**
     * This is a hack to allow bootstrapping a cluster from the cassandra configuration.
     *
     * @return A supplier that builds a {@link Cluster} as described by the default CassandraConfiguration. Will
     *         return null if called before spring invokes {@code SparkPod#sparkConf()}, or if that call did not find
     *         both the Spark and the Cassandra configuration.
     */
    public static Supplier<Cluster> getCluster() {
        return CLUSTER_FACTORY;
    }

    /**
     * Derives the value passed to {@code SparkConf#setMaster}.
     * <p>
     * In local mode the first configured master entry is used verbatim; otherwise every master is suffixed with the
     * configured Spark port and the entries are comma-joined behind a {@code spark://} prefix.
     */
    private static String buildSparkMasterUrl( SparkConfiguration sparkConfiguration ) {
        if ( sparkConfiguration.isLocal() ) {
            return sparkConfiguration.getSparkMasters().iterator().next();
        }

        String sparkMastersAsString = sparkConfiguration.getSparkMasters().stream()
                .map( master -> master + ":" + Integer.toString( sparkConfiguration.getSparkPort() ) )
                .collect( Collectors.joining( "," ) );
        return "spark://" + sparkMastersAsString;
    }
}