/**
* Copyright 2015 StreamSets Inc.
*
 * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.streamsets.datacollector;

import com.streamsets.datacollector.runner.BatchListener;
import com.streamsets.datacollector.runner.Pipe;
import com.streamsets.datacollector.runner.Pipeline;
import com.streamsets.pipeline.api.Stage;
import com.streamsets.pipeline.api.impl.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
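
/**
 * Starts an {@link EmbeddedDataCollector} inside the embedding process and exposes the
 * started pipeline's origin and Spark processor stages through {@link PipelineStartResult}.
 *
 * Illustrative usage; {@code postBatchCallback} is a hypothetical Runnable supplied by the
 * embedding application, not part of this class:
 * <pre>
 *   PipelineStartResult result = EmbeddedDataCollectorFactory.startPipeline(postBatchCallback);
 * </pre>
 */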
public class EmbeddedDataCollectorFactory {
private static final Logger LOG = LoggerFactory.getLogger(EmbeddedDataCollectorFactory.class);
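  // Canonical class name of the Spark processor stage; stages are matched by name at runtime
  // rather than by a direct class reference.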
public static final String SPARK_DPROCESSOR_CLASS =
"com.streamsets.pipeline.stage.processor.spark.SparkDProcessor";
private EmbeddedDataCollectorFactory() {}
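  /**
   * Initializes and starts the embedded data collector, waits up to 60 seconds for the
   * pipeline to be created, and returns the pipeline's origin together with any Spark
   * processor stages found in its (single-threaded) cluster runner.
   *
   * @param postBatchRunnable callback invoked after every batch the pipeline processes
   * @return the pipeline source and the collected Spark processor stages
   * @throws Exception if initialization fails or the pipeline does not start in time
   */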
public static PipelineStartResult startPipeline(final Runnable postBatchRunnable) throws Exception {
EmbeddedDataCollector embeddedDataCollector = new EmbeddedDataCollector();
embeddedDataCollector.init();
embeddedDataCollector.startPipeline();
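    // Pipeline creation happens asynchronously; poll every 100 ms until the pipeline
    // appears or roughly 60 seconds have elapsed.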
long startTime = System.currentTimeMillis();
long endTime = startTime;
long diff = 0;
while (embeddedDataCollector.getPipeline() == null && diff < 60000) {
LOG.debug("Waiting for pipeline to be created");
Thread.sleep(100);
endTime = System.currentTimeMillis();
diff = endTime - startTime;
}
    if (embeddedDataCollector.getPipeline() == null) {
      throw new IllegalStateException(Utils.format("Pipeline has not started even after waiting '{}' ms", diff));
    }
Pipeline realPipeline = embeddedDataCollector.getPipeline();
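    // Collect each distinct Spark processor stage from the runner's pipes by matching the
    // stage's canonical class name.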
List<Object> sparkProcessors = new ArrayList<>();
List<Pipe> pipes = realPipeline.getRunners().get(0).getPipes(); // Cluster pipelines are single threaded.
for (Pipe pipe : pipes) {
Stage stage = pipe.getStage().getStage();
      if (SPARK_DPROCESSOR_CLASS.equals(stage.getClass().getCanonicalName())) {
        if (!sparkProcessors.contains(stage)) {
          LOG.info("Added Spark Processor for {}", stage);
          sparkProcessors.add(stage);
        }
      }
}
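    // Invoke the caller-supplied callback after every batch the pipeline processes.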
realPipeline.getRunner().registerListener(new BatchListener() {
@Override
public void preBatch() {
// nothing
}
@Override
public void postBatch() {
postBatchRunnable.run();
}
});
return new PipelineStartResult(realPipeline.getSource(), sparkProcessors);
}
}