/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.datacollector.runner;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.RateLimiter;
import com.streamsets.datacollector.config.StageType;
import com.streamsets.datacollector.record.RecordImpl;
import com.streamsets.pipeline.api.BatchMaker;
import com.streamsets.pipeline.api.Record;
import com.streamsets.pipeline.api.impl.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * {@link BatchMaker} implementation that collects the records produced by one stage, grouped by output lane.
 *
 * <p>Every record handed to {@link #addRecord(Record, String...)} is stored in one list per targeted output lane.
 * When the instance is created with {@code keepSnapshot == true}, a cloned copy of each record is additionally
 * stored in a parallel per-lane snapshot map.</p>
 *
 * <p>NOTE(review): no synchronization is visible in this class; presumably an instance is used by a single
 * thread at a time - confirm against callers.</p>
 */
public class BatchMakerImpl implements BatchMaker {
  private static final Logger LOG = LoggerFactory.getLogger(BatchMakerImpl.class);

  private final StagePipe stagePipe;
  private final String instanceName;
  private final List<String> outputLanes;
  // the only output lane when the stage has exactly one, null otherwise
  private final String singleOutputLane;
  private final Map<String, List<Record>> stageOutput;
  // null when no snapshot is kept
  private final Map<String, List<Record>> stageOutputSnapshot;
  // remaining number of records that can be added before the "exceeded" warning is logged
  private int recordAllowance;
  // number of successful addRecord() invocations
  private int size;
  private boolean recordByRef;
  private Optional<RateLimiter> rateLimiterOptional = Optional.absent();

  /**
   * Creates a batch maker with a record allowance of {@link Integer#MAX_VALUE}.
   *
   * @param stagePipe the pipe of the stage this batch maker collects output for
   * @param keepSnapshot whether a cloned snapshot of every added record must be kept
   */
  public BatchMakerImpl(StagePipe stagePipe, boolean keepSnapshot) {
    this(stagePipe, keepSnapshot, Integer.MAX_VALUE);
  }

  /**
   * Creates a batch maker.
   *
   * @param stagePipe the pipe of the stage this batch maker collects output for
   * @param keepSnapshot whether a cloned snapshot of every added record must be kept
   * @param recordAllowance number of records that can be added before a warning is logged; exceeding it does
   *     not reject records
   */
  public BatchMakerImpl(StagePipe stagePipe, boolean keepSnapshot, int recordAllowance) {
    this.stagePipe = stagePipe;
    this.instanceName = stagePipe.getStage().getInfo().getInstanceName();
    outputLanes = ImmutableList.copyOf(stagePipe.getStage().getConfiguration().getOutputLanes());
    singleOutputLane = (outputLanes.size() == 1) ? outputLanes.iterator().next() : null;
    stageOutput = new HashMap<>();
    stageOutputSnapshot = (keepSnapshot) ? new HashMap<String, List<Record>>() : null;
    // pre-create one (initially empty) record list per declared output lane
    for (String outputLane : outputLanes) {
      stageOutput.put(outputLane, new ArrayList<Record>());
      if (stageOutputSnapshot != null) {
        stageOutputSnapshot.put(outputLane, new ArrayList<Record>());
      }
    }
    this.recordAllowance = recordAllowance;
    // if the stage is annotated as recordsByRef it means it does not reuse the records it creates, thus
    // we can skip one copy here (just here though), except if we are in preview
    recordByRef = !stagePipe.getStage().getContext().isPreview() &&
        stagePipe.getStage().getDefinition().getRecordsByRef();
  }

  /**
   * @return {@code true} if added records are stored by reference instead of being cloned first
   */
  boolean isRecordByRef() {
    return recordByRef;
  }

  /**
   * @return the stage pipe given at construction time
   */
  public StagePipe getStagePipe() {
    return stagePipe;
  }

  /**
   * @return the immutable list of output lanes of the stage
   */
  @Override
  public List<String> getLanes() {
    return outputLanes;
  }

  /**
   * Returns the record instance that is actually stored for the given record: the record itself when
   * records are taken by reference, a clone of it otherwise.
   *
   * @param record the record handed in by the stage; it is cast to {@link RecordImpl}
   * @return the given record or a clone of it
   */
  @VisibleForTesting
  RecordImpl getRecordForBatchMaker(Record record) {
    // in the constructor we figured out if we can do recordByRef or not
    return (recordByRef) ? (RecordImpl) record : ((RecordImpl) record).clone();
  }

  /**
   * Adds a record to the given output lanes, or to the single output lane of the stage if no lane is given.
   *
   * <p>All arguments are validated before anything else happens, so a rejected invocation neither touches the
   * record, nor consumes record allowance or a rate limiter permit, nor leaves the record in a subset of the
   * requested lanes.</p>
   *
   * @param record the record to add, cannot be {@code null}
   * @param lanes the output lanes to add the record to; may be empty only if the stage has exactly one output
   *     lane; must not contain duplicates and every lane must be an output lane of the stage
   * @throws NullPointerException if {@code record} is {@code null}
   * @throws IllegalArgumentException if no lane is given and the stage does not have exactly one output lane, if
   *     the lanes contain duplicates, or if a lane is not an output lane of the stage
   */
  @Override
  public void addRecord(Record record, String... lanes) {
    // validate everything up front, a failed call must not leave partial state behind
    Preconditions.checkNotNull(record, "record cannot be null");
    validateLanes(lanes);

    if (recordAllowance-- == 0) {
      //Some origins like "Kafka source" translate one message into multiple records [think JSON multiple objects mode]
      //the number of records may tip over the max batch size [both in preview and run].
      //Allow this. Max batch size is more of a guideline.
      LOG.warn("The maximum number of records per batch in the origin has been exceeded.");
    }

    RecordImpl recordCopy = getRecordForBatchMaker(record);
    recordCopy.addStageToStagePath(instanceName);
    recordCopy.createTrackingId();
    if (recordCopy.isInitialRecord()) {
      // keep an untouched copy of the record as its source record
      RecordImpl recordSource = recordCopy.clone();
      recordCopy.getHeader().setSourceRecord(recordSource);
      recordCopy.setInitialRecord(false);
    }

    if (getStagePipe().getStage().getDefinition().getType() == StageType.SOURCE) {
      // Now slow down until we can actually add the record.
      if (rateLimiterOptional.isPresent()) {
        rateLimiterOptional.get().acquire();
      }
    }

    addToLanes(stageOutput, recordCopy, lanes);
    if (stageOutputSnapshot != null) {
      // the snapshot gets its own clone, shared by all the snapshot lanes of this record
      addToLanes(stageOutputSnapshot, recordCopy.clone(), lanes);
    }
    size++;
  }

  /**
   * Verifies that the lanes given to {@link #addRecord(Record, String...)} are usable, without side effects.
   */
  private void validateLanes(String[] lanes) {
    if (lanes.length == 0) {
      Preconditions.checkArgument(outputLanes.size() == 1, Utils.formatL(
          "No stream has been specified and the stage '{}' has multiple output streams '{}'", instanceName, outputLanes));
      return;
    }
    if (lanes.length > 1) {
      Set<String> laneSet = ImmutableSet.copyOf(lanes);
      Preconditions.checkArgument(laneSet.size() == lanes.length, Utils.formatL(
          "Specified streams cannot have duplicates '{}'", laneSet));
    }
    for (String lane : lanes) {
      Preconditions.checkArgument(outputLanes.contains(lane), Utils.formatL(
          "Invalid output stream '{}' for stage '{}', available streams '{}'", lane, instanceName, outputLanes));
    }
  }

  /**
   * Appends the record to the list of every given lane, or of the single output lane if no lane is given.
   * The lanes must have been validated already.
   */
  private void addToLanes(Map<String, List<Record>> output, Record record, String[] lanes) {
    if (lanes.length == 0) {
      output.get(singleOutputLane).add(record);
    } else {
      for (String lane : lanes) {
        output.get(lane).add(record);
      }
    }
  }

  /**
   * @return the internal (live, mutable) map of output lane to the records added to that lane
   */
  public Map<String, List<Record>> getStageOutput() {
    return stageOutput;
  }

  /**
   * @return the internal (live, mutable) map of output lane to the snapshot copies of the records added to that
   *     lane, or {@code null} if this batch maker does not keep a snapshot
   */
  public Map<String, List<Record>> getStageOutputSnapshot() {
    return stageOutputSnapshot;
  }

  /**
   * @return the number of successful {@link #addRecord(Record, String...)} invocations; a record added to
   *     several lanes counts once
   */
  public int getSize() {
    return size;
  }

  /**
   * @param lane an output lane of the stage
   * @return the number of records added to the given lane
   * @throws NullPointerException if the lane is not an output lane of the stage
   */
  public int getSize(String lane) {
    return stageOutput.get(lane).size();
  }

  /**
   * Sets or clears the rate limiter consulted by {@link #addRecord(Record, String...)} when the stage type is
   * {@link StageType#SOURCE}.
   *
   * @param rateLimiter the rate limiter to use, {@code null} for none
   */
  public void setRateLimiter(@Nullable RateLimiter rateLimiter) {
    rateLimiterOptional = Optional.fromNullable(rateLimiter);
  }

  @Override
  public String toString() {
    return Utils.format("BatchMakerImpl[instance='{}' lanes='{}' size='{}' keepsSnapshot='{}']", instanceName,
        getLanes(), getSize(), stageOutputSnapshot != null);
  }
}