/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.source.workunit;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.SourceState;
import gobblin.configuration.State;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import gobblin.source.extractor.Extractor;
import gobblin.source.extractor.Watermark;
import gobblin.source.extractor.WatermarkInterval;
import lombok.ToString;
/**
 * A logical concept that defines a unit of work or task for extracting a portion of the data
 * to be pulled in a job run.
 * <p>
 * An instance of this class should contain all the properties an {@link Extractor} needs
 * to extract the schema and data records.
 * </p>
 * <p>
 * The {@link WatermarkInterval} of a {@link WorkUnit} is stored as a JSON string under the
 * {@link ConfigurationKeys#WATERMARK_INTERVAL_VALUE_KEY} property. The watermark getters return
 * {@code null} when that property has not been set.
 * </p>
 *
 * @author kgoodhop
 */
@ToString
public class WorkUnit extends State {

  private static final JsonParser JSON_PARSER = new JsonParser();
  private static final Gson GSON = new Gson();

  private Extract extract;

  /**
   * Default constructor.
   *
   * @deprecated Use {@link #createEmpty()}
   */
  @Deprecated
  public WorkUnit() {
    this(null, null);
  }

  /**
   * Constructor.
   *
   * @param state a {@link SourceState} the properties of which will be copied into this {@link WorkUnit} instance
   * @param extract an {@link Extract}
   *
   * @deprecated Properties in {@link SourceState} should not be added to a {@link WorkUnit}. Having each
   * {@link WorkUnit} contain a copy of {@link SourceState} is a waste of memory. Use {@link #create(Extract)}.
   */
  @Deprecated
  public WorkUnit(SourceState state, Extract extract) {
    // Values should only be null for deserialization
    if (state != null) {
      super.addAll(state);
    }

    if (extract != null) {
      this.extract = extract;
    } else {
      // Placeholder extract so that readFields(DataInput) has an instance to populate.
      this.extract = new Extract(null, null, null, null);
    }
  }

  /**
   * Constructor for a {@link WorkUnit} given a {@link SourceState}, {@link Extract}, and a {@link WatermarkInterval}.
   *
   * @param state a {@link gobblin.configuration.SourceState} the properties of which will be copied into this {@link WorkUnit} instance.
   * @param extract an {@link Extract}.
   * @param watermarkInterval a {@link WatermarkInterval} which defines the range of data this {@link WorkUnit} will process.
   *
   * @deprecated Properties in {@link SourceState} should not be added to a {@link WorkUnit}. Having each
   * {@link WorkUnit} contain a copy of {@link SourceState} is a waste of memory. Use {@link #create(Extract, WatermarkInterval)}.
   */
  @Deprecated
  public WorkUnit(SourceState state, Extract extract, WatermarkInterval watermarkInterval) {
    this(state, extract);

    /*
     * TODO
     *
     * Hack that stores a WatermarkInterval by using its WatermarkInterval#toJson() method. Until a
     * state-store migration, or a new state-store format is chosen, this hack will be the way that the
     * WatermarkInterval is serialized / de-serialized. Once a state-store migration can be done, the
     * Watermark can be stored as Binary JSON.
     */
    setProp(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY, watermarkInterval.toJson().toString());
  }

  /**
   * Constructor.
   *
   * @param extract a {@link Extract} object
   */
  public WorkUnit(Extract extract) {
    this.extract = extract;
  }

  /**
   * Copy constructor.
   *
   * @param other the other {@link WorkUnit} instance
   *
   * @deprecated Use {@link #copyOf(WorkUnit)}
   */
  @Deprecated
  public WorkUnit(WorkUnit other) {
    super.addAll(other);
    this.extract = other.getExtract();
  }

  /**
   * Factory method.
   *
   * @return An empty {@link WorkUnit}.
   */
  public static WorkUnit createEmpty() {
    return new WorkUnit();
  }

  /**
   * Factory method.
   *
   * @param extract {@link Extract}
   * @return A {@link WorkUnit} with the given {@link Extract}
   */
  public static WorkUnit create(Extract extract) {
    return new WorkUnit(null, extract);
  }

  /**
   * Factory method.
   *
   * @param extract {@link Extract}
   * @param watermarkInterval {@link WatermarkInterval}
   * @return A {@link WorkUnit} with the given {@link Extract} and {@link WatermarkInterval}
   */
  public static WorkUnit create(Extract extract, WatermarkInterval watermarkInterval) {
    return new WorkUnit(null, extract, watermarkInterval);
  }

  /**
   * Factory method.
   *
   * @param other a {@link WorkUnit} instance
   * @return A copy of the given {@link WorkUnit} instance
   */
  public static WorkUnit copyOf(WorkUnit other) {
    return new WorkUnit(other);
  }

  /**
   * Get the {@link Extract} associated with this {@link WorkUnit}.
   *
   * @return an {@link ImmutableExtract} wrapping the {@link Extract} associated with this {@link WorkUnit}
   */
  public Extract getExtract() {
    return new ImmutableExtract(this.extract);
  }

  /**
   * This method will allow a work unit to be skipped if needed.
   */
  public void skip() {
    this.setProp(ConfigurationKeys.WORK_UNIT_SKIP_KEY, true);
  }

  /**
   * Get the low {@link Watermark} as a {@link JsonElement}.
   *
   * @return a {@link JsonElement} representing the low {@link Watermark} or
   *         {@code null} if the low {@link Watermark} is not set.
   */
  public JsonElement getLowWatermark() {
    return getWatermarkIntervalMember(WatermarkInterval.LOW_WATERMARK_TO_JSON_KEY);
  }

  /**
   * Get the low {@link Watermark}.
   *
   * @param watermarkClass the watermark class for this {@code WorkUnit}.
   * @param gson a {@link Gson} object used to deserialize the watermark.
   * @return the low watermark in this {@code WorkUnit}, or {@code null} if it is not set.
   */
  public <T extends Watermark> T getLowWatermark(Class<T> watermarkClass, Gson gson) {
    JsonElement json = getLowWatermark();
    if (json == null) {
      return null;
    }
    return gson.fromJson(json, watermarkClass);
  }

  /**
   * Get the low {@link Watermark}. A default {@link Gson} object will be used to deserialize the watermark.
   *
   * @param watermarkClass the watermark class for this {@code WorkUnit}.
   * @return the low watermark in this {@code WorkUnit}, or {@code null} if it is not set.
   */
  public <T extends Watermark> T getLowWatermark(Class<T> watermarkClass) {
    return getLowWatermark(watermarkClass, GSON);
  }

  /**
   * Get the expected high {@link Watermark} as a {@link JsonElement}.
   *
   * @return a {@link JsonElement} representing the expected high {@link Watermark} or
   *         {@code null} if the expected high {@link Watermark} is not set.
   */
  public JsonElement getExpectedHighWatermark() {
    // Goes through the same guarded lookup as getLowWatermark() so that a WorkUnit without a
    // WatermarkInterval yields null instead of failing while parsing a missing property.
    return getWatermarkIntervalMember(WatermarkInterval.EXPECTED_HIGH_WATERMARK_TO_JSON_KEY);
  }

  /**
   * Get the expected high {@link Watermark}.
   *
   * @param watermarkClass the watermark class for this {@code WorkUnit}.
   * @param gson a {@link Gson} object used to deserialize the watermark.
   * @return the expected high watermark in this {@code WorkUnit}, or {@code null} if it is not set.
   */
  public <T extends Watermark> T getExpectedHighWatermark(Class<T> watermarkClass, Gson gson) {
    JsonElement json = getExpectedHighWatermark();
    if (json == null) {
      return null;
    }
    return gson.fromJson(json, watermarkClass);
  }

  /**
   * Get the expected high {@link Watermark}. A default {@link Gson} object will be used to deserialize the watermark.
   *
   * @param watermarkClass the watermark class for this {@code WorkUnit}.
   * @return the expected high watermark in this {@code WorkUnit}, or {@code null} if it is not set.
   */
  public <T extends Watermark> T getExpectedHighWatermark(Class<T> watermarkClass) {
    return getExpectedHighWatermark(watermarkClass, GSON);
  }

  /**
   * Get the high watermark of this {@link WorkUnit}.
   *
   * @return high watermark
   * @deprecated use the {@link #getExpectedHighWatermark()} method.
   */
  @Deprecated
  public long getHighWaterMark() {
    return getPropAsLong(ConfigurationKeys.WORK_UNIT_HIGH_WATER_MARK_KEY);
  }

  /**
   * Set {@link WatermarkInterval} for a {@link WorkUnit}.
   *
   * @param watermarkInterval the {@link WatermarkInterval} to store; it is serialized through
   *                          {@link WatermarkInterval#toJson()}
   */
  public void setWatermarkInterval(WatermarkInterval watermarkInterval) {
    setProp(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY, watermarkInterval.toJson().toString());
  }

  /**
   * Set the high watermark of this {@link WorkUnit}.
   *
   * @param highWaterMark high watermark
   * @deprecated use {@link #setWatermarkInterval(WatermarkInterval)}.
   */
  @Deprecated
  public void setHighWaterMark(long highWaterMark) {
    setProp(ConfigurationKeys.WORK_UNIT_HIGH_WATER_MARK_KEY, highWaterMark);
  }

  /**
   * Get the low watermark of this {@link WorkUnit}.
   *
   * @return low watermark
   * @deprecated use the {@link #getLowWatermark()} method.
   */
  @Deprecated
  public long getLowWaterMark() {
    return getPropAsLong(ConfigurationKeys.WORK_UNIT_LOW_WATER_MARK_KEY);
  }

  /**
   * Set the low watermark of this {@link WorkUnit}.
   *
   * @param lowWaterMark low watermark
   * @deprecated use {@link #setWatermarkInterval(WatermarkInterval)}.
   */
  @Deprecated
  public void setLowWaterMark(long lowWaterMark) {
    setProp(ConfigurationKeys.WORK_UNIT_LOW_WATER_MARK_KEY, lowWaterMark);
  }

  /**
   * Reads the fields of the parent {@link State} first, then the fields of the {@link Extract},
   * from the given input.
   *
   * @param in the {@link DataInput} to read from
   * @throws IOException if reading from the input fails
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    this.extract.readFields(in);
  }

  /**
   * Writes the fields of the parent {@link State} first, then the fields of the {@link Extract},
   * to the given output.
   *
   * @param out the {@link DataOutput} to write to
   * @throws IOException if writing to the output fails
   */
  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);
    this.extract.write(out);
  }

  /**
   * Two {@link WorkUnit}s are equal when their {@link Extract}s are equal (or both {@code null})
   * and the parent {@link State} considers them equal.
   */
  @Override
  public boolean equals(Object object) {
    if (!(object instanceof WorkUnit)) {
      return false;
    }

    WorkUnit other = (WorkUnit) object;
    boolean sameExtract =
        (this.extract == null) ? (other.extract == null) : this.extract.equals(other.extract);
    return sameExtract && super.equals(other);
  }

  /**
   * Combines the hash code of the parent {@link State} with the hash code of the {@link Extract}.
   */
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = super.hashCode();
    result = prime * result + ((this.extract == null) ? 0 : this.extract.hashCode());
    return result;
  }

  /**
   * Looks up one member of the JSON-serialized {@link WatermarkInterval} stored in this {@link WorkUnit}.
   *
   * @param memberName name of the member of the serialized {@link WatermarkInterval} JSON object
   * @return the member as a {@link JsonElement}, or {@code null} if no {@link WatermarkInterval} is set
   *         or the JSON object has no such member
   */
  private JsonElement getWatermarkIntervalMember(String memberName) {
    if (!contains(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY)) {
      return null;
    }
    return JSON_PARSER.parse(getProp(ConfigurationKeys.WATERMARK_INTERVAL_VALUE_KEY)).getAsJsonObject()
        .get(memberName);
  }
}