/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.copy;
import gobblin.util.guid.Guid;
import gobblin.util.guid.HasGuid;
import gobblin.util.io.GsonInterfaceAdapter;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import lombok.Singular;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
/**
 * Abstraction for a work unit for distcp.
 *
 * <p>Instances are converted to and from JSON through the shared {@link #GSON} instance; the JSON form also
 * backs {@link #toString()}, {@link #explain()} and {@link #guid()}.
 */
@Getter
@Setter
@AllArgsConstructor(access = AccessLevel.PROTECTED)
@NoArgsConstructor(access = AccessLevel.PROTECTED)
@EqualsAndHashCode
public class CopyEntity implements HasGuid {

  /** {@link Gson} instance used by every serialization and deserialization helper of this class. */
  public static final Gson GSON = GsonInterfaceAdapter.getGson(Object.class);

  /**
   * File set this file belongs to. {@link CopyEntity}s in the same fileSet and originating from the same
   * {@link CopyableDataset} will be treated as a unit: they will be published nearly atomically, and a
   * notification will be emitted for each fileSet when it is published.
   */
  private String fileSet;

  /** Contains arbitrary metadata usable by converters and/or publisher. */
  @Singular(value = "metadata")
  private Map<String, String> additionalMetadata;

  /**
   * Computes the {@link Guid} of this entity from its string representation.
   *
   * @return the {@link Guid} built from {@link #toString()}
   * @throws IOException declared by {@link HasGuid#guid()}; propagated from the {@link Guid} computation
   */
  @Override
  public Guid guid() throws IOException {
    String stringForm = toString();
    return Guid.fromStrings(stringForm);
  }

  /**
   * Converts a single {@link CopyEntity} into its JSON {@link String} form.
   *
   * @param copyEntity the entity to serialize
   * @return the serialized string
   */
  public static String serialize(CopyEntity copyEntity) {
    String json = GSON.toJson(copyEntity);
    return json;
  }

  /**
   * Converts a {@link List} of {@link CopyEntity}s into a single JSON {@link String}.
   * The counterpart of {@link #deserializeList(String)}.
   *
   * @param copyEntities the entities to serialize
   * @return the serialized string
   */
  public static String serializeList(List<CopyEntity> copyEntities) {
    // The anonymous TypeToken subclass captures the full generic type List<CopyEntity> for Gson.
    TypeToken<List<CopyEntity>> listToken = new TypeToken<List<CopyEntity>>() {};
    return GSON.toJson(copyEntities, listToken.getType());
  }

  /**
   * Rebuilds a {@link CopyEntity} from a string produced by {@link #serialize(CopyEntity)}.
   *
   * @param serialized the serialized string
   * @return a new instance of {@link CopyEntity}
   */
  public static CopyEntity deserialize(String serialized) {
    CopyEntity entity = GSON.fromJson(serialized, CopyEntity.class);
    return entity;
  }

  /**
   * Rebuilds a {@link List} of {@link CopyEntity}s from its serialized string.
   * Used together with {@link #serializeList(List)}
   *
   * @param serialized the serialized string
   * @return a new {@link List} of {@link CopyEntity}s
   */
  public static List<CopyEntity> deserializeList(String serialized) {
    // Same generic type capture as in serializeList, so both directions agree on the element type.
    TypeToken<List<CopyEntity>> listToken = new TypeToken<List<CopyEntity>>() {};
    List<CopyEntity> entities = GSON.fromJson(serialized, listToken.getType());
    return entities;
  }

  /**
   * Returns the JSON form of this entity, as produced by {@link #GSON}.
   *
   * @return the serialized representation of this instance
   */
  @Override
  public String toString() {
    return GSON.toJson(this);
  }

  /**
   * Get a {@link DatasetAndPartition} instance for the dataset and fileSet this {@link CopyEntity} belongs to.
   *
   * @param metadata {@link CopyableDatasetMetadata} for the dataset this {@link CopyEntity} belongs to.
   * @return an instance of {@link DatasetAndPartition} pairing {@code metadata} with {@link #getFileSet()}
   */
  public DatasetAndPartition getDatasetAndPartition(CopyableDatasetMetadata metadata) {
    String owningFileSet = getFileSet();
    return new DatasetAndPartition(metadata, owningFileSet);
  }

  /**
   * Used for simulate runs. Should explain what this copy entity will do.
   *
   * @return by default, the same text as {@link #toString()}
   */
  public String explain() {
    return this.toString();
  }

  /**
   * Uniquely identifies a fileSet by also including the dataset metadata.
   */
  @Data
  @EqualsAndHashCode
  public static class DatasetAndPartition {

    /** Metadata of the dataset the fileSet belongs to. */
    private final CopyableDatasetMetadata dataset;

    /** Name of the fileSet (partition) within the dataset. */
    private final String partition;

    /**
     * Builds a string identifier for this {@link DatasetAndPartition}: the hex encoding of the SHA digest of
     * the dataset's string form immediately followed by the partition.
     *
     * @return a unique string identifier for this {@link DatasetAndPartition}.
     */
    @SuppressWarnings("deprecation")
    public String identifier() {
      // The digest input is a plain concatenation with no separator between dataset and partition.
      String digestInput = this.dataset.toString() + this.partition;
      byte[] shaBytes = DigestUtils.sha(digestInput);
      return Hex.encodeHexString(shaBytes);
    }
  }
}