/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.copy.prioritization;
import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;
import org.apache.hadoop.fs.FileSystem;
import gobblin.data.management.copy.CopyConfiguration;
import gobblin.data.management.copy.CopyEntity;
import gobblin.data.management.copy.IterableCopyableDataset;
import gobblin.data.management.partition.FileSet;
import gobblin.util.request_allocation.PushDownRequestor;
/**
 * An {@link IterableCopyableDataset} whose {@link FileSet}s, as produced by
 * {@link #getFileSetIterator(FileSystem, CopyConfiguration)}, are guaranteed to be emitted in the order defined by
 * the prioritizer available at {@link CopyConfiguration#getPrioritizer()}.
 */
public interface PrioritizedCopyableDataset extends IterableCopyableDataset {

  /**
   * Returns an iterator over the {@link FileSet}s of {@link CopyEntity} for this dataset. Each {@link FileSet}
   * represents a group of files to copy together with the actions associated with them.
   *
   * <p>The returned {@link FileSet}s are sorted according to the supplied {@link Comparator}, and each of them has
   * the supplied {@link gobblin.util.request_allocation.Requestor} injected (this matters for pushdown).
   *
   * @param targetFs the target {@link FileSystem} into which copied files will be placed.
   * @param configuration the {@link CopyConfiguration} for this job.
   * @param prioritizer the {@link Comparator} that the output {@link FileSet}s must be sorted by.
   * @param requestor the {@link PushDownRequestor} that every returned {@link FileSet} should carry.
   * @return an {@link Iterator} of {@link FileSet}s ordered by {@code prioritizer}.
   * @throws IOException if the file sets cannot be produced due to an I/O failure; the exact conditions are
   *         determined by the implementation.
   */
  Iterator<FileSet<CopyEntity>> getFileSetIterator(
      FileSystem targetFs,
      CopyConfiguration configuration,
      Comparator<FileSet<CopyEntity>> prioritizer,
      PushDownRequestor<FileSet<CopyEntity>> requestor) throws IOException;
}