package eu.dnetlib.iis.common; import java.io.File; import java.io.FileNotFoundException; /** * Service for fetching files from hdfs using ssh protocol * * @author madryk * */ public class SshHdfsFileFetcher { public final static String FILE_PATH_SEPARATOR = "/"; private final SshConnectionManager sshConnectionManager; private final String remoteUserDir; //------------------------ CONSTRUCTORS -------------------------- public SshHdfsFileFetcher(SshConnectionManager sshConnectionManager, String remoteTempDir) { this.remoteUserDir = appendFilePathSeparatorIfMissing(remoteTempDir); this.sshConnectionManager = sshConnectionManager; } //------------------------ LOGIC -------------------------- /** * Fetches file (or directory) from hdfs into target directory. * * @return location of fetched file */ public File fetchFile(String hdfsPath, File targetDir) throws FileNotFoundException { String filename = new File(hdfsPath).getName(); File localTargetFile = new File(targetDir, filename); String remoteFileTempDir = remoteUserDir + "temp_copy_" + System.currentTimeMillis() + FILE_PATH_SEPARATOR; checkIfFileExistsOnHdfs(hdfsPath); makeDirOnRemote(remoteFileTempDir); copyFromHdfsOnRemote(hdfsPath, remoteFileTempDir); downloadFromRemote(remoteFileTempDir + filename, localTargetFile); removeFromRemote(remoteFileTempDir); return localTargetFile; } //------------------------ PRIVATE -------------------------- private void checkIfFileExistsOnHdfs(String hdfsPath) throws FileNotFoundException { SshSimpleConnection sshConnection = sshConnectionManager.getConnection(); if (sshConnection.execute("hadoop fs -test -e " + hdfsPath, false).getExitStatus() != 0) { throw new FileNotFoundException("File " + hdfsPath + " not found on hdfs"); } } private void makeDirOnRemote(String hdfsPath) { sshConnectionManager.getConnection().execute("mkdir -p " + hdfsPath); } private void copyFromHdfsOnRemote(String hdfsSource, String remoteTarget) { sshConnectionManager.getConnection().execute("hadoop fs -get " + hdfsSource + " " + remoteTarget); } private void downloadFromRemote(String remoteSource, File localTarget) { localTarget.getParentFile().mkdirs(); sshConnectionManager.getConnection().download(remoteSource, localTarget.getAbsolutePath()); } private void removeFromRemote(String remotePath) { sshConnectionManager.getConnection().execute("rm -r " + remotePath); } private String appendFilePathSeparatorIfMissing(String directoryPath) { return directoryPath + (directoryPath.endsWith(FILE_PATH_SEPARATOR) ? "" : FILE_PATH_SEPARATOR); } }