package org.rakam.analysis.datasource;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.io.ByteStreams;
import org.rakam.collection.FieldType;
import org.rakam.collection.SchemaField;
import org.rakam.util.JsonHelper;
import org.rakam.util.RakamException;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST;
import static java.lang.Boolean.FALSE;
import static org.rakam.analysis.datasource.RemoteTable.ExternalSourceType.AVRO;
import static org.rakam.analysis.datasource.RemoteTable.ExternalSourceType.CSV;
public class ExternalFileCustomDataSource
{
public static List<SchemaField> fillColumnIfNotSet(Map<String, String> typeOptions, RemoteTable.ExternalSourceType format, URL url, boolean indexUrl)
{
if (format == CSV && !FALSE.toString().equals(typeOptions.get("use_header"))) {
URL file;
try {
file = getFile(url, indexUrl);
}
catch (RuntimeException e) {
throw new RakamException(e.getMessage(), BAD_REQUEST);
}
String separator = Optional.ofNullable(typeOptions.get("column_separator")).orElse(",");
try {
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(file.openStream()));
ImmutableList.Builder<SchemaField> builder = ImmutableList.builder();
for (String column : Splitter.on(separator).split(bufferedReader.readLine())) {
builder.add(new SchemaField(column, FieldType.STRING));
}
return builder.build();
}
catch (IOException e) {
throw new RakamException("Error while parsing CSV: " + e.getMessage(), BAD_REQUEST);
}
}
throw new RakamException("columns parameter is required", BAD_REQUEST);
}
private static URL getFile(URL url, boolean indexUrl)
throws RuntimeException
{
if (!url.getProtocol().equals("http") && !url.getProtocol().equals("https") && !url.getProtocol().equals("ftp")) {
throw new RuntimeException("URL is not valid. Use http, https or ftp schemes");
}
URL testUrl = url;
if (indexUrl) {
List<String> urls;
try {
urls = JsonHelper.read(ByteStreams.toByteArray(url.openStream()),
new TypeReference<List<String>>() {});
}
catch (IOException e) {
throw new RuntimeException("The index file must be an array containing urls. Example: [\"http://myurl.com/a.csv\"]");
}
if (urls == null || urls.isEmpty()) {
throw new RuntimeException("Index file doesn't have any entry");
}
try {
testUrl = new URL(urls.get(0));
}
catch (MalformedURLException e) {
throw new RuntimeException("Index file doesn't contain URL values");
}
}
return testUrl;
}
public static Optional<String> test(RemoteTable remoteTable)
{
try {
URL testUrl;
try {
testUrl = getFile(remoteTable.url, remoteTable.indexUrl);
}
catch (RuntimeException e) {
return Optional.of(e.getMessage());
}
if (remoteTable.format == CSV) {
String line;
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(testUrl.openStream()));
int i = 0;
try {
while (((line = bufferedReader.readLine()) != null) && i < 5) {
if (line.isEmpty()) {
// TODO: better alternative to detect csv
return Optional.of("There are empty lines");
}
i++;
}
}
finally {
bufferedReader.close();
}
}
else if (remoteTable.format == AVRO) {
try {
testUrl.openStream().read();
}
catch (IOException e) {
return Optional.of("Unable to read data from server");
}
}
else {
throw new IllegalStateException();
}
return Optional.empty();
}
catch (IOException e) {
return Optional.of(e.getMessage());
}
}
}