/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.pullserver;
import com.google.common.base.Preconditions;
import com.google.common.cache.LoadingCache;
import com.google.gson.Gson;
import io.netty.handler.codec.http.QueryStringDecoder;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.conf.TajoConf.ConfVars;
import org.apache.tajo.pullserver.PullServerConstants.Param;
import org.apache.tajo.pullserver.retriever.FileChunk;
import org.apache.tajo.pullserver.retriever.FileChunkMeta;
import org.apache.tajo.pullserver.retriever.IndexCacheKey;
import org.apache.tajo.storage.*;
import org.apache.tajo.storage.RowStoreUtil.RowStoreDecoder;
import org.apache.tajo.storage.index.bst.BSTIndex.BSTIndexReader;
import org.apache.tajo.util.Pair;
import java.io.*;
import java.net.URI;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class PullServerUtil {
private static final Log LOG = LogFactory.getLog(PullServerUtil.class);
private static boolean nativeIOPossible = false;
static {
if (NativeIO.isAvailable()) {
nativeIOPossible = true;
} else {
LOG.warn("Unable to load hadoop nativeIO");
}
}
public static boolean isNativeIOPossible() {
return nativeIOPossible;
}
/**
* Calls posix_fadvise on the given file descriptor. See the manpage
* of this syscall for more information. On systems where the call
* is not available, this method does nothing.
*/
public static void posixFadviseIfPossible(String identifier, java.io.FileDescriptor fd,
long offset, long len, int flags) {
if (nativeIOPossible) {
try {
NativeIO.POSIX.getCacheManipulator().posixFadviseIfPossible(identifier, fd, offset, len, flags);
} catch (Throwable t) {
nativeIOPossible = false;
LOG.warn("Failed to manage OS cache for " + identifier, t);
}
}
}
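/**
 * Returns the relative output directory of an execution block,
 * i.e. {@code <queryId>/output/<executionBlockSequenceId>}, under which the tasks of the
 * execution block write their shuffle output.
 */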
public static Path getBaseOutputDir(String queryId, String executionBlockSequenceId) {
return StorageUtil.concatPath(
queryId,
"output",
executionBlockSequenceId);
}
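/**
 * Returns the relative input directory of an execution block,
 * i.e. {@code <queryId>/in/<executionBlockId>}.
 */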
public static Path getBaseInputDir(String queryId, String executionBlockId) {
return StorageUtil.concatPath(
queryId,
"in",
executionBlockId);
}
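/**
 * Splits every comma-separated entry of the given list into individual ids.
 * For example, {@code ["1_0,2_0", "3_1"]} becomes {@code ["1_0", "2_0", "3_1"]}.
 * Returns {@code null} if the input is {@code null}.
 */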
public static List<String> splitMaps(List<String> mapq) {
if (null == mapq) {
return null;
}
final List<String> ret = new ArrayList<>();
for (String s : mapq) {
Collections.addAll(ret, s.split(","));
}
return ret;
}
public static boolean isChunkRequest(String requestType) {
return requestType.equals(PullServerConstants.CHUNK_REQUEST_PARAM_STRING);
}
public static boolean isMetaRequest(String requestType) {
return requestType.equals(PullServerConstants.META_REQUEST_PARAM_STRING);
}
public static boolean isRangeShuffle(String shuffleType) {
return shuffleType.equals(PullServerConstants.RANGE_SHUFFLE_PARAM_STRING);
}
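/**
 * Returns whether the given shuffle type is either a hash shuffle or a scattered hash shuffle.
 */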
public static boolean isHashShuffle(String shuffleType) {
return shuffleType.equals(PullServerConstants.HASH_SHUFFLE_PARAM_STRING)
|| shuffleType.equals(PullServerConstants.SCATTERED_HASH_SHUFFLE_PARAM_STRING);
}
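/**
 * The query string parameters of a pull server request URI. The typed accessors look values
 * up by their {@link Param} keys and validate that mandatory parameters are present.
 * <p>
 * A minimal usage sketch, assuming {@code uri} is a request URI created by
 * {@link PullServerRequestURIBuilder}:
 * <pre>{@code
 * PullServerParams params = new PullServerParams(uri);
 * if (PullServerUtil.isChunkRequest(params.requestType())
 *     && PullServerUtil.isRangeShuffle(params.shuffleType())) {
 *   String startKeyBase64 = params.startKey();
 *   String endKeyBase64 = params.endKey();
 *   boolean last = params.last();
 * }
 * }</pre>
 */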
public static class PullServerParams extends HashMap<String, List<String>> {
public PullServerParams(URI uri) {
this(uri.toString());
}
public PullServerParams(String uri) {
super(new QueryStringDecoder(uri).parameters());
}
public boolean contains(Param param) {
return containsKey(param.key());
}
public List<String> get(Param param) {
return get(param.key());
}
private String checkAndGetFirstParam(Param param) {
Preconditions.checkArgument(contains(param), "Missing " + param.name());
Preconditions.checkArgument(get(param).size() == 1, "Too many params: " + param.name());
return get(param).get(0);
}
private List<String> checkAndGet(Param param) {
Preconditions.checkArgument(contains(param), "Missing " + param.name());
return get(param);
}
public String requestType() {
return checkAndGetFirstParam(Param.REQUEST_TYPE);
}
public String shuffleType() {
return checkAndGetFirstParam(Param.SHUFFLE_TYPE);
}
public String queryId() {
return checkAndGetFirstParam(Param.QUERY_ID);
}
public String ebId() {
return checkAndGetFirstParam(Param.EB_ID);
}
public long offset() {
return contains(Param.OFFSET) && get(Param.OFFSET).size() == 1 ?
Long.parseLong(get(Param.OFFSET).get(0)) : -1L;
}
public long length() {
return contains(Param.LENGTH) && get(Param.LENGTH).size() == 1 ?
Long.parseLong(get(Param.LENGTH).get(0)) : -1L;
}
public String startKey() {
return checkAndGetFirstParam(Param.START);
}
public String endKey() {
return checkAndGetFirstParam(Param.END);
}
public boolean last() {
return contains(Param.FINAL);
}
public String partId() {
return checkAndGetFirstParam(Param.PART_ID);
}
public List<String> taskAttemptIds() {
return checkAndGet(Param.TASK_ID);
}
}
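/**
 * Builds pull server request URIs from the given shuffle and task information. When the
 * appended task attempt ids would make a URI longer than {@code maxUrlLength}, the request
 * is split into multiple URIs.
 * <p>
 * A minimal usage sketch; the address, port, and ids below are illustrative only:
 * <pre>{@code
 * List<URI> uris = new PullServerRequestURIBuilder("127.0.0.1", 28080, 2000)
 *     .setRequestType(PullServerConstants.CHUNK_REQUEST_PARAM_STRING)
 *     .setShuffleType(PullServerConstants.HASH_SHUFFLE_PARAM_STRING)
 *     .setQueryId("q_1234567890123_0001")
 *     .setEbId(1)
 *     .setPartId(0)
 *     .build(true);
 * }</pre>
 */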
public static class PullServerRequestURIBuilder {
private final StringBuilder builder = new StringBuilder("http://");
private String requestType;
private String shuffleType;
private String queryId;
private Integer ebId;
private Integer partId;
private List<Integer> taskIds;
private List<Integer> attemptIds;
private List<String> taskAttemptIds;
private Long offset;
private Long length;
private String startKeyBase64;
private String endKeyBase64;
private boolean last;
private final int maxUrlLength;
public PullServerRequestURIBuilder(String pullServerAddr, int pullServerPort, int maxUrlLength) {
this(pullServerAddr, Integer.toString(pullServerPort), maxUrlLength);
}
public PullServerRequestURIBuilder(String pullServerAddr, String pullServerPort, int maxUrlLength) {
builder.append(pullServerAddr).append(":").append(pullServerPort).append("/?");
this.maxUrlLength = maxUrlLength;
}
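/**
 * Builds the request URI(s). Task attempt ids are appended only when {@code includeTasks}
 * is true and the shuffle type is not a hash shuffle; if the resulting URI would exceed
 * {@code maxUrlLength}, the task id list is split over multiple URIs to avoid
 * 414 Request-URI Too Long responses.
 *
 * @param includeTasks whether the task attempt ids should be appended to the URI
 * @return one or more request URIs
 */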
public List<URI> build(boolean includeTasks) {
append(Param.REQUEST_TYPE, requestType)
.append(Param.QUERY_ID, queryId)
.append(Param.EB_ID, ebId)
.append(Param.PART_ID, partId)
.append(Param.SHUFFLE_TYPE, shuffleType);
if (startKeyBase64 != null) {
try {
append(Param.START, URLEncoder.encode(startKeyBase64, "utf-8"))
.append(Param.END, URLEncoder.encode(endKeyBase64, "utf-8"));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
if (last) {
append(Param.FINAL, Boolean.toString(last));
}
}
if (length != null) {
append(Param.OFFSET, offset.toString())
.append(Param.LENGTH, length.toString());
}
List<URI> results = new ArrayList<>();
if (!includeTasks || isHashShuffle(shuffleType)) {
results.add(URI.create(builder.toString()));
} else {
builder.append(Param.TASK_ID.key()).append("=");
List<String> taskAttemptIds = this.taskAttemptIds;
if (taskAttemptIds == null) {
// Sort task ids to increase the cache hit rate in the pull server
taskAttemptIds = IntStream.range(0, taskIds.size())
.mapToObj(i -> new Pair<>(taskIds.get(i), attemptIds.get(i)))
.sorted((p1, p2) -> p1.getFirst() - p2.getFirst())
// In the case of hash shuffle, each partition has a single shuffle file per worker.
// TODO: If the file is large, consider fetching it in multiple pieces (the shuffle file can be split)
.filter(pair -> pair.getFirst() >= 0)
.map(pair -> pair.getFirst() + "_" + pair.getSecond())
.collect(Collectors.toList());
}
// If the GET request is longer than 2000 characters, the long request URI may be rejected
// with HTTP status 414 (Request-URI Too Long).
// Refer to http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.4.15
// The code below splits such a long request into multiple requests.
List<String> taskIdsParams = new ArrayList<>();
StringBuilder taskIdListBuilder = new StringBuilder();
boolean first = true;
for (int i = 0; i < taskAttemptIds.size(); i++) {
if (!first) {
taskIdListBuilder.append(",");
}
first = false;
if (builder.length() + taskIdListBuilder.length() > maxUrlLength) {
taskIdsParams.add(taskIdListBuilder.toString());
taskIdListBuilder = new StringBuilder(taskAttemptIds.get(i));
} else {
taskIdListBuilder.append(taskAttemptIds.get(i));
}
}
// add the remaining task ids, if any
if (taskIdListBuilder.length() > 0) {
taskIdsParams.add(taskIdListBuilder.toString());
}
for (String param : taskIdsParams) {
results.add(URI.create(builder + param));
}
}
return results;
}
private PullServerRequestURIBuilder append(Param key, Object val) {
builder.append(key.key())
.append("=")
.append(val)
.append("&");
return this;
}
public PullServerRequestURIBuilder setRequestType(String type) {
this.requestType = type;
return this;
}
public PullServerRequestURIBuilder setShuffleType(String shuffleType) {
this.shuffleType = shuffleType;
return this;
}
public PullServerRequestURIBuilder setQueryId(String queryId) {
this.queryId = queryId;
return this;
}
public PullServerRequestURIBuilder setEbId(String ebId) {
this.ebId = Integer.parseInt(ebId);
return this;
}
public PullServerRequestURIBuilder setEbId(Integer ebId) {
this.ebId = ebId;
return this;
}
public PullServerRequestURIBuilder setPartId(String partId) {
this.partId = Integer.parseInt(partId);
return this;
}
public PullServerRequestURIBuilder setPartId(Integer partId) {
this.partId = partId;
return this;
}
public PullServerRequestURIBuilder setTaskIds(List<Integer> taskIds) {
this.taskIds = taskIds;
return this;
}
public PullServerRequestURIBuilder setAttemptIds(List<Integer> attemptIds) {
this.attemptIds = attemptIds;
return this;
}
public PullServerRequestURIBuilder setTaskAttemptIds(List<String> taskAttemptIds) {
this.taskAttemptIds = taskAttemptIds;
return this;
}
public PullServerRequestURIBuilder setOffset(long offset) {
this.offset = offset;
return this;
}
public PullServerRequestURIBuilder setLength(long length) {
this.length = length;
return this;
}
public PullServerRequestURIBuilder setStartKeyBase64(String startKeyBase64) {
this.startKeyBase64 = startKeyBase64;
return this;
}
public PullServerRequestURIBuilder setEndKeyBase64(String endKeyBase64) {
this.endKeyBase64 = endKeyBase64;
return this;
}
public PullServerRequestURIBuilder setLastInclude(boolean last) {
this.last = last;
return this;
}
}
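/**
 * Returns whether an external pull server service is used instead of the worker-embedded
 * one, i.e. either a standalone {@link TajoPullServerService} or the YARN auxiliary
 * shuffle service.
 */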
public static boolean useExternalPullServerService(TajoConf conf) {
// TODO: add more service types like mesos
return TajoPullServerService.isStandalone()
|| conf.getBoolVar(ConfVars.YARN_SHUFFLE_SERVICE_ENABLED);
}
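/**
 * Finds the part of the given task output that covers the requested key range and returns
 * its meta information, or {@code null} if the output is empty or does not overlap the range.
 */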
private static FileChunkMeta searchFileChunkMeta(String queryId,
String ebSeqId,
String taskId,
Path outDir,
String startKey,
String endKey,
boolean last,
LoadingCache<IndexCacheKey, BSTIndexReader> indexReaderCache,
int lowCacheHitCheckThreshold) throws IOException, ExecutionException {
SearchResult result = searchCorrespondPart(queryId, ebSeqId, outDir, startKey, endKey, last,
indexReaderCache, lowCacheHitCheckThreshold);
// Do not send file chunks of 0 length
if (result != null) {
long startOffset = result.startOffset;
long endOffset = result.endOffset;
FileChunkMeta chunk = new FileChunkMeta(startOffset, endOffset - startOffset, ebSeqId, taskId);
if (LOG.isDebugEnabled()) LOG.debug("Retrieve File Chunk: " + chunk);
return chunk;
} else {
return null;
}
}
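/**
 * Finds the part of the given task output that covers the requested key range and returns
 * it as a {@link FileChunk}, or {@code null} if the output is empty or does not overlap
 * the range.
 */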
private static FileChunk searchFileChunk(String queryId,
String ebSeqId,
Path outDir,
String startKey,
String endKey,
boolean last,
LoadingCache<IndexCacheKey, BSTIndexReader> indexReaderCache,
int lowCacheHitCheckThreshold) throws IOException, ExecutionException {
final SearchResult result = searchCorrespondPart(queryId, ebSeqId, outDir, startKey, endKey, last,
indexReaderCache, lowCacheHitCheckThreshold);
if (result != null) {
long startOffset = result.startOffset;
long endOffset = result.endOffset;
FileChunk chunk = new FileChunk(result.data, startOffset, endOffset - startOffset);
if (LOG.isDebugEnabled()) LOG.debug("Retrieve File Chunk: " + chunk);
return chunk;
} else {
return null;
}
}
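/**
 * A byte range of a task output file that corresponds to a requested key range.
 */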
private static class SearchResult {
File data;
long startOffset;
long endOffset;
public SearchResult(File data, long startOffset, long endOffset) {
this.data = data;
this.startOffset = startOffset;
this.endOffset = endOffset;
}
}
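/**
 * Looks up the BST index of the given output directory to find the byte range covering the
 * Base64-encoded {@code startKey} and {@code endKey}. The index reader is obtained from
 * {@code indexReaderCache} and retained while the lookup is in progress.
 */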
private static SearchResult searchCorrespondPart(String queryId,
String ebSeqId,
Path outDir,
String startKey,
String endKey,
boolean last,
LoadingCache<IndexCacheKey, BSTIndexReader> indexReaderCache,
int lowCacheHitCheckThreshold) throws IOException, ExecutionException {
BSTIndexReader idxReader = indexReaderCache.get(new IndexCacheKey(outDir, queryId, ebSeqId));
idxReader.retain();
File data;
long startOffset;
long endOffset;
try {
if (LOG.isDebugEnabled()) {
if (indexReaderCache.size() > lowCacheHitCheckThreshold && indexReaderCache.stats().hitRate() < 0.5) {
LOG.debug("Too low cache hit rate: " + indexReaderCache.stats());
}
}
Tuple indexedFirst = idxReader.getFirstKey();
Tuple indexedLast = idxReader.getLastKey();
if (indexedFirst == null && indexedLast == null) { // if # of rows is zero
if (LOG.isDebugEnabled()) {
LOG.debug("There is no contents");
}
return null;
}
byte[] startBytes = Base64.decodeBase64(startKey);
byte[] endBytes = Base64.decodeBase64(endKey);
Tuple start;
Tuple end;
Schema keySchema = idxReader.getKeySchema();
RowStoreDecoder decoder = RowStoreUtil.createDecoder(keySchema);
try {
start = decoder.toTuple(startBytes);
} catch (Throwable t) {
throw new IllegalArgumentException("StartKey: " + startKey
+ ", decoded byte size: " + startBytes.length, t);
}
try {
end = decoder.toTuple(endBytes);
} catch (Throwable t) {
throw new IllegalArgumentException("EndKey: " + endKey
+ ", decoded byte size: " + endBytes.length, t);
}
data = new File(URI.create(outDir.toUri() + "/output"));
if (LOG.isDebugEnabled()) {
LOG.debug("GET Request for " + data.getAbsolutePath() + " (start=" + start + ", end=" + end +
(last ? ", last=true" : "") + ")");
}
TupleComparator comparator = idxReader.getComparator();
if (comparator.compare(end, indexedFirst) < 0 ||
comparator.compare(indexedLast, start) < 0) {
if (LOG.isDebugEnabled()) {
LOG.debug("Out of Scope (indexed data [" + indexedFirst + ", " + indexedLast +
"], but request start:" + start + ", end: " + end);
}
return null;
}
try {
idxReader.init();
startOffset = idxReader.find(start);
} catch (IOException ioe) {
LOG.error("State Dump (the requested range: "
+ "[" + start + ", " + end + ")" + ", idx min: "
+ idxReader.getFirstKey() + ", idx max: "
+ idxReader.getLastKey() + ")");
throw ioe;
}
try {
endOffset = idxReader.find(end);
if (endOffset == -1) {
endOffset = idxReader.find(end, true);
}
} catch (IOException ioe) {
LOG.error("State Dump (the requested range: "
+ "[" + start + ", " + end + ")" + ", idx min: "
+ idxReader.getFirstKey() + ", idx max: "
+ idxReader.getLastKey() + ")");
throw ioe;
}
// startOffset == -1 means the start key is not present in the index (case 2-1 or case 3),
// so look up the offset of the nearest key instead.
if (startOffset == -1) {
try {
startOffset = idxReader.find(start, true);
} catch (IOException ioe) {
LOG.error("State Dump (the requested range: "
+ "[" + start + ", " + end + ")" + ", idx min: "
+ idxReader.getFirstKey() + ", idx max: "
+ idxReader.getLastKey() + ")");
throw ioe;
}
}
if (startOffset == -1) {
throw new IllegalStateException("startOffset " + startOffset + " is negative \n" +
"State Dump (the requested range: "
+ "[" + start + ", " + end + ")" + ", idx min: " + idxReader.getFirstKey() + ", idx max: "
+ idxReader.getLastKey() + ")");
}
// if greater than indexed values
if (last || (endOffset == -1
&& comparator.compare(idxReader.getLastKey(), end) < 0)) {
endOffset = data.length();
}
} finally {
idxReader.release();
}
return new SearchResult(data, startOffset, endOffset);
}
/**
 * Retrieves the meta information of the file chunks which correspond to the requested URI.
 * Only meta information for file chunks of non-zero length is returned.
 *
 * @param conf Tajo configuration
 * @param lDirAlloc local directory allocator of the worker
 * @param localFS local file system
 * @param params pull server request parameters
 * @param gson Gson instance used to serialize each {@link FileChunkMeta}
 * @param indexReaderCache cache of BST index readers
 * @param lowCacheHitCheckThreshold cache size above which a low cache hit rate is logged
 * @return a list of JSON strings, one for each retrieved {@link FileChunkMeta}
 * @throws IOException
 * @throws ExecutionException
 */
public static List<String> getJsonMeta(final TajoConf conf,
final LocalDirAllocator lDirAlloc,
final FileSystem localFS,
final PullServerParams params,
final Gson gson,
final LoadingCache<IndexCacheKey, BSTIndexReader> indexReaderCache,
final int lowCacheHitCheckThreshold)
throws IOException, ExecutionException {
final List<String> taskIds = PullServerUtil.splitMaps(params.taskAttemptIds());
final Path queryBaseDir = PullServerUtil.getBaseOutputDir(params.queryId(), params.ebId());
final List<String> jsonMetas = new ArrayList<>();
for (String eachTaskId : taskIds) {
Path outputPath = StorageUtil.concatPath(queryBaseDir, eachTaskId, "output");
if (!lDirAlloc.ifExists(outputPath.toString(), conf)) {
LOG.warn("Range shuffle - file not exist. " + outputPath);
continue;
}
Path path = localFS.makeQualified(lDirAlloc.getLocalPathToRead(outputPath.toString(), conf));
FileChunkMeta meta = PullServerUtil.searchFileChunkMeta(params.queryId(), params.ebId(), eachTaskId, path,
params.startKey(), params.endKey(), params.last(), indexReaderCache, lowCacheHitCheckThreshold);
if (meta != null && meta.getLength() > 0) {
String jsonStr = gson.toJson(meta, FileChunkMeta.class);
jsonMetas.add(jsonStr);
}
}
return jsonMetas;
}
/**
 * Retrieves the file chunks which correspond to the requested URI.
 * Only file chunks of non-zero length are returned.
 *
 * @param conf Tajo configuration
 * @param lDirAlloc local directory allocator of the worker
 * @param localFS local file system
 * @param params pull server request parameters
 * @param indexReaderCache cache of BST index readers
 * @param lowCacheHitCheckThreshold cache size above which a low cache hit rate is logged
 * @return file chunks of non-zero length
 * @throws IOException
 * @throws ExecutionException
 */
public static List<FileChunk> getFileChunks(final TajoConf conf,
final LocalDirAllocator lDirAlloc,
final FileSystem localFS,
final PullServerParams params,
final LoadingCache<IndexCacheKey, BSTIndexReader> indexReaderCache,
final int lowCacheHitCheckThreshold)
throws IOException, ExecutionException {
final List<FileChunk> chunks = new ArrayList<>();
final String queryId = params.queryId();
final String shuffleType = params.shuffleType();
final String sid = params.ebId();
final long offset = params.offset();
final long length = params.length();
final Path queryBaseDir = PullServerUtil.getBaseOutputDir(queryId, sid);
if (LOG.isDebugEnabled()) {
LOG.debug("PullServer request param: shuffleType=" + shuffleType + ", sid=" + sid);
// the working dir of tajo worker for each query
LOG.debug("PullServer baseDir: " + conf.get(ConfVars.WORKER_TEMPORAL_DIR.varname) + "/" + queryBaseDir);
}
// if a stage requires a range shuffle
if (PullServerUtil.isRangeShuffle(shuffleType)) {
final List<String> taskIdList = params.taskAttemptIds();
final List<String> taskIds = PullServerUtil.splitMaps(taskIdList);
final String startKey = params.startKey();
final String endKey = params.endKey();
final boolean last = params.last();
long before = System.currentTimeMillis();
for (String eachTaskId : taskIds) {
Path outputPath = StorageUtil.concatPath(queryBaseDir, eachTaskId, "output");
if (!lDirAlloc.ifExists(outputPath.toString(), conf)) {
LOG.warn(outputPath + " does not exist.");
continue;
}
Path path = localFS.makeQualified(lDirAlloc.getLocalPathToRead(outputPath.toString(), conf));
FileChunk chunk = PullServerUtil.searchFileChunk(queryId, sid, path, startKey, endKey, last, indexReaderCache,
lowCacheHitCheckThreshold);
if (chunk != null) {
chunks.add(chunk);
}
}
long after = System.currentTimeMillis();
LOG.info("Index lookup time: " + (after - before) + " ms");
// if a stage requires a hash shuffle or a scattered hash shuffle
} else if (PullServerUtil.isHashShuffle(shuffleType)) {
final String partId = params.partId();
int partParentId = HashShuffleAppenderManager.getPartParentId(Integer.parseInt(partId), conf);
Path partPath = StorageUtil.concatPath(queryBaseDir, "hash-shuffle", String.valueOf(partParentId), partId);
if (!lDirAlloc.ifExists(partPath.toString(), conf)) {
throw new FileNotFoundException(partPath.toString());
}
Path path = localFS.makeQualified(lDirAlloc.getLocalPathToRead(partPath.toString(), conf));
File file = new File(path.toUri());
long startPos = (offset >= 0 && length >= 0) ? offset : 0;
long readLen = (offset >= 0 && length >= 0) ? length : file.length();
if (startPos >= file.length()) {
String errorMessage = "Start pos[" + startPos + "] great than file length [" + file.length() + "]";
throw new EOFException(errorMessage);
}
FileChunk chunk = new FileChunk(file, startPos, readLen);
chunks.add(chunk);
} else {
throw new IllegalArgumentException("Unknown shuffle type: " + shuffleType);
}
return chunks.stream().filter(c -> c.length() > 0).collect(Collectors.toList());
}
}