/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.chukwa.util;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.regex.*;
import java.util.*;
import java.io.*;
import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.chukwa.*;
import org.apache.hadoop.chukwa.conf.ChukwaConfiguration;
import org.apache.hadoop.chukwa.extraction.engine.RecordUtil;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.conf.Configuration;
public class DumpChunks {
/**
 * Command-line entry point: tries to find chunks matching a given pattern.
 * Takes as input an optional mode flag ("-s" or "--nosort"), a set of
 * &-delimited patterns, followed by a list of file names.
 *
 * E.g: Dump datatype=Iostat&source=/my/log/.* *.done
 *
 * Prints a usage message and exits with status -1 when fewer than two
 * arguments are supplied; otherwise hands the arguments to dump with a fresh
 * ChukwaConfiguration and System.out as the destination.
 *
 * @param args optional mode flag, pattern list, then file names or globs
 * @throws IOException propagated from dump
 * @throws URISyntaxException propagated from dump
 */
public static void main(String[] args) throws IOException, URISyntaxException {
  if(args.length < 2) {
    // The usage text mirrors what dump actually parses: both mode flags, and
    // '&' as the pattern separator, as documented above.
    System.out.println("usage: Dump [-s|--nosort] pattern1&pattern2&pattern3... file1 file2 file3...");
    System.exit(-1);
  }
  ChukwaConfiguration conf = new ChukwaConfiguration();
  dump(args, conf, System.out);
}
/**
 * Picks the file system to search and reports its URI on System.err.
 *
 * A uri containing "://" is resolved through FileSystem.get(URI, conf).
 * Otherwise the local file system is used when the "writer.hdfs.filesystem"
 * property is unset, and FileSystem.get(conf) when it is set.
 *
 * @param conf configuration consulted for "writer.hdfs.filesystem" and passed to FileSystem
 * @param uri path or URI of the first file to be searched
 * @return the selected file system
 * @throws IOException propagated from the FileSystem lookup
 * @throws URISyntaxException if uri contains "://" but cannot be parsed as a URI
 */
static FileSystem getFS(Configuration conf, String uri) throws IOException, URISyntaxException {
  final FileSystem chosen;
  if(!uri.contains("://")) {
    // NOTE(review): only the presence of this property is checked; its value
    // is not passed to FileSystem.get -- confirm that is intended.
    boolean writerFsConfigured = conf.get("writer.hdfs.filesystem") != null;
    chosen = writerFsConfigured ? FileSystem.get(conf) : FileSystem.getLocal(conf);
  } else {
    chosen = FileSystem.get(new URI(uri), conf);
  }
  System.err.println("filesystem is " + chosen.getUri());
  return chosen;
}
/**
 * Reads the given sequence files and reports every chunk accepted by the filter.
 *
 * Expected layout of args: an optional mode flag ("-s" to print per-stream
 * chunk and byte counts, "--nosort" to write matching chunk data as it is
 * read), then the filter (the keyword "all", in any letter case, selects
 * Filter.ALL; anything else is handed to the Filter constructor), then one or
 * more file names or glob patterns.
 *
 * The file system is selected from the first file argument only; every glob
 * is expanded against that file system. Exceptions raised while reading the
 * files or displaying results are printed as a stack trace and not rethrown,
 * in which case no results are displayed.
 *
 * @param args optional mode flag, filter, then file names or globs
 * @param conf configuration passed to getFS and to each SequenceFile.Reader
 * @param out stream that receives the results; diagnostics go to System.err
 * @throws IllegalArgumentException if args lacks a filter or a file name
 * @throws IOException propagated from file-system lookup or glob expansion
 * @throws URISyntaxException propagated from getFS
 */
static void dump(String[] args, Configuration conf, PrintStream out) throws IOException, URISyntaxException {
  int filterArg = 0;
  boolean summarize = false;
  boolean nosort = false;
  if(args.length > 0) {
    if(args[0].equals("-s")) {
      filterArg++;
      summarize = true;
    } else if(args[0].equals("--nosort")) {
      filterArg++;
      nosort = true;
    }
  }
  // Fail with a readable message instead of an ArrayIndexOutOfBoundsException
  // when the filter or the file list is missing (e.g. "-s pattern" alone).
  if(args.length < filterArg + 2) {
    throw new IllegalArgumentException(
        "expected a pattern followed by at least one file name, got "
        + args.length + " argument(s)");
  }

  Filter patterns;
  // equalsIgnoreCase avoids depending on the default locale's lower-casing rules.
  if(args[filterArg].equalsIgnoreCase("all"))
    patterns = Filter.ALL;
  else
    patterns = new Filter(args[filterArg]);
  System.err.println("Patterns:" + patterns);

  ArrayList<Path> filesToSearch = new ArrayList<Path>();
  FileSystem fs = getFS(conf, args[filterArg + 1]);
  for(int i=filterArg + 1; i < args.length; ++i){
    Path[] globbedPaths = FileUtil.stat2Paths(fs.globStatus(new Path(args[i])));
    // a null result means nothing matched this argument
    if(globbedPaths != null)
      for(Path p: globbedPaths)
        filesToSearch.add(p);
  }
  System.err.println("expands to " + filesToSearch.size() + " actual files");

  DumpChunks dc;
  if(summarize)
    dc = new DumpAndSummarize();
  else if(nosort)
    dc = new DumpNoSort(out);
  else
    dc= new DumpChunks();

  try {
    for(Path p: filesToSearch) {
      SequenceFile.Reader r = new SequenceFile.Reader(fs, p, conf);
      try {
        ChukwaArchiveKey key = new ChukwaArchiveKey();
        ChunkImpl chunk = ChunkImpl.getBlankChunk();
        while (r.next(key, chunk)) {
          if(patterns.matches(chunk)) {
            dc.updateMatchCatalog(key.getStreamName(), chunk);
            // the matched chunk was handed to dc, so read the next record
            // into a fresh instance rather than overwriting it
            chunk = ChunkImpl.getBlankChunk();
          }
        }
      } finally {
        // release the reader even when reading or cataloguing fails
        r.close();
      }
    }
    dc.displayResults(out);
  } catch (Exception e) {
    // best-effort tool: report the failure and return without rethrowing
    e.printStackTrace();
  }
}
public DumpChunks() {
matchCatalog = new HashMap<String, SortedMap<Long, ChunkImpl> >();
}
Map<String, SortedMap<Long, ChunkImpl>> matchCatalog;
/**
 * Writes the catalogued data of every stream to out, followed by a trailer
 * line naming the stream.
 *
 * Chunks are visited in ascending key order. Bytes that fall before the end
 * of what has already been written for the stream are not written again, and
 * gaps are reported on System.err.
 *
 * @param out destination for the chunk data and the per-stream trailer
 * @throws IOException if writing chunk data to out fails
 */
protected void displayResults(PrintStream out) throws IOException{
  for(Map.Entry<String,SortedMap<Long, ChunkImpl>> catalogEntry: matchCatalog.entrySet()) {
    String streamName = catalogEntry.getKey();
    SortedMap<Long, ChunkImpl> chunksByStart = catalogEntry.getValue();

    long firstStart = chunksByStart.firstKey();
    if(firstStart > 0)
      System.err.println("---- map starts at "+ firstStart);

    long printedUpTo = 0; // position just past the last byte written so far
    for(Map.Entry<Long, ChunkImpl> chunkEntry: chunksByStart.entrySet()) {
      long chunkStart = chunkEntry.getKey();
      ChunkImpl current = chunkEntry.getValue();
      long chunkEnd = current.getSeqID();

      if(chunkStart < printedUpTo) {
        if(chunkEnd < printedUpTo)
          continue; //data already printed
        //tricky case: chunk overlaps with already-printed data, but not completely
        int alreadyPrinted = (int) (printedUpTo - chunkStart);
        int stillToPrint = (int) (chunkEnd - printedUpTo);
        out.write(current.getData(), alreadyPrinted, stillToPrint);
      } else {
        if(chunkStart > printedUpTo)
          System.err.println("---- printing bytes starting at " + chunkStart);
        out.write(current.getData());
      }
      printedUpTo = chunkEnd;
    }
    out.println("\n--------"+streamName + "--------");
  }
}
/**
 * Records a matching chunk under its stream, creating the stream's sorted map
 * on first use.
 *
 * The chunk is keyed by its sequence ID minus its length. When a chunk with
 * the same key is already recorded, the longer of the two is kept.
 *
 * @param streamName name of the stream the chunk belongs to
 * @param chunk the matching chunk
 * @throws IOException not thrown by this implementation; declared so that subclasses may throw it
 */
protected void updateMatchCatalog(String streamName, ChunkImpl chunk) throws IOException {
  SortedMap<Long, ChunkImpl> byStart = matchCatalog.get(streamName);
  if(byStart == null) {
    byStart = new TreeMap<Long, ChunkImpl>();
    matchCatalog.put(streamName, byStart);
  }

  long chunkStart = chunk.getSeqID() - chunk.getLength();
  ChunkImpl existing = byStart.get(chunkStart);
  // keep the new chunk when the slot is free or the new chunk is strictly longer
  boolean keepNew = existing == null || chunk.getLength() > existing.getLength();
  if(keepNew)
    byStart.put(chunkStart, chunk);
}
/**
 * Variant that keeps no chunk data: it only counts, per stream, how many
 * chunks matched and the sum of their lengths, and prints one summary line
 * per stream.
 */
static class DumpAndSummarize extends DumpChunks {
  /** Number of matching chunks per stream name, in first-seen order. */
  Map<String, Integer> matchCounts = new LinkedHashMap<String, Integer>();
  /** Sum of the lengths of the matching chunks per stream name, in first-seen order. */
  Map<String, Long> byteCounts = new LinkedHashMap<String, Long>();

  /**
   * Prints one line per stream: the stream name, its chunk count and its byte count.
   *
   * @param out destination for the summary lines
   */
  protected void displayResults(PrintStream out) throws IOException{
    for(Map.Entry<String, Integer> s: matchCounts.entrySet()) {
      out.print(s.getKey());
      out.print(" ");
      out.print(s.getValue());
      out.print(" chunks ");
      out.print(byteCounts.get(s.getKey()));
      out.println(" bytes");
    }
  }

  /**
   * Adds one chunk to the stream's totals.
   *
   * @param streamName name of the stream the chunk belongs to
   * @param chunk the matching chunk; only its length is used
   */
  protected void updateMatchCatalog(String streamName, ChunkImpl chunk) {
    long chunkBytes = chunk.getLength();
    Integer seen = matchCounts.get(streamName);
    // valueOf replaces the deprecated boxing constructors (new Integer / new Long)
    if(seen == null) {
      matchCounts.put(streamName, Integer.valueOf(1));
      byteCounts.put(streamName, Long.valueOf(chunkBytes));
    } else {
      matchCounts.put(streamName, Integer.valueOf(seen.intValue() + 1));
      Long soFar = byteCounts.get(streamName);
      byteCounts.put(streamName, Long.valueOf(soFar.longValue() + chunkBytes));
    }
  }
}
/**
 * Variant that writes each matching chunk's data to the output stream as soon
 * as the chunk is reported, without cataloguing it.
 */
static class DumpNoSort extends DumpChunks {
  /** Stream that receives the chunk data. */
  PrintStream out;

  /**
   * @param out stream that receives the data of every matching chunk
   */
  public DumpNoSort(PrintStream out) {
    this.out = out;
  }

  /**
   * Writes the chunk's data to the stream given at construction time.
   *
   * @param streamName unused
   * @param chunk the matching chunk whose data is written
   * @throws IOException if the write fails
   */
  protected void updateMatchCatalog(String streamName, ChunkImpl chunk) throws IOException {
    byte[] payload = chunk.getData();
    this.out.write(payload);
  }

  /**
   * Does nothing: the data was already written by updateMatchCatalog.
   *
   * @param out unused
   */
  protected void displayResults(PrintStream out) throws IOException{
    // nothing left to display
  }
}
}