/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.chukwa.extraction.demux.processor.mapper;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.mapred.JobHistory;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class JobLog extends AbstractProcessor {
private String savedLines = "";
/**
* Job logs could be split into multiple lines.
* If input recordEntry ends with '"' or '" .', process the line.
* Otherwise, save the log and wait for the next log.
*
* @return An object of JobLogLine if a full job log is found. Null otherwise.
*/
public JobLogLine getJobLogLine(String recordEntry) {
if(recordEntry == null) {
savedLines = "";
return null;
}
recordEntry = recordEntry.trim();
if(recordEntry.length() == 0 || recordEntry.startsWith("Meta")) {
savedLines = "";
return null;
}
if(recordEntry.startsWith("Job")
|| recordEntry.startsWith("Meta")
|| recordEntry.startsWith("Task")
|| recordEntry.startsWith("MapAttempt")
|| recordEntry.startsWith("ReduceAttempt"))
{
savedLines = "";
}
savedLines += recordEntry;
if(!savedLines.endsWith("\"") && !savedLines.endsWith("\" .")) {
return null;
}
JobLogLine line = new JobLogLine(savedLines);
return line;
}
@Override
protected void parse(String recordEntry,
OutputCollector<ChukwaRecordKey, ChukwaRecord> output,
Reporter reporter) throws Throwable
{
JobLogLine line = getJobLogLine(recordEntry);
if(line == null || (!line.getLogType().equals("Meta")
&& !line.getLogType().equals("JobData")
&& !line.getLogType().equals("TaskData")))
{
return;
}
if(line.getLogType().equals("Meta")) {
String streamName = chunk.getStreamName();
if(streamName == null) {
return;
}
String jobId = JobLogFileName.getJobIdFromFileName(streamName);
if(jobId == null) {
return;
}
line.setLogType("JobData");
}
key = new ChukwaRecordKey();
ChukwaRecord record = new ChukwaRecord();
this.buildGenericRecord(record, null, -1l, line.getLogType());
for (Entry<String, String> entry : line.entrySet()) {
record.add(entry.getKey(), entry.getValue());
}
for(Entry<String, Long> entry : line.getCounterHash().flat().entrySet()) {
record.add(entry.getKey(), entry.getValue().toString());
}
long timestamp = line.getTimestamp();
record.setTime(timestamp);
key.setKey(getKey(timestamp, line.getJobId()));
output.collect(key, record);
}
private String getKey(long ts, String jobId) {
long unit = 60 * 60 * 1000;
if(ts == 0) {
ts = archiveKey.getTimePartition();
}
long rounded = (ts / unit) * unit;
return rounded + "/" + jobId + "/" + ts;
}
public static class JobLogLine extends HashMap<String, String> {
private static final long serialVersionUID = 4902948603527677036L;
/**
* search timestamp from stream. if no timestamp found, use last seen one.
*/
private static final String[] timestampKeys = {
JobHistory.Keys.SUBMIT_TIME.toString(),
JobHistory.Keys.LAUNCH_TIME.toString(),
JobHistory.Keys.START_TIME.toString(),
JobHistory.Keys.FINISH_TIME.toString(),
};
private static long lastTimestamp = 0l;
private String logType;
private String jobId;
private String taskId;
private CounterHash counterHash;
/**
* example lines:
* Task TASKID="task_200903062215_0577_r_000000" TASK_TYPE="REDUCE" START_TIME="1236386538540" SPLITS="" .
* Job JOBID="job_200903062215_0577" JOB_PRIORITY="NORMAL" .
* Job JOBID="job_200903062215_0577" LAUNCH_TIME="1236386526545" TOTAL_MAPS="14" TOTAL_REDUCES="1" JOB_STATUS="PREP" .
*/
public JobLogLine(String line) {
line = line.trim();
if (line.length() == 0)
return;
String key = null;
String[] pairs = line.split("=\"");
for (int i = 0; i < pairs.length; i++) {
if (i == 0) {
String[] fields = pairs[i].split(" ");
logType = fields[0];
if(logType.equals("Job")) {
logType = "JobData";
}
else if (logType.equals("Task") || logType.equals("MapAttempt") || logType.equals("ReduceAttempt")) {
logType = "TaskData";
}
if (fields.length > 1)
key = fields[1];
continue;
}
int pos = pairs[i].lastIndexOf('"');
String value = pairs[i].substring(0, pos);
put(key, value);
if(i == (pairs.length-1))
break;
key = pairs[i].substring(pos + 2);
}
// jobid format: job_200903062215_0577
jobId = get(JobHistory.Keys.JOBID.toString());
// taskid format: task_200903062215_0577_r_000000
taskId = get(JobHistory.Keys.TASKID.toString());
if(taskId != null) {
String[] fields = taskId.split("_");
jobId = "job_" + fields[1] + "_" + fields[2];
put(JobHistory.Keys.JOBID.toString(), jobId);
taskId = taskId.substring(5);
}
counterHash = new CounterHash(get(JobHistory.Keys.COUNTERS.toString()));
if(get("TASK_ATTEMPT_ID") != null) {
put("TASK_ATTEMPT_TIMES", "" + getAttempts());
}
if(logType.equals("JobData") && get(JobHistory.Keys.FINISH_TIME.toString())!=null) {
put("JOB_FINAL_STATUS", get("JOB_STATUS"));
}
for(String timeKey : timestampKeys) {
String value = get(timeKey);
if(value == null || value.equals("0")) {
remove(timeKey);
}
}
}
public String getLogType() {
return logType;
}
public void setLogType(String logType) {
this.logType = logType;
}
public String getJobId() {
return jobId;
}
public String getTaskId() {
return taskId;
}
public long getTimestamp() {
for(String key : timestampKeys) {
String value = get(key);
if(value != null && value.length() != 0) {
long ts = Long.parseLong(value);
if(ts > lastTimestamp) {
lastTimestamp = ts;
}
break;
}
}
return lastTimestamp;
}
public CounterHash getCounterHash() {
return counterHash;
}
public int getAttempts() {
String attemptId = get("TASK_ATTEMPT_ID");
if(attemptId == null) {
return -1;
}
else {
try {
String[] elems = attemptId.split("_");
return Integer.parseInt(elems[elems.length - 1] + 1);
} catch (NumberFormatException e) {
return -1;
}
}
}
}
/**
* Parse counter string to object
*
* Example string:
* {(org\.apache\.hadoop\.mapred\.JobInProgress$Counter)(Job Counters )
[(TOTAL_LAUNCHED_REDUCES)(Launched reduce tasks)(1)]
[(TOTAL_LAUNCHED_MAPS)(Launched map tasks)(14)]
[(DATA_LOCAL_MAPS)(Data-local map tasks)(14)]
}
{(FileSystemCounters)(FileSystemCounters)
[(FILE_BYTES_READ)(FILE_BYTES_READ)(132)]
[(HDFS_BYTES_READ)(HDFS_BYTES_READ)(20471)]
[(FILE_BYTES_WRITTEN)(FILE_BYTES_WRITTEN)(790)]
[(HDFS_BYTES_WRITTEN)(HDFS_BYTES_WRITTEN)(248)]
}
*/
public static class CounterHash extends HashMap<String, HashMap<String, Long>>{
public CounterHash(String str) {
if(str == null) {
return;
}
if(str.startsWith("{")) {
for(String group : split(str, "[{}]")) {
HashMap<String, Long> hash = null;
for(String counter : split(group, "[\\[\\]]")) {
ArrayList<String> idAndDisplay = split(counter, "[\\(\\)]");
if(hash == null) {
hash = new HashMap<String, Long>();
String groupId = idAndDisplay.get(0).replaceAll("\\\\.", ".");
put(groupId, hash);
}
else {
hash.put(idAndDisplay.get(0), Long.parseLong(idAndDisplay.get(2)));
}
}
}
} else {
HashMap<String, Long> hash = new HashMap<String, Long>();
put("Hadoop18", hash);
for(String counter : split(str, ",")) {
ArrayList<String> kv = split(counter, ":");
hash.put(kv.get(0), Long.parseLong(kv.get(1)));
}
}
}
/**
* Flat the counter hashs and add into map passed int.
*
* For example mentioned in the constructor, the result will be
* <pre>
* Counter:org\.apache\.hadoop\.mapred\.JobInProgress$Counter:TOTAL_LAUNCHED_REDUCES=1
* Counter:org\.apache\.hadoop\.mapred\.JobInProgress$Counter:TOTAL_LAUNCHED_MAPS=14
* Counter:org\.apache\.hadoop\.mapred\.JobInProgress$Counter:DATA_LOCAL_MAPS=14
* Counter:FileSystemCounters:FILE_BYTES_READ=132
* Counter:FileSystemCounters:HDFS_BYTES_READ=20471
* Counter:FileSystemCounters:FILE_BYTES_WRITTEN=790
* Counter:FileSystemCounters:HDFS_BYTES_WRITTEN=248
* </pre>
*/
public HashMap<String, Long> flat() {
HashMap<String, Long> result = new HashMap<String, Long>();
for(Entry<String, HashMap<String, Long>> entry : entrySet()) {
String id = entry.getKey();
for(Entry<String, Long> counterValue : entry.getValue().entrySet()) {
result.put("Counter:" + id + ":" + counterValue.getKey(), counterValue.getValue());
}
}
return result;
}
}
public static ArrayList<String> split(String s, String regex) {
ArrayList<String> result = new ArrayList<String>();
for(String field : s.split(regex)) {
if(field != null && field.length()>0) {
result.add(field);
}
}
return result;
}
private static class JobLogFileName {
private static final Pattern pattern = Pattern.compile("job_[0-9]+_[0-9]+");
public static String getJobIdFromFileName(String name) {
Matcher matcher = pattern.matcher(name);
if (matcher.find()) {
return matcher.group(0);
}
else {
return null;
}
}
}
}