/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.chukwa.extraction.demux.processor.mapper;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecord;
import org.apache.hadoop.chukwa.extraction.engine.ChukwaRecordKey;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.log4j.Logger;
/**
 * Demux mapper processor for the captured output of a <code>ps</code>
 * command. The body of each log entry is parsed by {@link PsOutput} and one
 * {@link ChukwaRecord} is emitted per process line found in it.
 */
public class Ps extends AbstractProcessor {
  static Logger log = Logger.getLogger(Ps.class);

  /**
   * Parses one log entry and collects one record per process listed in its
   * body. Every column/value pair that {@link PsOutput} extracted for a
   * process is added to that process' record.
   *
   * @param recordEntry raw log entry text; wrapped in a LogEntry to obtain
   *          its body and date
   * @param output collector that receives one key/record pair per process
   * @param reporter not used by this method
   * @throws Throwable whatever the helpers throw, including
   *           {@link InvalidPsRecord} when the body is not valid ps output
   */
  @Override
  protected void parse(String recordEntry,
      OutputCollector<ChukwaRecordKey, ChukwaRecord> output, Reporter reporter)
      throws Throwable {
    // Named logEntry so that it does not shadow the static logger field "log".
    LogEntry logEntry = new LogEntry(recordEntry);
    PsOutput ps = new PsOutput(logEntry.getBody());
    for (HashMap<String, String> processInfo : ps.getProcessList()) {
      // "key" is not declared in this class; presumably it is a field
      // inherited from AbstractProcessor that buildGenericRecord fills in
      // (verify against the superclass).
      key = new ChukwaRecordKey();
      ChukwaRecord record = new ChukwaRecord();
      this.buildGenericRecord(record, null, logEntry.getDate().getTime(), "Ps");
      for (Entry<String, String> entry : processInfo.entrySet()) {
        record.add(entry.getKey(), entry.getValue());
      }
      output.collect(key, record);
    }
  }

  /**
   * Parsed form of the text printed by a <code>ps</code> command: a header
   * line naming the columns followed by one line per process. Each process
   * line becomes a map from column name to the text found in that column.
   */
  public static class PsOutput {
    /**
     * Matches one column token: a run of characters that are neither a space
     * nor a tab. Compiled once because it is applied to every line.
     */
    private static final Pattern COLUMN = Pattern.compile("[^ \t]+");

    // processes info
    private ArrayList<HashMap<String, String>> recordList = new ArrayList<HashMap<String, String>>();

    /**
     * Parses the given ps output. A null or empty string, or output with
     * fewer than two lines, yields an empty process list without any
     * validation.
     *
     * @param psCmdOutput complete text printed by the ps command; the first
     *          line is the header and CMD must be its last column
     * @throws InvalidPsRecord if the header is empty or does not end with
     *           CMD, if a process line has fewer values than the header has
     *           columns, or if no command starts with "init"
     */
    public PsOutput(String psCmdOutput) throws InvalidPsRecord {
      if (psCmdOutput == null || psCmdOutput.length() == 0) {
        return;
      }

      String[] lines = psCmdOutput.split("[\n\r]+");
      // at least two lines
      if (lines.length < 2) {
        return;
      }

      // header
      ArrayList<String> header = new ArrayList<String>();
      Matcher matcher = COLUMN.matcher(lines[0]);
      while (matcher.find()) {
        header.add(matcher.group(0));
      }
      // The header is empty when the first line is blank, e.g. when the
      // output starts with a line break; report that as an invalid record
      // rather than failing on the index lookup below.
      if (header.isEmpty() || !header.get(header.size() - 1).equals("CMD")) {
        throw new InvalidPsRecord("CMD must be the last column");
      }

      // records
      boolean foundInitCmd = false;
      for (int line = 1; line < lines.length; line++) {
        HashMap<String, String> record = new HashMap<String, String>();
        recordList.add(record);

        matcher = COLUMN.matcher(lines[line]);
        for (int index = 0; index < header.size(); index++) {
          String key = header.get(index);
          requireNextToken(matcher, key, lines[line]);
          if (!key.equals("CMD")) {
            String value = matcher.group(0);
            /**
             * For STARTED column, it could be in two formats: "MMM dd" or
             * "hh:mm:ss". Because whitespace is the delimiter, the "MMM dd"
             * form spans two tokens; a value that does not start with a
             * digit is taken to be the month, and the following token is
             * appended to it directly, without a separator.
             */
            if (key.equals("STARTED")) {
              char c = value.charAt(0);
              if (c < '0' || c > '9') {
                requireNextToken(matcher, key, lines[line]);
                value += matcher.group(0);
              }
            }
            record.put(key, value);
          } else {
            // reached the cmd part. all remains should be put
            // together as the command
            String value = lines[line].substring(matcher.start());
            record.put(key, value);
            if (!foundInitCmd) {
              foundInitCmd = value.startsWith("init");
            }
            break;
          }
        }
      }

      if (!foundInitCmd) {
        throw new InvalidPsRecord("Did not find 'init' cmd");
      }
    }

    /**
     * Advances the matcher to the next column token of a process line.
     *
     * @param matcher matcher positioned on the process line
     * @param column name of the header column the token is read for
     * @param line the process line being parsed, quoted in the error message
     * @throws InvalidPsRecord if the line has no further token
     */
    private static void requireNextToken(Matcher matcher, String column,
        String line) throws InvalidPsRecord {
      if (!matcher.find()) {
        throw new InvalidPsRecord("Missing value for column '" + column
            + "' in line: " + line);
      }
    }

    /**
     * Returns the parsed processes, one map of column name to value per
     * process line, in the order the lines appeared. The returned list is
     * the internal list, not a copy.
     *
     * @return the list of parsed process records
     */
    public ArrayList<HashMap<String, String>> getProcessList() {
      return recordList;
    }
  }

  /**
   * Thrown by {@link PsOutput} when the text it is given cannot be parsed as
   * ps output.
   */
  public static class InvalidPsRecord extends Exception {
    private static final long serialVersionUID = 1L;

    /** Creates an exception without a message or cause. */
    public InvalidPsRecord() {
    }

    /**
     * @param message description of what is wrong with the record
     */
    public InvalidPsRecord(String message) {
      super(message);
    }

    /**
     * @param cause underlying failure
     */
    public InvalidPsRecord(Throwable cause) {
      super(cause);
    }

    /**
     * @param message description of what is wrong with the record
     * @param cause underlying failure
     */
    public InvalidPsRecord(String message, Throwable cause) {
      super(message, cause);
    }
  }
}