/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.chukwa;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.chukwa.datacollection.DataFactory;
import org.apache.hadoop.chukwa.datacollection.adaptor.Adaptor;
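
/**
 * {@link org.apache.hadoop.io.Writable} implementation of {@link Chunk}: a
 * block of bytes read from a single stream on a single host, tagged with its
 * source host, stream name, data type, tags, and a sequence ID.
 *
 * <p>A minimal round-trip sketch (the data type, stream name, and payload
 * below are illustrative placeholders, not values required by Chukwa):
 *
 * <pre>{@code
 * ChunkImpl c = new ChunkImpl("SysLog", "/var/log/messages", 42L,
 *     "a log line\n".getBytes(), null);
 * ByteArrayOutputStream buf = new ByteArrayOutputStream();
 * c.write(new DataOutputStream(buf));
 * ChunkImpl copy = ChunkImpl.read(
 *     new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
 * }</pre>
 */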
public class ChunkImpl implements org.apache.hadoop.io.Writable, Chunk {
public static int PROTOCOL_VERSION = 1;
protected DataFactory dataFactory = DataFactory.getInstance();
private String source = "";
private String streamName = "";
private String dataType = "";
private String tags = "";
private byte[] data = null;
private int[] recordEndOffsets;
private int protocolVersion = 1;
private String debuggingInfo = "";
private transient Adaptor initiator;
long seqID;
private static String localHostAddr;
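  // Cache the local host name once at class-load time; fall back to
  // "localhost" if it cannot be resolved.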
static {
try {
setHostAddress(InetAddress.getLocalHost().getHostName());
} catch (UnknownHostException e) {
setHostAddress("localhost");
}
}
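  /**
   * Sets the host name that will be recorded as the source of chunks
   * subsequently created in this JVM.
   */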
public static void setHostAddress(String host) {
ChunkImpl.localHostAddr = host;
}
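  /**
   * Returns an empty chunk with no data; fields can be filled in later, e.g.
   * via {@link #readFields(DataInput)}.
   */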
public static ChunkImpl getBlankChunk() {
return new ChunkImpl();
}
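  // Package-private: used by getBlankChunk() and read() to create an empty
  // chunk before populating it.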
ChunkImpl() {
}
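  /**
   * Creates a chunk originating on the local host: the source field is set to
   * the local host name and the default tags are applied.
   *
   * @param dataType type of the data in this chunk
   * @param streamName name of the stream the data was read from
   * @param seq sequence ID of this chunk within its stream
   * @param data the payload bytes
   * @param source the adaptor that produced this chunk; kept as the
   *        initiator, but not serialized
   */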
public ChunkImpl(String dataType, String streamName, long seq, byte[] data,
Adaptor source) {
this.seqID = seq;
this.source = localHostAddr;
this.tags = dataFactory.getDefaultTags();
this.streamName = streamName;
this.dataType = dataType;
this.data = data;
this.initiator = source;
}
/**
* @see org.apache.hadoop.chukwa.Chunk#getData()
*/
public byte[] getData() {
return data;
}
/**
* @see org.apache.hadoop.chukwa.Chunk#setData(byte[])
*/
public void setData(byte[] logEvent) {
this.data = logEvent;
}
/**
* @see org.apache.hadoop.chukwa.Chunk#getStreamName()
*/
public String getStreamName() {
return streamName;
}
public void setStreamName(String logApplication) {
this.streamName = logApplication;
}
public String getSource() {
return source;
}
public void setSource(String logSource) {
this.source = logSource;
}
public String getDebugInfo() {
return debuggingInfo;
}
public void setDebugInfo(String a) {
this.debuggingInfo = a;
}
/**
* @see org.apache.hadoop.chukwa.Chunk#getSeqID()
*/
public long getSeqID() {
return seqID;
}
public void setSeqID(long l) {
seqID = l;
}
public int getProtocolVersion() {
return protocolVersion;
}
public void setProtocolVersion(int pv) {
this.protocolVersion = pv;
}
public Adaptor getInitiator() {
return initiator;
}
public void setInitiator(Adaptor a) {
initiator = a;
}
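  /**
   * Resets the source field to the local host name.
   */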
public void setLogSource() {
source = localHostAddr;
}
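  /**
   * Returns the offsets of the last byte of each record in this chunk. If no
   * offsets have been set, the whole chunk is treated as a single record
   * ending at the last byte of the data.
   */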
public int[] getRecordOffsets() {
if (recordEndOffsets == null)
recordEndOffsets = new int[] { data.length - 1 };
return recordEndOffsets;
}
public void setRecordOffsets(int[] offsets) {
recordEndOffsets = offsets;
}
public String getDataType() {
return dataType;
}
public void setDataType(String t) {
dataType = t;
}
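  /**
   * Appends a tag to this chunk's space-separated tag string. Tags are
   * expected to be of the form {@code name="value"} so that
   * {@link #getTag(String)} can find them; for example (illustrative values):
   *
   * <pre>{@code
   * chunk.addTag("cluster=\"demo\"");
   * chunk.getTag("cluster"); // returns "demo"
   * }</pre>
   */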
  @Override
  public void addTag(String tag) {
    this.tags += " " + tag;
  }
/**
* @see org.apache.hadoop.chukwa.Chunk#getTags()
*/
public String getTags() {
return tags;
}
  /**
   * Returns the value of the named tag, or null if no tag of the form
   * {@code tagName="value"} is present.
   *
   * @see org.apache.hadoop.chukwa.Chunk#getTag(java.lang.String)
   */
public String getTag(String tagName) {
Pattern tagPattern = Pattern.compile("\\b"+tagName+"=\"([^\"]*)\"");
if (tags != null) {
Matcher matcher = tagPattern.matcher(tags);
if (matcher.find()) {
return matcher.group(1);
}
}
return null;
}
  /**
   * Deserializes this chunk from the given input, rejecting data written with
   * a different protocol version.
   *
   * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
   */
public void readFields(DataInput in) throws IOException {
setProtocolVersion(in.readInt());
    if (protocolVersion != PROTOCOL_VERSION) {
      throw new IOException(
          "Protocol version mismatch, dropping data. Source version: "
              + protocolVersion + ", collector version: " + PROTOCOL_VERSION);
    }
setSeqID(in.readLong());
setSource(in.readUTF());
tags = in.readUTF(); // no public set method here
setStreamName(in.readUTF());
setDataType(in.readUTF());
setDebugInfo(in.readUTF());
int numRecords = in.readInt();
recordEndOffsets = new int[numRecords];
for (int i = 0; i < numRecords; ++i)
recordEndOffsets[i] = in.readInt();
data = new byte[recordEndOffsets[recordEndOffsets.length - 1] + 1];
in.readFully(data);
}
  /**
   * Serializes this chunk. Note that the initiating adaptor is transient and
   * is not written.
   *
   * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
   */
public void write(DataOutput out) throws IOException {
out.writeInt(PROTOCOL_VERSION);
out.writeLong(seqID);
out.writeUTF(source);
out.writeUTF(tags);
out.writeUTF(streamName);
out.writeUTF(dataType);
out.writeUTF(debuggingInfo);
if (recordEndOffsets == null)
recordEndOffsets = new int[] { data.length - 1 };
out.writeInt(recordEndOffsets.length);
for (int i = 0; i < recordEndOffsets.length; ++i)
out.writeInt(recordEndOffsets[i]);
out.write(data, 0, recordEndOffsets[recordEndOffsets.length - 1] + 1);
// byte at last offset is valid
}
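  /**
   * Reads a chunk that was serialized with {@link #write(DataOutput)} from
   * the given input.
   */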
public static ChunkImpl read(DataInput in) throws IOException {
ChunkImpl w = new ChunkImpl();
w.readFields(in);
return w;
}
// FIXME: should do something better here, but this is OK for debugging
  @Override
  public String toString() {
return source + ":" + streamName + ":" + new String(data) + "/" + seqID;
}
  /**
   * Rough estimate of the number of bytes this chunk occupies when serialized.
   *
   * @see org.apache.hadoop.chukwa.Chunk#getSerializedSizeEstimate()
   */
public int getSerializedSizeEstimate() {
int size = 2 * (source.length() + streamName.length() + dataType.length()
+ debuggingInfo.length()); // length of strings (pessimistic)
size += data.length + 4;
if (recordEndOffsets == null)
size += 8;
else
size += 4 * (recordEndOffsets.length + 1); // +1 for length of array
    size += 8; // seqID (a long)
return size;
}
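  /**
   * Sets the record end offsets from a collection of offsets, typically the
   * positions of record-terminating line ends found in the data.
   */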
public void setRecordOffsets(java.util.Collection<Integer> carriageReturns) {
recordEndOffsets = new int[carriageReturns.size()];
int i = 0;
for (Integer offset : carriageReturns)
recordEndOffsets[i++] = offset;
}
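  /**
   * Returns the length of this chunk's data, in bytes.
   */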
public int getLength() {
return data.length;
}
}