package com.nexr.platform.search.parser;
import com.nexr.platform.collector.record.LogRecord;
import com.nexr.platform.collector.record.LogRecordKey;
import com.nexr.platform.search.utils.io.AppendRootInputStream;
import com.nexr.platform.search.utils.io.MapFileWriter;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
import java.io.File;
import java.io.IOException;
/**
* Xml 형태로 된, SDP Data 를 읽어와 Hadoop File System 형식의 LogRecordKey, LogRecord 의 Map 형태로 저장 한다.
* David.Woo - 2011.07.26
*/
public class SdpDataParser implements DataParser {
private final String _SEPARATOR = ".";
private XMLEventReader _xmlEventReader;
private MapFileWriter _mapFileWriter;
// private Map<String, String> _mapColumnData;
/**
* Constructor
* @param mapFilePath File System 을 저장 할 경로 ( 디렉 토리 )
* @param xmlFilePath Xml File 경로
*/
public SdpDataParser(String mapFilePath, String xmlFilePath){
File mapFile = new File(mapFilePath);
if(!mapFile.isDirectory()) mapFile.mkdirs();
_mapFileWriter = new MapFileWriter(mapFilePath);
// _mapColumnData = new HashMap<String, String>();
try {
XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
_xmlEventReader = xmlInputFactory.createXMLEventReader(AppendRootInputStream.createInputStream(xmlFilePath, "root"));
_mapFileWriter.open();
// this.loadColumnFile(columnFilePath);
} catch(Exception e) {
e.printStackTrace();
}
}
/**
* Xml Data Node Name 을 컨버팅 한다.
* File 의 형태는 [원본 노드명] "\t" [바뀔 노드명]이 된다.
* Ex > TransactionLog.SdpHeader.test TSL.SHD.test
*
* @param columnFilePath Column Define File Path
* @throws IOException Column File Load Error
*/
/*private void loadColumnFile(String columnFilePath) throws IOException {
File file = new File(columnFilePath);
String _ENCODING = "UTF-8";
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), _ENCODING));
String row;
while((row = reader.readLine()) != null) {
if(!row.isEmpty()){
String _SPLIT = "\t";
String[] cols = row.split(_SPLIT);
if(cols.length > 1) _mapColumnData.put(cols[0], cols[1]);
}
}
}*/
/**
* 시작
*/
public void start() {
LogRecord record = new LogRecord();
int i = 0;
String firstKey = "TXLG";
try {
while(this._xmlEventReader.hasNext()){
XMLEvent event = this._xmlEventReader.nextEvent();
if(this.isStartNode(event, firstKey)){
record = new LogRecord();
continue;
}
for(COLUMN_LIST COLUMN : COLUMN_LIST.values()){
if(this.isStartNode(event, COLUMN.name())){
String columnName = COLUMN.name();
String keyValue = firstKey + _SEPARATOR + columnName;
record = this.parseToAttribute(record, keyValue, columnName);
}
}
if(this.isEndNode(event, firstKey)){
LogRecordKey logRecordKey = new LogRecordKey();
logRecordKey.setLogId(String.format("%09d", i++));
//System.out.println(i++);
// logRecordKey.setLogId(UUID.randomUUID().toString());
logRecordKey.setTime(String.valueOf(System.currentTimeMillis()));
logRecordKey.setDataType("");
_mapFileWriter.getMapFileWriter().append(logRecordKey, record);
}
}
} catch(Exception e) {
e.printStackTrace();
}
}
/**
* 컬럼의 각 부분 대표 명칭에 대한 Enum
*/
private enum COLUMN_LIST{
SHD,
DHD,
BD
}
/**
* 종료
* @throws XMLStreamException Xml Close Error
* @throws IOException close Error
*/
public void close() throws XMLStreamException, IOException {
_xmlEventReader.close();
_mapFileWriter.close();
}
/**
* Xml Data 의 시작 노드 인지를 분별 한다.
* @param event XmlEvent
* @param columnName Xml Tag Name
* @return true, false
* @throws XMLStreamException XmlRead Error
*/
private boolean isStartNode(XMLEvent event, String columnName) throws XMLStreamException {
if(event.isStartElement()){
if(event.asStartElement().getName().getLocalPart().equals(columnName)) {
return true;
}
}
return false;
}
/**
* Xml Data 의 노드의 끝 부분 인지 분별 한다.
* @param event XmlEvent
* @param columnName Xml Tag Name
* @return true, false
* @throws XMLStreamException XmlRead Error
*/
private boolean isEndNode(XMLEvent event, String columnName) throws XMLStreamException {
if(event.isEndElement()){
if(event.asEndElement().getName().getLocalPart().equals(columnName)) {
return true;
}
}
return false;
}
/**
* xml Data 를 Parsing 하여, 하위 노드의 Attribute 의 값을 LogRecord 형태로 리턴 한다.
* @param logRecord 저장될 logRecord.
* @param keyValue LogRecord 에 저장 되는 key 값의 prefix
* @param closedColumnName 닫히는 Xml Key Tag
* @return LogRecord
* @throws XMLStreamException XmlParsing Error
*/
private LogRecord parseToAttribute(LogRecord logRecord, String keyValue, String closedColumnName) throws XMLStreamException {
boolean parseBool = true;
String key = keyValue + _SEPARATOR;
String value = "";
while(parseBool){
XMLEvent event = _xmlEventReader.nextEvent();
if(event.isStartElement()){
key += event.asStartElement().getName().getLocalPart();
} else if(event.isCharacters()) {
value = event.asCharacters().getData();
} else if(event.isEndElement()){
if(event.asEndElement().getName().getLocalPart().equals(closedColumnName)){
parseBool = false;
} else {
logRecord.add(key, value.trim());
key = keyValue + _SEPARATOR;
value = "";
}
}
}
return logRecord;
}
public static void main(String[] args) throws XMLStreamException, IOException {
String mapFilePath, xmlFilePath;
if(args.length > 0) {
mapFilePath = args[0];
xmlFilePath = args[1];
// columnFilePath = args[2];
} else {
mapFilePath = "/Users/david/Data/hdfs/";
xmlFilePath = "/Users/david/Data/generateParseData.log";
// columnFilePath = "/home/david/Data/SearchPlatform/SDP/SdpColumnDefine.txt";
}
System.out.println("***************************************************************************");
System.out.println("[START MAKE FILE SYSTEM DATA.]");
System.out.println("***************************************************************************");
SdpDataParser xmlParser = new SdpDataParser(mapFilePath, xmlFilePath);
xmlParser.start();
xmlParser.close();
}
}