/*
* XmlParser.java
*
* Copyright (C) 2005-2009 Tommi Laukkanen
* http://www.substanceofcode.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.substanceofcode.utils;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
/**
* Simple and lightweight XML parser without complete error handling.
*
* @author Tommi Laukkanen (tlaukkanen at gmail dot com)
*/
public class XmlParser {
CustomInputStream inputStream = null;
static String encoding = "UTF-8";
/** Current XML element name (eg. <title> = title) */
String currentElementName = "";
String currentElementData = "";
boolean currentElementContainsText = false;
/** Enumerations for parse function */
public static final int END_DOCUMENT = 0;
public static final int ELEMENT = 1;
/**
* Creates a new instance of XmlParser
* @param inputStream Stream containing XML document.
*/
public XmlParser(CustomInputStream inputStream) {
this.inputStream = inputStream;
}
public XmlParser(String xml) {
InputStream is;
try {
is = new ByteArrayInputStream(xml.getBytes("UTF-8"));
} catch (UnsupportedEncodingException ex) {
is = new ByteArrayInputStream(xml.getBytes());
}
CustomInputStream cis = new CustomInputStream(is);
inputStream = cis;
}
public String getRawData() {
return inputStream.getText();
}
/**
* Parse next element
* @return Element type or end-of-document.
* @throws java.io.IOException
*/
public int parse() throws IOException {
boolean parsingElementName = false;
boolean elementFound = false;
boolean elementStart = false;
boolean parsingElementData = false;
int inputCharacter;
char c;
inputCharacter = inputStream.read();
while (inputCharacter != -1 && elementFound==false) {
c = (char)inputCharacter;
if(c=='/' && elementStart==true) {
parsingElementName = false;
}
else if(elementStart==true && (c=='?' || c=='!')) {
if(currentElementData.charAt(currentElementData.length()-1)=='<') {
parsingElementName = false;
}
}
if(parsingElementName==true) {
if(c==' ' || c=='/' ) {
parsingElementName = false;
parsingElementData = true;
}
else if(c!='>') {
currentElementName += c;
}
}
if(c=='<') {
elementStart = true;
parsingElementName = true;
parsingElementData = true;
currentElementName = "";
currentElementData = "";
}
if(parsingElementData==true) {
currentElementData += c;
}
if(c=='>') {
if(currentElementName.length()>0) {
elementFound = true;
parsingElementName = false;
}
}
if(!elementFound){
inputCharacter = inputStream.read();
}
}
if( currentElementData.length()>2 &&
currentElementData.charAt( currentElementData.length()-2 )=='/' &&
currentElementData.charAt( currentElementData.length()-1 )=='>' ) {
currentElementContainsText = false;
} else {
currentElementContainsText = true;
}
if( inputCharacter==-1 ) {
return END_DOCUMENT;
} else {
return ELEMENT;
}
}
/** Get element name */
public String getName() {
return currentElementName;
}
/** Get inner XML */
public String getInnerXml() throws IOException {
return getText();
}
/** Get outer XML */
public String getOuterXml() throws IOException {
return "<" + currentElementName + ">"
+ getInnerXml()
+ "</" + currentElementName + ">";
}
/** Get element text including inner xml */
public String getText() throws IOException {
//Log.debug("Getting text for element '" + currentElementName + "'");
if(currentElementContainsText==false) {
return "";
}
boolean endParsing = false;
String text;
StringBuffer textBuffer = new StringBuffer();
int inputCharacter;
char c;
char lastChars[] = new char[3];
lastChars[0] = ' ';
lastChars[1] = ' ';
lastChars[2] = ' ';
char elementNameChars[] = new char[2];
elementNameChars[0] = currentElementName.charAt( currentElementName.length()-2 );
elementNameChars[1] = currentElementName.charAt( currentElementName.length()-1 );
while ((inputCharacter = inputStream.read()) != -1 && endParsing==false) {
c = (char)inputCharacter;
lastChars[0] = lastChars[1];
lastChars[1] = c;
//System.out.print(c);
textBuffer.append(c);
if( lastChars[0] == elementNameChars[0] &&
lastChars[1] == elementNameChars[1]) {
if( textBuffer.toString().endsWith("</" + currentElementName)) {
endParsing = true;
}
}
}
if (encoding.equals("")) {
text = textBuffer.toString();
} else {
try {
text = new String(textBuffer.toString().getBytes(), encoding);
} catch (UnsupportedEncodingException e) {
Log.add("Couldn't use UTF-8 encoding");
try {
text = new String(textBuffer.toString().getBytes(), "UTF8");
encoding = "UTF8";
} catch (UnsupportedEncodingException e2) {
Log.add("Couldn't use UTF8 encoding");
text = textBuffer.toString();
encoding = "";
}
}
}
text = textBuffer.toString();
text = StringUtil.replace(text, "</" + currentElementName, "");
/** Handle some entities and encoded characters */
//Log.add("GetText() before: " + text);
text = decodeCharacters(text);
//Log.add("GetText() after : " + text);
return text;
}
/**
* Get attribute value from current element
*/
public String getAttributeValue(String attributeName) {
/** Check whatever the element contains given attribute */
int attributeStartIndex = currentElementData.indexOf(attributeName);
if( attributeStartIndex<0 ) {
return null;
}
/** Calculate actual value start index */
int valueStartIndex = attributeStartIndex + attributeName.length() + 2;
/** Check the attribute value end index */
int valueEndIndex = currentElementData.indexOf("\"", valueStartIndex);
if( valueEndIndex<0 ) {
return null;
}
/** Parse value */
String value = currentElementData.substring(valueStartIndex, valueEndIndex);
value = decodeCharacters(value);
return value;
}
private String decodeCharacters(String text) {
text = StringUtil.replace(text, "<", "<");
text = StringUtil.replace(text, ">", ">");
text = StringUtil.replace(text, " ", " ");
text = StringUtil.replace(text, """, "\"");
text = StringUtil.replace(text, "&", "&");
text = StringUtil.replace(text, "ä", "ä");
text = StringUtil.replace(text, "ö", "ö");
text = StringUtil.replace(text, "ä", "ä");
text = StringUtil.replace(text, "ö", "ö");
text = StringUtil.replace(text, "â??", "'");
text = StringUtil.replace(text, "’", "'");
text = StringUtil.replace(text, "‘", "'");
text = StringUtil.replace(text, "“", "\"");
text = StringUtil.replace(text, "”", "\"");
text = StringUtil.replace(text, "'", "\"");
text = StringUtil.replace(text, "€", "€");
text = StringUtil.replace(text, String.valueOf((char)226) + String.valueOf((char)128) + String.valueOf((char)153), "'");
text = StringUtil.replace(text, String.valueOf((char)226) + String.valueOf((char)128) + String.valueOf((char)166), "...");
text = StringUtil.replace(text, String.valueOf((char)226) + String.valueOf((char)128) + String.valueOf((char)156), "\"");
text = StringUtil.replace(text, String.valueOf((char)226) + String.valueOf((char)128) + String.valueOf((char)157), "\"");
boolean foundEscape = text.indexOf("")>=0;
int startIndex = 0;
while(foundEscape) {
int entityStart = text.indexOf("");
int entityEnd = text.indexOf(";",entityStart);
if(entityStart>=0 && entityEnd>0) {
String character = text.substring(entityStart+2, entityEnd);
//System.out.print("char:" + character);
try {
int charValue = 0;
charValue = Integer.parseInt(character);
//System.out.println("char-val:" + charValue);
if(charValue>0) {
text = StringUtil.replace(text, "" + character + ";", String.valueOf((char)charValue));
}
} catch(Exception ex) {
// Do nothing...
//System.out.println("char ex:" + ex.toString());
}
}
startIndex++;
if(startIndex<text.length()) {
foundEscape = text.indexOf("", startIndex)>=0;
} else {
foundEscape = false;
}
}
return text;
}
}