/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package xni.parser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.StringTokenizer;
import org.apache.xerces.util.NamespaceSupport;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDTDContentModelHandler;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLInputSource;
/**
 * This example is a very simple parser configuration that can
 * parse files with comma-separated values (CSV) to generate XML
 * events. For example, the following CSV document:
 * <pre>
 * Andy Clark,16 Jan 1973,Cincinnati
 * </pre>
 * produces the following XML "document" as represented by the
 * XNI streaming document information:
 * <pre>
 * &lt;?xml version='1.0' encoding='UTF-8' standalone='yes'?&gt;
 * &lt;!DOCTYPE csv [
 * &lt;!ELEMENT csv (row)*&gt;
 * &lt;!ELEMENT row (col)*&gt;
 * &lt;!ELEMENT col (#PCDATA)&gt;
 * ]&gt;
 * &lt;csv&gt;
 *  &lt;row&gt;
 *   &lt;col&gt;Andy Clark&lt;/col&gt;
 *   &lt;col&gt;16 Jan 1973&lt;/col&gt;
 *   &lt;col&gt;Cincinnati&lt;/col&gt;
 *  &lt;/row&gt;
 * &lt;/csv&gt;
 * </pre>
 * <p>
 * Every input line becomes one <code>row</code> element and every
 * comma-separated field becomes one <code>col</code> element. Empty
 * fields are preserved as empty <code>col</code> elements so that
 * columns keep their position (<code>a,,c</code> produces three
 * columns). A completely blank line produces a <code>row</code> with
 * no columns. Quoting and escaping of commas are not supported.
 *
 * @author Andy Clark, IBM
 *
 * @version $Id: CSVConfiguration.java 447690 2006-09-19 02:41:53Z mrglavas $
 */
public class CSVConfiguration
    extends AbstractConfiguration {

    //
    // Constants
    //

    /** A QName for the &lt;csv&gt; element name. */
    protected static final QName CSV = new QName(null, null, "csv", null);

    /** A QName for the &lt;row&gt; element name. */
    protected static final QName ROW = new QName(null, null, "row", null);

    /** A QName for the &lt;col&gt; element name. */
    protected static final QName COL = new QName(null, null, "col", null);

    /** An empty list of attributes. */
    protected static final XMLAttributes EMPTY_ATTRS = new XMLAttributesImpl();

    /** The character that separates fields within a line. */
    private static final char FIELD_SEPARATOR = ',';

    // NOTE: The whitespace strings below are kept per instance (not
    //       static) because they are mutable objects that are handed
    //       to the registered handlers.

    /** A newline XMLString. */
    private final XMLString NEWLINE = new XMLStringBuffer("\n");

    /** A newline + one space XMLString. */
    private final XMLString NEWLINE_ONE_SPACE = new XMLStringBuffer("\n ");

    /** A newline + two spaces XMLString. */
    private final XMLString NEWLINE_TWO_SPACES = new XMLStringBuffer("\n  ");

    //
    // Data
    //

    /**
     * A string buffer for use in copying string into an XMLString
     * object for passing to the characters method.
     */
    private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    //
    // XMLParserConfiguration methods
    //

    /**
     * Parses a CSV document and reports it as XML document events.
     * <p>
     * The character stream of the input source is used when one is
     * available; otherwise the byte stream is decoded using the
     * encoding declared on the input source, or the platform default
     * encoding when none is declared.
     * <p>
     * Parsers may not invoke this method while a parse is in progress.
     * Once a parse is complete, the parser may then parse another
     * document.
     * <p>
     * This method is synchronous: it will not return until parsing
     * has ended. If a client application wants to terminate
     * parsing early, it should throw an exception. The underlying
     * reader is closed in every case, including when a handler or the
     * reader itself throws.
     *
     * @param source The input source for the top-level of the
     *               document.
     *
     * @exception XNIException Any XNI exception, possibly wrapping
     *                         another exception.
     * @exception IOException  An IO exception from the parser, possibly
     *                         from a byte stream or character stream
     *                         supplied by the parser.
     */
    public void parse(XMLInputSource source)
        throws IOException, XNIException {

        BufferedReader bufferedReader = new BufferedReader(openReader(source));
        try {
            emitProlog();

            // read lines; input is still consumed when no document
            // handler is registered, there is just nothing to report
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                if (fDocumentHandler != null) {
                    emitRow(line);
                }
            }
        }
        finally {
            // release the input even when a handler aborts the parse
            // by throwing an exception
            bufferedReader.close();
        }

        // end document
        if (fDocumentHandler != null) {
            fDocumentHandler.ignorableWhitespace(NEWLINE, null);
            fDocumentHandler.endElement(CSV, null);
            fDocumentHandler.endDocument(null);
        }

    } // parse(XMLInputSource)

    //
    // Private methods
    //

    /** Returns a reader for the input source, decoding its byte stream if it has no character stream. */
    private Reader openReader(XMLInputSource source) throws IOException {

        // NOTE(review): openInputSourceStream is not declared in this
        //               class; presumably it makes sure the source has
        //               an open stream -- confirm against the superclass.
        openInputSourceStream(source);

        Reader reader = source.getCharacterStream();
        if (reader == null) {
            InputStream stream = source.getByteStream();
            String encoding = source.getEncoding();
            if (encoding == null) {
                // no declared encoding: fall back to the platform default
                reader = new InputStreamReader(stream);
            }
            else {
                try {
                    reader = new InputStreamReader(stream, encoding);
                }
                catch (IOException e) {
                    // unsupported encoding name: do not leave the
                    // stream open behind us
                    stream.close();
                    throw e;
                }
            }
        }
        return reader;

    } // openReader(XMLInputSource):Reader

    /** Reports everything that precedes the rows: document start, XML declaration, DTD and the opening csv tag. */
    private void emitProlog() throws XNIException {

        // start document
        if (fDocumentHandler != null) {
            fDocumentHandler.startDocument(null, "UTF-8", new NamespaceSupport(), null);
            // the standalone pseudo-attribute takes "yes" or "no"
            fDocumentHandler.xmlDecl("1.0", "UTF-8", "yes", null);
            fDocumentHandler.doctypeDecl("csv", null, null, null);
        }

        // DTD
        if (fDTDHandler != null) {
            fDTDHandler.startDTD(null, null);
            fDTDHandler.elementDecl("csv", "(row)*", null);
            fDTDHandler.elementDecl("row", "(col)*", null);
            fDTDHandler.elementDecl("col", "(#PCDATA)", null);
        }
        if (fDTDContentModelHandler != null) {
            emitRepeatedChildModel("csv", "row");
            emitRepeatedChildModel("row", "col");

            // col holds character data only
            fDTDContentModelHandler.startContentModel("col", null);
            fDTDContentModelHandler.startGroup(null);
            fDTDContentModelHandler.pcdata(null);
            fDTDContentModelHandler.endGroup(null);
            fDTDContentModelHandler.endContentModel(null);
        }
        if (fDTDHandler != null) {
            fDTDHandler.endDTD(null);
        }

        // root element
        if (fDocumentHandler != null) {
            fDocumentHandler.startElement(CSV, EMPTY_ATTRS, null);
        }

    } // emitProlog()

    /** Reports the content model "(child)*" for the given parent element; requires a content model handler. */
    private void emitRepeatedChildModel(String parent, String child)
        throws XNIException {

        fDTDContentModelHandler.startContentModel(parent, null);
        fDTDContentModelHandler.startGroup(null);
        fDTDContentModelHandler.element(child, null);
        fDTDContentModelHandler.endGroup(null);
        short occurs = XMLDTDContentModelHandler.OCCURS_ZERO_OR_MORE;
        fDTDContentModelHandler.occurrence(occurs, null);
        fDTDContentModelHandler.endContentModel(null);

    } // emitRepeatedChildModel(String,String)

    /** Reports one input line as a row element with one col element per field; requires a document handler. */
    private void emitRow(String line) throws XNIException {

        fDocumentHandler.ignorableWhitespace(NEWLINE_ONE_SPACE, null);
        fDocumentHandler.startElement(ROW, EMPTY_ATTRS, null);

        // a blank line is reported as a row without columns
        if (line.length() > 0) {
            // scan for separators by hand so that empty fields are
            // kept and later columns do not shift to the left
            int start = 0;
            int end;
            while ((end = line.indexOf(FIELD_SEPARATOR, start)) != -1) {
                emitColumn(line.substring(start, end));
                start = end + 1;
            }
            emitColumn(line.substring(start));
        }

        fDocumentHandler.ignorableWhitespace(NEWLINE_ONE_SPACE, null);
        fDocumentHandler.endElement(ROW, null);

    } // emitRow(String)

    /** Reports one field as a col element; requires a document handler. */
    private void emitColumn(String value) throws XNIException {

        fDocumentHandler.ignorableWhitespace(NEWLINE_TWO_SPACES, null);
        fDocumentHandler.startElement(COL, EMPTY_ATTRS, null);
        // an empty field is an empty element: no characters event
        if (value.length() > 0) {
            fStringBuffer.clear();
            fStringBuffer.append(value);
            fDocumentHandler.characters(fStringBuffer, null);
        }
        fDocumentHandler.endElement(COL, null);

    } // emitColumn(String)

    //
    // Feature and property methods
    //

    // NOTE: The following methods are overridden to ignore setting
    //       of parser state so that this configuration does not
    //       throw configuration exceptions for features and properties
    //       that it doesn't care about.

    /** Ignores the request; this configuration has no settable features. */
    public void setFeature(String featureId, boolean state) {}

    /** Always returns false; this configuration has no features. */
    public boolean getFeature(String featureId) { return false; }

    /** Ignores the request; this configuration has no settable properties. */
    public void setProperty(String propertyId, Object value) {}

    /** Always returns null; this configuration has no properties. */
    public Object getProperty(String propertyId) { return null; }

} // class CSVConfiguration