package com.threatconnect.sdk.parser;
import com.threatconnect.sdk.parser.model.Item;
import com.threatconnect.sdk.parser.result.PageResult;
import com.threatconnect.sdk.parser.source.DataSource;
import org.apache.commons.io.IOUtils;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
/**
 * Base class for paged parsers whose pages are XML documents. Each page is read fully into
 * memory as a UTF-8 string, optionally preprocessed via {@link #preProcessXML(String)}, turned
 * into a DOM {@link Document} via {@link #createDocument(String)} and then handed to
 * {@link #processXmlDocument(Document)}.
 *
 * @param <I> the type of item produced by this parser
 */
public abstract class AbstractPagedXMLParser<I extends Item> extends AbstractPagedParser<I>
{
    // the document built from the most recently parsed page; null until a page has been parsed
    private Document document;

    public AbstractPagedXMLParser(final DataSource dataSource)
    {
        super(dataSource);
    }

    /**
     * Reads one page of XML from the given data source and processes it.
     *
     * @param dataSource the data source for the page to parse; if {@code null}, the data source
     *                   this parser was constructed with is used instead
     * @return the result of {@link #processXmlDocument(Document)} for this page
     * @throws ParserException if the page cannot be read, the XML cannot be parsed, or the
     *                         document cannot be processed
     */
    @Override
    protected PageResult<I> parsePage(DataSource dataSource) throws ParserException
    {
        // read from the data source for THIS page; previously the argument was ignored and the
        // parser's original data source was read on every call
        final DataSource pageSource = (dataSource != null) ? dataSource : getDataSource();

        try (InputStream is = pageSource.read())
        {
            // read the xml as a string and allow any xml preprocessing if needed
            logger.trace("Loading XML DataSource as a string");
            String rawXML = IOUtils.toString(is, "UTF-8");

            logger.trace("Preprocessing raw XML String");
            String xml = preProcessXML(rawXML);
            logger.trace(xml);

            // create a document from the processed xml
            logger.trace("Converting the XML String to an XML Document object");
            document = createDocument(xml);

            // process the xml document
            logger.trace("Processing XML Document");
            return processXmlDocument(document);
        }
        catch (IOException | ParserConfigurationException | SAXException | XPathExpressionException e)
        {
            throw new ParserException(e);
        }
    }

    /**
     * Hook that allows the raw XML string to be modified before it is parsed into a document.
     * The default implementation returns the input unchanged.
     *
     * @param xml the raw XML as read from the data source
     * @return the XML string that should be parsed
     */
    protected String preProcessXML(final String xml)
    {
        return xml;
    }

    /**
     * Parses an XML string into a namespace-aware DOM document. Resolution of external entities
     * and external DTDs is disabled so that untrusted XML cannot make the parser read local
     * files or open network connections (XXE). Documents that merely declare a DOCTYPE, or that
     * use internal entities, are still accepted.
     *
     * @param xml the XML content to parse
     * @return the parsed document
     * @throws ParserConfigurationException if the parser cannot be created or does not support
     *                                      one of the hardening features
     * @throws SAXException                 if the XML is not well formed
     * @throws IOException                  if the XML cannot be read
     */
    protected Document createDocument(final String xml) throws ParserConfigurationException, SAXException, IOException
    {
        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        domFactory.setNamespaceAware(true);

        // XXE hardening: never fetch external entities or external DTD subsets
        domFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        domFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        domFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

        DocumentBuilder builder = domFactory.newDocumentBuilder();
        Reader reader = new StringReader(xml);
        InputSource inputSource = new InputSource(reader);
        return builder.parse(inputSource);
    }

    /**
     * Returns the document created by the most recent call to
     * {@link #parsePage(DataSource)}.
     *
     * @return the last parsed document, or {@code null} if no page has been parsed yet
     */
    protected Document getDocument()
    {
        return document;
    }

    /**
     * Processes the XML document for a single page.
     *
     * @param document the parsed XML document for the current page
     * @return the result for this page
     * @throws ParserException          if the document cannot be processed
     * @throws XPathExpressionException if an XPath expression used during processing is invalid
     */
    protected abstract PageResult<I> processXmlDocument(Document document)
        throws ParserException, XPathExpressionException;
}