/**
* Copyright 2015 StreamSets Inc.
*
* Licensed under the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.streamsets.pipeline.lib.parser.xml;
import com.streamsets.pipeline.api.Field;
import com.streamsets.pipeline.api.OnRecordError;
import com.streamsets.pipeline.api.Record;
import com.streamsets.pipeline.api.Stage;
import com.streamsets.pipeline.api.ext.io.OverrunReader;
import com.streamsets.pipeline.lib.parser.DataParser;
import com.streamsets.pipeline.lib.xml.StreamingXmlParser;
import com.streamsets.pipeline.sdk.ContextInfoCreator;
import com.streamsets.testing.ApiUtils;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class TestXmlCharDataParser {
/**
 * Un-namespaced bookstore document with two {@code book} elements, each holding a
 * {@code title} (with a {@code lang} attribute) and a {@code price}. Used as parser input by
 * several XPath tests in this class.
 */
public static final String BOOK_XML_NO_NAMESPACE =
"<bookstore>\n" +
" <book>\n" +
" <title lang=\"en\">Harry Potter</title>\n" +
" <price>29.99</price>\n" +
" </book>\n" +
" <book>\n" +
" <title lang=\"en_us\">Learning XML</title>\n" +
" <price>39.95</price>\n" +
" </book>\n" +
"</bookstore>";
/**
 * Builds the stage context handed to every parser created by these tests.
 *
 * <p>The context is created through {@code ContextInfoCreator.createSourceContext} with the
 * instance name {@code "i"}, {@code false}, {@link OnRecordError#TO_ERROR} and an empty list.
 * The list comes from the type-safe {@link Collections#emptyList()} factory rather than the raw
 * {@code Collections.EMPTY_LIST} constant, which avoids an unchecked conversion.
 *
 * @return a freshly created context
 */
private Stage.Context getContext() {
  // NOTE(review): assumes the last parameter is declared as List<String> - confirm against the SDK
  return ContextInfoCreator.createSourceContext(
      "i",
      false,
      OnRecordError.TO_ERROR,
      Collections.<String>emptyList()
  );
}
/**
 * Parses a two-element document from offset 0, passing {@code "e"} as the record element.
 * For each record it checks the source id, the text value and the offset reported afterwards;
 * a third parse must return {@code null} and leave the offset at {@code -1}.
 */
@Test
public void testParse() throws Exception {
  final StringReader xml = new StringReader("<r><e>Hello</e><e>Bye</e></r>");
  final OverrunReader input = new OverrunReader(xml, 1000, true, false);
  final DataParser xmlParser = new XmlCharDataParser(getContext(), "id", input, 0, "e", 100);
  Assert.assertEquals(0, Long.parseLong(xmlParser.getOffset()));

  // first <e> element
  final Record first = xmlParser.parse();
  Assert.assertNotNull(first);
  Assert.assertEquals("id::0", first.getHeader().getSourceId());
  final Field firstValue = first.get().getValueAsMap().get("value");
  Assert.assertEquals("Hello", firstValue.getValueAsString());
  Assert.assertEquals(18, Long.parseLong(xmlParser.getOffset()));

  // second <e> element
  final Record second = xmlParser.parse();
  Assert.assertNotNull(second);
  Assert.assertEquals("id::18", second.getHeader().getSourceId());
  final Field secondValue = second.get().getValueAsMap().get("value");
  Assert.assertEquals("Bye", secondValue.getValueAsString());
  Assert.assertEquals(29, Long.parseLong(xmlParser.getOffset()));

  // nothing left to read
  final Record afterEnd = xmlParser.parse();
  Assert.assertNull(afterEnd);
  Assert.assertEquals(-1, Long.parseLong(xmlParser.getOffset()));

  xmlParser.close();
}
/**
 * Parses the bookstore document with an empty record element argument and checks both entries
 * of the record's {@code book} list, expecting indexed xpaths ({@code /bookstore/book[0]} and
 * {@code /bookstore/book[1]}).
 */
@Test
public void testXpathWithoutDelimiterElement() throws Exception {
  final OverrunReader input = new OverrunReader(new StringReader(BOOK_XML_NO_NAMESPACE), 1000, true, false);
  final DataParser xmlParser = new XmlCharDataParser(getContext(), "id", input, 0, "", true, 1000);
  Assert.assertEquals(0, Long.parseLong(xmlParser.getOffset()));

  final Record bookstore = xmlParser.parse();
  Assert.assertNotNull(bookstore);
  final List<Field> bookFields = bookstore.get().getValueAsMap().get("book").getValueAsList();

  final Map<String, Field> firstBook = bookFields.get(0).getValueAsMap();
  assertBookRecord(
      "/bookstore/book[0]", "Harry Potter", "en", "29.99", firstBook.get("title"), firstBook.get("price")
  );

  final Map<String, Field> secondBook = bookFields.get(1).getValueAsMap();
  assertBookRecord(
      "/bookstore/book[1]", "Learning XML", "en_us", "39.95", secondBook.get("title"), secondBook.get("price")
  );

  xmlParser.close();
}
/**
 * Runs the bookstore document through the parser once per record-path spelling and checks that
 * both book records carry the same {@code /bookstore/book} xpath each time.
 */
@Test
public void testXpathWithDelimiterElement() throws Exception {
  // the xpath written to the output must not depend on how the record path was spelled
  final String expectedBookXpath = "/bookstore/book";
  final List<String> recordPaths = Arrays.asList("/bookstore/book", "book", "/*[1]/*");
  for (final String recordPath : recordPaths) {
    final StringReader xml = new StringReader(BOOK_XML_NO_NAMESPACE);
    final OverrunReader input = new OverrunReader(xml, 1000, true, false);
    final DataParser xmlParser = new XmlCharDataParser(getContext(), "id", input, 0, recordPath, true, 1000);
    Assert.assertEquals(0, Long.parseLong(xmlParser.getOffset()));

    final Record firstBook = xmlParser.parse();
    assertBookRecord(expectedBookXpath, "Harry Potter", "en", "29.99", firstBook);

    final Record secondBook = xmlParser.parse();
    assertBookRecord(expectedBookXpath, "Learning XML", "en_us", "39.95", secondBook);

    xmlParser.close();
  }
}
/**
 * Backwards-compatibility check for SDC-5407: with the last constructor flag set to
 * {@code false}, both book records must satisfy {@code assertBookRecordOldStyle} for every
 * spelling of the record path.
 */
@Test
public void testOldStyleParserOutput() throws Exception {
  final String expectedBookXpath = "/bookstore/book";
  final List<String> recordPaths = Arrays.asList("/bookstore/book", "book", "/*[1]/*");
  for (final String recordPath : recordPaths) {
    final OverrunReader input = new OverrunReader(new StringReader(BOOK_XML_NO_NAMESPACE), 1000, true, false);
    final DataParser xmlParser = new XmlCharDataParser(
        getContext(), "id", input, 0, recordPath, true, null, 1000, false
    );
    Assert.assertEquals(0, Long.parseLong(xmlParser.getOffset()));

    final Record firstBook = xmlParser.parse();
    final Field firstTitle = firstBook.get("/title");
    final Field firstPrice = firstBook.get("/price");
    assertBookRecordOldStyle(
        expectedBookXpath, "Harry Potter", "en", "29.99", firstTitle, firstPrice, "", "", ""
    );

    final Record secondBook = xmlParser.parse();
    final Field secondTitle = secondBook.get("/title");
    final Field secondPrice = secondBook.get("/price");
    assertBookRecordOldStyle(
        expectedBookXpath, "Learning XML", "en_us", "39.95", secondTitle, secondPrice, "", "", ""
    );

    xmlParser.close();
  }
}
// namespace URIs declared by the input document of testXpathWithDelimiterElementNamespaced
private static final String NAMESPACE1_URI = "http://namespace1.com";
private static final String NAMESPACE2_URI = "http://namespace2.com";
private static final String NAMESPACE3_URI = "http://namespace3.com";
// NAMESPACE1_URI is declared as a default namespace (no prefix) in the input doc;
// the test expects it under this prefix in the output
private static final String NAMESPACE1_OUTPUT_PREFIX = "ns1";
// NAMESPACE2_URI is bound to this prefix in the input doc, and the test expects the
// same prefix to be preserved in the output
private static final String NAMESPACE2_OUTPUT_PREFIX = "books";
// NAMESPACE3_URI is also declared without a prefix in the input doc; the test expects
// it under this prefix in the output
private static final String NAMESPACE3_OUTPUT_PREFIX = "ns2";
/**
 * Parses a bookstore document that uses three namespaces, selecting records with the
 * namespaced path {@code /bs:bookstore/b:book}. Checks that each record's xpaths use the
 * expected output prefixes and that the record header exposes the namespace URIs.
 */
@Test
public void testXpathWithDelimiterElementNamespaced() throws Exception {
  // fragments that repeat for both <book> elements
  final String bookOpen = " <" + NAMESPACE2_OUTPUT_PREFIX + ":book>\n";
  final String bookClose = " </" + NAMESPACE2_OUTPUT_PREFIX + ":book>\n";
  final String ns3Declaration = " xmlns=\"" + NAMESPACE3_URI + "\"";
  final String xml =
      "<bookstore xmlns=\"" + NAMESPACE1_URI + "\" xmlns:" + NAMESPACE2_OUTPUT_PREFIX
          + "=\"" + NAMESPACE2_URI + "\">\n"
          + bookOpen
          + " <title" + ns3Declaration + " lang=\"en\">Harry Potter</title>\n"
          + " <price" + ns3Declaration + ">29.99</price>\n"
          + bookClose
          + bookOpen
          + " <title" + ns3Declaration + " lang=\"en_us\">Learning XML</title>\n"
          + " <price" + ns3Declaration + ">39.95</price>\n"
          + bookClose
          + "</bookstore>";
  final OverrunReader input = new OverrunReader(new StringReader(xml), 1000, true, false);

  // prefixes used by the record path; they differ from the prefixes in the document
  final Map<String, String> pathNamespaces = new HashMap<>();
  pathNamespaces.put("bs", NAMESPACE1_URI);
  pathNamespaces.put("b", NAMESPACE2_URI);

  final DataParser xmlParser = new XmlCharDataParser(
      getContext(), "id", input, 0, "/bs:bookstore/b:book", true, pathNamespaces, 1000, true
  );
  Assert.assertEquals(0, Long.parseLong(xmlParser.getOffset()));

  final String expectedBookXpath = "/ns1:bookstore/" + NAMESPACE2_OUTPUT_PREFIX + ":book";

  final Record firstBook = xmlParser.parse();
  assertBookRecord(expectedBookXpath, "Harry Potter", "en", "29.99", firstBook, "ns2:", "", "ns2:");
  assertNamespacedBookRecordHeaders(firstBook);

  final Record secondBook = xmlParser.parse();
  assertBookRecord(expectedBookXpath, "Learning XML", "en_us", "39.95", secondBook, "ns2:", "", "ns2:");
  assertNamespacedBookRecordHeaders(secondBook);

  xmlParser.close();
}
/**
 * Parses a document whose record elements ({@code something}) contain only attributes and
 * text, and checks the value and xpath attribute of every field in both records.
 */
@Test
public void testXpathWithDelimiterElementNonNested() throws Exception {
  OverrunReader reader = new OverrunReader(new StringReader(
      "<root>\n" +
      " <something attr1=\"attrVal1-1\" attr2=\"attrVal2-1\">1</something>\n" +
      " <something attr1=\"attrVal1-2\" attr2=\"attrVal2-2\">2</something>\n" +
      "</root>"
  ), 1000, true, false);
  DataParser parser = new XmlCharDataParser(getContext(), "id", reader, 0, "something", true, null, 1000, false);
  Assert.assertEquals(0, Long.parseLong(parser.getOffset()));

  assertNonNestedSomethingRecord(parser.parse(), "attrVal1-1", "attrVal2-1", "1");
  assertNonNestedSomethingRecord(parser.parse(), "attrVal1-2", "attrVal2-2", "2");

  parser.close();
}

/**
 * Asserts the three fields (attr1, attr2, text value) of one parsed {@code something} record.
 *
 * @param record the record returned by the parser; must not be null
 * @param expectedAttr1 expected value of the {@code attr1} attribute field
 * @param expectedAttr2 expected value of the {@code attr2} attribute field
 * @param expectedText expected text value of the element
 */
private static void assertNonNestedSomethingRecord(
    Record record,
    String expectedAttr1,
    String expectedAttr2,
    String expectedText
) {
  // fail with an assertion message instead of a NullPointerException on the first field lookup
  Assert.assertNotNull("parser returned no record", record);
  assertFieldValueAndXpath(
      record.get("/" + StreamingXmlParser.ATTR_PREFIX_KEY + "attr1"),
      expectedAttr1,
      "/root/something/@attr1"
  );
  assertFieldValueAndXpath(
      record.get("/" + StreamingXmlParser.ATTR_PREFIX_KEY + "attr2"),
      expectedAttr2,
      "/root/something/@attr2"
  );
  assertFieldValueAndXpath(
      record.get("/" + StreamingXmlParser.VALUE_KEY),
      expectedText,
      "/root/something"
  );
}

/**
 * Asserts that a field holds the expected string value and carries exactly one field
 * attribute, the xpath, with the expected content.
 *
 * @param field the field to inspect; must not be null
 * @param expectedValue expected result of {@code getValueAsString()}
 * @param expectedXpath expected value of the {@code StreamingXmlParser.XPATH_KEY} attribute
 */
private static void assertFieldValueAndXpath(Field field, String expectedValue, String expectedXpath) {
  Assert.assertNotNull("missing field for xpath " + expectedXpath, field);
  Assert.assertEquals(expectedValue, field.getValueAsString());
  Assert.assertNotNull(field.getAttributes());
  Assert.assertEquals(1, field.getAttributes().size());
  Assert.assertEquals(expectedXpath, field.getAttribute(StreamingXmlParser.XPATH_KEY));
}
/**
 * Asserts a book record whose {@code title} and {@code price} fields sit directly under the
 * record root at {@code /title} and {@code /price}, using empty xpath prefixes.
 *
 * @param bookXpath expected xpath of the book element
 * @param bookTitle expected title text
 * @param lang expected value of the title's {@code lang} attribute
 * @param price expected price text
 * @param record the parsed record; must not be null
 */
private static void assertBookRecord(
    String bookXpath,
    String bookTitle,
    String lang,
    String price,
    Record record
) {
  Assert.assertNotNull(record);
  final Field titleField = record.get("/title");
  final Field priceField = record.get("/price");
  assertBookRecord(bookXpath, bookTitle, lang, price, titleField, priceField, "", "", "");
}
/**
 * Asserts a book record whose {@code title} and {@code price} field names carry a prefix.
 * The fields are looked up at {@code "/" + prefix + "title"} and
 * {@code "/" + prefix + "price"}, then checked by the field-based overload.
 *
 * @param bookXpath expected xpath of the book element
 * @param bookTitle expected title text
 * @param lang expected value of the title's {@code lang} attribute
 * @param price expected price text
 * @param record the parsed record; must not be null
 * @param titleXpathPrefix prefix of the title field name and xpath segment
 * @param langXpathPrefix prefix forwarded for the lang attribute
 * @param priceXpathPrefix prefix of the price field name and xpath segment
 */
private static void assertBookRecord(
    String bookXpath,
    String bookTitle,
    String lang,
    String price,
    Record record,
    String titleXpathPrefix,
    String langXpathPrefix,
    String priceXpathPrefix
) {
  Assert.assertNotNull(record);
  final String titlePath = "/" + titleXpathPrefix + "title";
  final String pricePath = "/" + priceXpathPrefix + "price";
  final Field titleField = record.get(titlePath);
  final Field priceField = record.get(pricePath);
  assertBookRecord(
      bookXpath, bookTitle, lang, price, titleField, priceField, titleXpathPrefix, langXpathPrefix, priceXpathPrefix
  );
}
/**
 * Asserts already-extracted {@code title} and {@code price} fields of a book, using empty
 * xpath prefixes.
 *
 * @param bookXpath expected xpath of the book element
 * @param bookTitle expected title text
 * @param bookLang expected value of the title's {@code lang} attribute
 * @param bookPrice expected price text
 * @param title the title field
 * @param price the price field
 */
private static void assertBookRecord(
    String bookXpath,
    String bookTitle,
    String bookLang,
    String bookPrice,
    Field title,
    Field price
) {
  final String noPrefix = "";
  assertBookRecord(bookXpath, bookTitle, bookLang, bookPrice, title, price, noPrefix, noPrefix, noPrefix);
}
/**
 * Asserts the {@code title} and {@code price} fields of a book. The title's text, single
 * xpath attribute and {@code lang} attribute are checked, followed by the price's text and
 * xpath. The {@code lang} value is read as a field attribute of the first title list item.
 *
 * @param bookXpath expected xpath of the book element
 * @param bookTitle expected title text
 * @param bookLang expected value of the title's {@code lang} attribute
 * @param bookPrice expected price text
 * @param title the title field; must not be null
 * @param price the price field; must not be null
 * @param titleXpathPrefix prefix of the title segment in the expected xpath
 * @param langXpathPrefix accepted for signature parity; not used by this method
 * @param priceXpathPrefix prefix of the price segment in the expected xpath
 */
private static void assertBookRecord(
    String bookXpath,
    String bookTitle,
    String bookLang,
    String bookPrice,
    Field title,
    Field price,
    String titleXpathPrefix,
    String langXpathPrefix,
    String priceXpathPrefix
) {
  Assert.assertNotNull(title);
  Assert.assertNotNull(price);

  // title: text value plus its xpath attribute
  final String expectedTitleXpath = bookXpath + "/" + titleXpathPrefix + "title";
  final Map<String, Field> titleEntries = ApiUtils.firstItemAsMap(title);
  final Field titleText = titleEntries.get(StreamingXmlParser.VALUE_KEY);
  Assert.assertEquals(bookTitle, titleText.getValueAsString());
  Assert.assertNotNull(titleText.getAttributes());
  Assert.assertEquals(1, titleText.getAttributes().size());
  Assert.assertEquals(expectedTitleXpath, titleText.getAttribute(StreamingXmlParser.XPATH_KEY));

  // lang: stored as a field attribute on the first title item
  final Field firstTitleItem = title.getValueAsList().get(0);
  final String actualLang = firstTitleItem.getAttribute(StreamingXmlParser.XMLATTR_ATTRIBUTE_PREFIX + "lang");
  Assert.assertEquals(bookLang, actualLang);

  // price: text value plus its xpath attribute
  final String expectedPriceXpath = bookXpath + "/" + priceXpathPrefix + "price";
  final Map<String, Field> priceEntries = ApiUtils.firstItemAsMap(price);
  final Field priceText = priceEntries.get(StreamingXmlParser.VALUE_KEY);
  Assert.assertEquals(bookPrice, priceText.getValueAsString());
  Assert.assertEquals(expectedPriceXpath, priceText.getAttribute(StreamingXmlParser.XPATH_KEY));
}
/**
 * Asserts the {@code title} and {@code price} fields of a book in the old output layout,
 * where the {@code lang} attribute is a separate map entry under
 * {@code StreamingXmlParser.ATTR_PREFIX_KEY + "lang"} with its own xpath attribute.
 *
 * @param bookXpath expected xpath of the book element
 * @param bookTitle expected title text
 * @param bookLang expected value of the {@code lang} entry
 * @param bookPrice expected price text
 * @param title the title field; must not be null
 * @param price the price field; must not be null
 * @param titleXpathPrefix prefix of the title segment in the expected xpaths
 * @param langXpathPrefix prefix of the lang attribute in its expected xpath
 * @param priceXpathPrefix prefix of the price segment in the expected xpath
 */
private static void assertBookRecordOldStyle(
    String bookXpath,
    String bookTitle,
    String bookLang,
    String bookPrice,
    Field title,
    Field price,
    String titleXpathPrefix,
    String langXpathPrefix,
    String priceXpathPrefix
) {
  Assert.assertNotNull(title);
  Assert.assertNotNull(price);

  final String expectedTitleXpath = bookXpath + "/" + titleXpathPrefix + "title";
  final Map<String, Field> titleEntries = ApiUtils.firstItemAsMap(title);

  // title text
  final Field titleText = titleEntries.get(StreamingXmlParser.VALUE_KEY);
  Assert.assertEquals(bookTitle, titleText.getValueAsString());
  Assert.assertNotNull(titleText.getAttributes());
  Assert.assertEquals(1, titleText.getAttributes().size());
  Assert.assertEquals(expectedTitleXpath, titleText.getAttribute(StreamingXmlParser.XPATH_KEY));

  // lang attribute, stored as a sibling entry of the title text
  final Field langEntry = titleEntries.get(StreamingXmlParser.ATTR_PREFIX_KEY + "lang");
  Assert.assertEquals(bookLang, langEntry.getValueAsString());
  Assert.assertNotNull(langEntry.getAttributes());
  Assert.assertEquals(1, langEntry.getAttributes().size());
  final String expectedLangXpath = bookXpath + "/" + titleXpathPrefix + "title/@" + langXpathPrefix + "lang";
  Assert.assertEquals(expectedLangXpath, langEntry.getAttribute(StreamingXmlParser.XPATH_KEY));

  // price text
  final Map<String, Field> priceEntries = ApiUtils.firstItemAsMap(price);
  final Field priceText = priceEntries.get(StreamingXmlParser.VALUE_KEY);
  Assert.assertEquals(bookPrice, priceText.getValueAsString());
  final String expectedPriceXpath = bookXpath + "/" + priceXpathPrefix + "price";
  Assert.assertEquals(expectedPriceXpath, priceText.getAttribute(StreamingXmlParser.XPATH_KEY));
}
/**
 * Asserts that the record header maps each expected output prefix, looked up under
 * {@code XmlCharDataParser.RECORD_ATTRIBUTE_NAMESPACE_PREFIX + prefix}, to its namespace URI.
 *
 * @param record the parsed record whose header is inspected
 */
private static void assertNamespacedBookRecordHeaders(Record record) {
  final Record.Header recordHeader = record.getHeader();
  // {output prefix, namespace URI}, checked in declaration order
  final String[][] prefixToUri = {
      {NAMESPACE1_OUTPUT_PREFIX, NAMESPACE1_URI},
      {NAMESPACE2_OUTPUT_PREFIX, NAMESPACE2_URI},
      {NAMESPACE3_OUTPUT_PREFIX, NAMESPACE3_URI},
  };
  for (final String[] entry : prefixToUri) {
    Assert.assertEquals(
        entry[1],
        recordHeader.getAttribute(XmlCharDataParser.RECORD_ATTRIBUTE_NAMESPACE_PREFIX + entry[0])
    );
  }
}
/**
 * Starts the parser at offset 18 of the two-element document and checks that only the second
 * element ({@code Bye}) is returned, followed by {@code null} with the offset at {@code -1}.
 */
@Test
public void testParseWithOffset() throws Exception {
  final StringReader xml = new StringReader("<r><e>Hello</e><e>Bye</e></r>");
  final OverrunReader input = new OverrunReader(xml, 1000, true, false);
  final DataParser xmlParser = new XmlCharDataParser(getContext(), "id", input, 18, "e", 100);
  Assert.assertEquals(18, Long.parseLong(xmlParser.getOffset()));

  // the only record left after the initial offset
  final Record remaining = xmlParser.parse();
  Assert.assertNotNull(remaining);
  Assert.assertEquals("id::18", remaining.getHeader().getSourceId());
  final Field remainingValue = remaining.get().getValueAsMap().get("value");
  Assert.assertEquals("Bye", remainingValue.getValueAsString());
  Assert.assertEquals(29, Long.parseLong(xmlParser.getOffset()));

  // nothing left to read
  final Record afterEnd = xmlParser.parse();
  Assert.assertNull(afterEnd);
  Assert.assertEquals(-1, Long.parseLong(xmlParser.getOffset()));

  xmlParser.close();
}
/**
 * Verifies that calling {@code parse()} on a parser that has already been closed throws an
 * {@link IOException}.
 *
 * <p>The expectation is scoped to the {@code parse()} call alone. An {@code IOException}
 * thrown while constructing the reader or parser, or by {@code close()} itself, now fails the
 * test instead of satisfying it, which a method-wide {@code @Test(expected = ...)} would allow.
 */
@Test
public void testClose() throws Exception {
  OverrunReader reader = new OverrunReader(new StringReader("<r><e>Hello</e><e>Bye</e></r>"), 1000, true, false);
  DataParser parser = new XmlCharDataParser(getContext(), "id", reader, 0, "e", 100);
  parser.close();
  try {
    parser.parse();
    Assert.fail("parse() on a closed parser should have thrown an IOException");
  } catch (IOException expected) {
    // expected: the parser was closed before parse() was called
  }
}
}