package eu.dnetlib.iis.wf.collapsers;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.List;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.IndexedRecord;
import com.google.common.collect.Lists;
import eu.dnetlib.iis.collapsers.schemas.DocumentMetadata;
import eu.dnetlib.iis.collapsers.schemas.DocumentMetadataEnvelope;
import eu.dnetlib.iis.collapsers.schemas.DocumentTextEnvelope;
import eu.dnetlib.iis.collapsers.schemas.PublicationType;
import eu.dnetlib.iis.metadataextraction.schemas.DocumentText;
/**
 * Shared fixtures for the collapser tests: parameter lists, input records together with
 * their origin envelopes, hand-written merged/collapsed records, and helper assertions
 * for comparing Avro records.
 *
 * @author Dominika Tkaczyk
 */
public class SampleData {

    /* parameters */

    /** Origin names used by the envelopes below ("origin1" and "origin2"). */
    public static final List<String> origins = Lists.newArrayList("origin1", "origin2");

    /** Names of metadata fields; presumably the fields a collapser treats as significant (confirm against callers). */
    public static final List<String> significantFields = Lists.newArrayList("title", "authorIds", "abstract", "journal", "year");

    /* input records */

    /** Record "id-1" with abstract, language, year, keywords, datasources and publisher set; no title or author ids. */
    public static final DocumentMetadata metadataRecord11 = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 1")
            .setLanguage("en")
            .setYear(1990)
            .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
            .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
            .setPublisher("publisher 1")
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .build();

    /** {@link #metadataRecord11} wrapped with origin "origin1". */
    public static final DocumentMetadataEnvelope envMetadataRecord11 = DocumentMetadataEnvelope.newBuilder()
            .setOrigin("origin1")
            .setData(metadataRecord11).build();

    /** Record "id-1" with abstract, author ids, title and year set; no language, keywords, datasources or publisher. */
    public static final DocumentMetadata metadataRecord12 = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 2")
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .setTitle("title 2")
            .setYear(1991)
            .build();

    /** {@link #metadataRecord12} wrapped with origin "origin1". */
    public static final DocumentMetadataEnvelope envMetadataRecord12 = DocumentMetadataEnvelope.newBuilder()
            .setOrigin("origin1")
            .setData(metadataRecord12).build();

    /** Record "id-1" with only the id and publication type set. */
    public static final DocumentMetadata metadataRecord13 = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .build();

    /** {@link #metadataRecord13} wrapped with origin "origin1". */
    public static final DocumentMetadataEnvelope envMetadataRecord13 = DocumentMetadataEnvelope.newBuilder()
            .setOrigin("origin1")
            .setData(metadataRecord13).build();

    /** Record "id-1" with abstract, author ids, title and year set. */
    public static final DocumentMetadata metadataRecord21 = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 3")
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 13", "aut 23"))
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .setTitle("title 3")
            .setYear(1999)
            .build();

    /** {@link #metadataRecord21} wrapped with origin "origin2". */
    public static final DocumentMetadataEnvelope envMetadataRecord21 = DocumentMetadataEnvelope.newBuilder()
            .setOrigin("origin2")
            .setData(metadataRecord21).build();

    /** Record "id-1" with only the id and publication type set. */
    public static final DocumentMetadata metadataRecord22 = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .build();

    /** {@link #metadataRecord22} wrapped with origin "origin2". */
    public static final DocumentMetadataEnvelope envMetadataRecord22 = DocumentMetadataEnvelope.newBuilder()
            .setOrigin("origin2")
            .setData(metadataRecord22).build();

    /** Plain-text record "text-1". */
    public static final DocumentText textRecord = DocumentText.newBuilder()
            .setId("text-1")
            .setText("text text")
            .build();

    /** {@link #textRecord} wrapped with origin "origin1". */
    public static final DocumentTextEnvelope envTextRecord = DocumentTextEnvelope.newBuilder()
            .setOrigin("origin1")
            .setData(textRecord).build();

    /* merged records */

    /** Values of {@link #metadataRecord11} where it sets a field, otherwise those of {@link #metadataRecord12}. */
    public static final DocumentMetadata mergedRecord1112 = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 1")
            .setLanguage("en")
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
            .setTitle("title 2")
            .setYear(1990)
            .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
            .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
            .setPublisher("publisher 1")
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .build();

    /** Values of {@link #metadataRecord12} where it sets a field, otherwise those of {@link #metadataRecord11}. */
    public static final DocumentMetadata mergedRecord1211 = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 2")
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
            .setTitle("title 2")
            .setLanguage("en")
            .setYear(1991)
            .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
            .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
            .setPublisher("publisher 1")
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .build();

    /** Values of {@link #metadataRecord11} where it sets a field, otherwise those of {@link #metadataRecord21}. */
    public static final DocumentMetadata mergedRecord1121 = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 1")
            .setLanguage("en")
            .setYear(1990)
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 13", "aut 23"))
            .setTitle("title 3")
            .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
            .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
            .setPublisher("publisher 1")
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .build();

    /** Same field values as {@link #metadataRecord21}; {@link #metadataRecord22} sets nothing beyond id and type. */
    public static final DocumentMetadata mergedRecord2221 = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 3")
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 13", "aut 23"))
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .setTitle("title 3")
            .setYear(1999)
            .build();

    /* collapsed records */

    // within no merge, between no merge
    // (same field values as metadataRecord12)
    public static final DocumentMetadata recordWNoMergeBNoMerge = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 2")
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .setTitle("title 2")
            .setYear(1991)
            .build();

    // within merge, between no merge
    // (same field values as mergedRecord1211)
    public static final DocumentMetadata recordWMergeBNoMerge = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 2")
            .setLanguage("en")
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
            .setTitle("title 2")
            .setYear(1991)
            .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
            .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
            .setPublisher("publisher 1")
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .build();

    // within no merge, between merge
    // (same field values as metadataRecord12)
    public static final DocumentMetadata recordWNoMergeBMerge = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 2")
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .setTitle("title 2")
            .setYear(1991)
            .build();

    // within merge, between merge
    // (same field values as mergedRecord1211)
    public static final DocumentMetadata recordWMergeBMerge = DocumentMetadata.newBuilder()
            .setId("id-1")
            .setAbstract$("abstract 2")
            .setLanguage("en")
            .setAuthorIds(Lists.newArrayList((CharSequence)"aut 1", "aut 2"))
            .setTitle("title 2")
            .setYear(1991)
            .setKeywords(Lists.newArrayList((CharSequence)"kwd 1", "kwd 2"))
            .setDatasourceIds(Lists.newArrayList((CharSequence)"d 1", "d 2"))
            .setPublisher("publisher 1")
            .setPublicationType(PublicationType.newBuilder().setArticle(true).build())
            .build();

    /**
     * Asserts that two Avro records compare as equal under the expected record's schema.
     *
     * @param expected the reference record; its schema drives the comparison
     * @param actual the record under test
     */
    public static void assertEqualRecords(IndexedRecord expected, IndexedRecord actual) {
        assertEquals("Records are not equal: \nExpected: " + expected + "\nActual: " + actual + "\n",
                0, GenericData.get().compare(expected, actual, expected.getSchema()));
    }

    /**
     * Asserts that two lists hold the same records regardless of order.
     * Each actual record can satisfy at most one expected record, so duplicates must
     * occur the same number of times in both lists. The {@code actual} list itself is
     * not modified; matching works on a copy.
     *
     * @param expected the reference records
     * @param actual the records under test
     */
    public static <T extends IndexedRecord> void assertEqualRecords(List<T> expected, List<T> actual) {
        assertEquals("Records lists have different sizes: " + expected.size() + " and " + actual.size() + "\n",
                expected.size(), actual.size());

        // Candidates that have not yet been paired with an expected record.
        List<T> unmatched = Lists.newArrayList(actual);
        for (T exp : expected) {
            int matchIndex = -1;
            for (int i = 0; i < unmatched.size(); i++) {
                if (0 == GenericData.get().compare(exp, unmatched.get(i), exp.getSchema())) {
                    matchIndex = i;
                    // The first match is enough; scanning further only wastes comparisons.
                    break;
                }
            }
            assertTrue(
                    "Expected record " + exp.toString() + " was not found among the actual records\n",
                    matchIndex >= 0);
            // Remove by position so exactly the matched element is consumed, independent of
            // how the record type implements equals().
            unmatched.remove(matchIndex);
        }
    }
}