package eu.dnetlib.iis.wf.metadataextraction; import static eu.dnetlib.iis.wf.metadataextraction.NlmToDocumentWithBasicMetadataConverter.EMPTY_META; import eu.dnetlib.iis.metadataextraction.schemas.Affiliation; import eu.dnetlib.iis.metadataextraction.schemas.Author; import eu.dnetlib.iis.metadataextraction.schemas.ExtractedDocumentMetadata; import eu.dnetlib.iis.metadataextraction.schemas.ReferenceMetadata; import eu.dnetlib.iis.wf.metadataextraction.NlmToDocumentWithBasicMetadataConverter; import java.util.List; import org.apache.avro.util.Utf8; import org.jdom.Document; import org.jdom.input.SAXBuilder; import static org.junit.Assert.*; import org.junit.Test; /** * {@link NlmToDocumentWithBasicMetadataConverter} test class. * * @author mhorst * @author Dominika Tkaczyk * */ public class NlmToDocumentWithBasicMetadataConverterTest { private static final String testXML = "/eu/dnetlib/iis/wf/metadataextraction/nlm-example.xml"; // --------------------------------------- TESTS --------------------------------------- @Test public void testConvertFull() throws Exception { // given SAXBuilder builder = new SAXBuilder(); Document document = (Document) builder.build(ClassLoader.class.getResourceAsStream(testXML)); String id = "predefinedId"; // execute ExtractedDocumentMetadata result = NlmToDocumentWithBasicMetadataConverter.convertFull(id, document, "text"); // assert assertNotNull(result); assertEquals(id, result.getId()); testAuthorsAndAffiliations(result.getAuthors(), result.getAffiliations()); testReferences(result.getReferences()); assertEquals("Hindawi Publishing Corporation", result.getPublisher()); assertEquals("International Journal of Digital Multimedia Broadcasting", result.getJournal()); assertEquals( "Video Quality Prediction Models Based on Video Content Dynamics for H.264 Video over UMTS Networks", result.getTitle()); assertEquals(2, result.getExternalIdentifiers().size()); assertEquals(new Utf8("10.1155/2010/608138"), result.getExternalIdentifiers().get(new Utf8("doi"))); assertEquals(new Utf8("608138"), result.getExternalIdentifiers().get(new Utf8("hindawi-id"))); assertNotNull(result.getAbstract$()); assertFalse(result.getAbstract$().toString().isEmpty()); assertEquals(2, result.getKeywords().size()); assertTrue(result.getKeywords().contains("keyword 1")); assertTrue(result.getKeywords().contains("keyword 2")); assertEquals(2010, (int) result.getYear()); assertEquals("2010", result.getVolume()); assertEquals("issue-5", result.getIssue()); assertNotNull(result.getPages()); assertEquals("50", result.getPages().getStart()); assertEquals("60", result.getPages().getEnd()); } @Test(expected=RuntimeException.class) public void testConvertFullNoId() throws Exception { // given SAXBuilder builder = new SAXBuilder(); Document document = (Document) builder.build(ClassLoader.class.getResourceAsStream(testXML)); // execute NlmToDocumentWithBasicMetadataConverter.convertFull(null, document, "text"); } @Test public void testConvertFullNoDocument() throws Exception { // given String id = "id"; // execute ExtractedDocumentMetadata result = NlmToDocumentWithBasicMetadataConverter.convertFull(id, null, "text"); // assert assertNotNull(result); assertEquals(id, result.getId()); assertEquals("text", result.getText()); } @Test(expected=RuntimeException.class) public void testCreateEmptyNoId() throws Exception { // execute NlmToDocumentWithBasicMetadataConverter.createEmpty(null); } @Test public void testCreateEmpty() throws Exception { // given String id = "id"; // execute ExtractedDocumentMetadata result = NlmToDocumentWithBasicMetadataConverter.createEmpty(id); // assert assertNotNull(result); assertEquals(id, result.getId()); assertEquals("", result.getText()); assertEquals(EMPTY_META, result.getPublicationTypeName()); } // --------------------------------------- PRIVATE --------------------------------------- private void testAuthorsAndAffiliations(List<Author> authors, List<Affiliation> affiliations) { assertNotNull(authors); assertEquals(3, authors.size()); assertEquals("Lingfen Sun", authors.get(0).getAuthorFullName()); assertEquals("Fidel Liberal", authors.get(1).getAuthorFullName()); assertEquals("Harilaos Koumaras", authors.get(2).getAuthorFullName()); assertNotNull(affiliations); assertEquals(3, affiliations.size()); assertNotNull(authors.get(0).getAffiliationPositions()); assertEquals(2, authors.get(0).getAffiliationPositions().size()); assertEquals( "Centre for Signal Processing and Multimedia Communication, School of Computing, University of Plymouth", affiliations.get(authors.get(0).getAffiliationPositions().get(0)).getOrganization()); assertNotNull(authors.get(1).getAffiliationPositions()); assertEquals(1, authors.get(1).getAffiliationPositions().size()); assertEquals( "Department of Electronics and Telecommunications, University of the Basque Country (UPV/EHU), 48013 Bilbao, Spain", affiliations.get(authors.get(1).getAffiliationPositions().get(0)).getRawText()); assertEquals("Spain", affiliations.get(authors.get(1).getAffiliationPositions().get(0)).getCountryName()); assertEquals("ES", affiliations.get(authors.get(1).getAffiliationPositions().get(0)).getCountryCode()); assertEquals("48013 Bilbao", affiliations.get(authors.get(1).getAffiliationPositions().get(0)).getAddress()); assertEquals("Department of Electronics and Telecommunications, University of the Basque Country (UPV/EHU)", affiliations.get(authors.get(1).getAffiliationPositions().get(0)).getOrganization()); assertNotNull(authors.get(2).getAffiliationPositions()); assertEquals(1, authors.get(2).getAffiliationPositions().size()); assertEquals("Institute of Informatics and Telecommunications, NCSR Demokritos, 15310 Athens, Greece", affiliations.get(authors.get(2).getAffiliationPositions().get(0)).getRawText()); assertEquals("Greece", affiliations.get(authors.get(2).getAffiliationPositions().get(0)).getCountryName()); assertEquals("GR", affiliations.get(authors.get(2).getAffiliationPositions().get(0)).getCountryCode()); assertEquals("15310 Athens", affiliations.get(authors.get(2).getAffiliationPositions().get(0)).getAddress()); assertEquals("Institute of Informatics and Telecommunications, NCSR Demokritos", affiliations.get(authors.get(2).getAffiliationPositions().get(0)).getOrganization()); } private void testReferences(List<ReferenceMetadata> references) { assertNotNull(references); assertEquals(2, references.size()); ReferenceMetadata ref1 = references.get(0); assertEquals(1, (int) ref1.getPosition()); assertEquals( "[1] E. Braunwald , “ Shattuck lecture: cardiovascular medicine at the turn of the millennium: triumphs , concerns, and opportunities,” New England Journal of Medicine , vol. 337 , no. 19 , pp. 1360 – 1369 , 1997 .", ref1.getText()); assertNotNull(ref1.getBasicMetadata()); assertNotNull(ref1.getBasicMetadata().getAuthors()); assertEquals(1, ref1.getBasicMetadata().getAuthors().size()); assertTrue(ref1.getBasicMetadata().getAuthors().contains("Braunwald, E.")); assertEquals("19", ref1.getBasicMetadata().getIssue()); assertEquals("New England Journal of Medicine", ref1.getBasicMetadata().getSource()); assertEquals("Shattuck lecture: cardiovascular medicine at the turn of the millennium: triumphs", ref1.getBasicMetadata().getTitle()); assertEquals("337", ref1.getBasicMetadata().getVolume()); assertEquals("1997", ref1.getBasicMetadata().getYear()); assertNotNull(ref1.getBasicMetadata().getPages()); assertEquals("1360", ref1.getBasicMetadata().getPages().getStart()); assertEquals("1369", ref1.getBasicMetadata().getPages().getEnd()); ReferenceMetadata ref2 = references.get(1); assertEquals(2, (int) ref2.getPosition()); assertEquals( "[2] R. L. Campbell , R. Banner , J. Konick-McMahan , and M. D. Naylor , “ Discharge planning and home follow-up of the elderly patient with heart failure ,” The Nursing Clinics of North America , vol. 33 , no. 3 , pp. 497 , 1998 .", ref2.getText()); assertNotNull(ref2.getBasicMetadata()); assertNotNull(ref2.getBasicMetadata().getAuthors()); assertEquals(4, ref2.getBasicMetadata().getAuthors().size()); assertTrue(ref2.getBasicMetadata().getAuthors().contains("Campbell, R. L.")); assertTrue(ref2.getBasicMetadata().getAuthors().contains("Banner, R.")); assertTrue(ref2.getBasicMetadata().getAuthors().contains("Konick-McMahan, J.")); assertTrue(ref2.getBasicMetadata().getAuthors().contains("Naylor, M. D.")); assertEquals("3", ref2.getBasicMetadata().getIssue()); assertEquals("The Nursing Clinics of North America", ref2.getBasicMetadata().getSource()); assertEquals("Discharge planning and home follow-up of the elderly patient with heart failure", ref2.getBasicMetadata().getTitle()); assertEquals("33", ref2.getBasicMetadata().getVolume()); assertEquals("1998", ref2.getBasicMetadata().getYear()); assertNotNull(ref2.getBasicMetadata().getPages()); assertEquals("497", ref2.getBasicMetadata().getPages().getStart()); assertEquals("497", ref2.getBasicMetadata().getPages().getEnd()); } }