package de.dpa.oss.metadata.mapper.common; import de.dpa.oss.common.StringCharacterMappingTable; import de.dpa.oss.metadata.mapper.imaging.EncodingCharset; import org.hamcrest.CoreMatchers; import org.junit.Test; import java.util.HashMap; import java.util.Map; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.core.IsNull.notNullValue; public class StringCharacterMappingTableTest { @Test public void shouldLeaveStringAsIs() { // given StringCharacterMappingTable stringCharacterMappingTable = StringCharacterMappingTable.aCharacterMapping().build(); // use "simple" string to enable platform independent test case final String stringToMap = "abcdefg52512àá"; // when String mappedString = stringCharacterMappingTable.map(stringToMap); // then assertThat(mappedString, is(notNullValue())); assertThat(mappedString, is(stringToMap)); } @Test public void shouldLeaveStringAsIsUsingNonUTF8Encoding() { // given StringCharacterMappingTable stringCharacterMappingTable = StringCharacterMappingTable.aCharacterMapping() .restrictToCharsetUsingDefaultChar(EncodingCharset.ISO_8859_15, "").build(); final String stringToMap = "äöüÄÖÜ"; // when String mappedString = stringCharacterMappingTable.map(stringToMap); // then assertThat(mappedString, is(notNullValue())); assertThat(mappedString, is(stringToMap)); } @Test public void shouldMapBasicMultilingualPlane() { // given final String stringToMap = "AABABC"; StringCharacterMappingTable stringCharacterMappingTable = StringCharacterMappingTable.aCharacterMapping() .addCodepointMapping("0x41", "0x44") .build(); // when String mappedString = stringCharacterMappingTable.map(stringToMap); // then assertThat(mappedString, is(notNullValue())); assertThat(mappedString, is("DDBDBC")); } @Test public void shouldMapSupplementary() { /** * Character to substitute: 135260 has highSurrogate: 55364 => D844 has lowSurrogate: 56412 => DC5C */ StringCharacterMappingTable stringCharacterMappingTable = StringCharacterMappingTable.aCharacterMapping() .addCodepointMapping(135260, 198) .build(); final String stringToMap = "Supplement\uD844\uDC5Cry"; // when String mappedString = stringCharacterMappingTable.map(stringToMap); // then assertThat(mappedString, is(notNullValue())); assertThat(mappedString, is("SupplementÆry")); } @Test public void shouldMapByMultiCharMapping() { // given final String src01 = "¦¨´”„Ŗ“ŗĄĮĀĆĘĒČŹĖĢĶĪĻŃŅŌŲŁŚŪŻąįāćęēčźėģķīļńņōųłśūżĽŞŤľş"; final String dst01 = "|\"'\"\"R\"rAIACEECZEGKILNNOULSUZaiaceeczegkilnnoulsuzLSTls"; final String src02 = "ť˝ŔĂĹĚĎĐŇŐŘŮŰŢŕăĺěďđňőřůűţĦĤİĞĴħĥığĵĊĈĠĜŬŜċĉġĝŭŝĸĨŦĩŧŊŋŨũЁЂ"; final String dst02 = "t\"RALEDÐNÖRUÜTraleddnöruütHHIGJhhigjCCGGUSccgguskITitNnUuËT"; final String src03 = "ЄЅІЇЈЌЎёєѕіїјћќў‘’ͺ―΄ΑΒΕΖΗΙΚΜΝΟΡΤΥΪΫίΰϊϋόύ…†‡"; final String dst03 = "ESIÏJKYëesiijhky'',-'ABEZHIKMNOPTYIYiüïüóu.++"; final String src04 = "•–—・ⅰ™"; final String dst04 = "·--·i®"; // dashes final String src05 = "" + '\u2010' + '\u2011' + '\u2012' + '\u2013' + '\u2014' + '\u2212'; final String dst05 = "------"; // quotion mark final String src06 = "" + '\u2018' + '\u2019' + '\u201a' + '\u201c' + '\u201d' + '\u201e' + '\u2039' + '\u203a'; final String dst06 = "''\"\"\"\"\"\""; final String src = (src01 + src02 + src03 + src04 + src05 + src06); final String dst = (dst01 + dst02 + dst03 + dst04 + dst05 + dst06); StringCharacterMappingTable stringCharacterMappingTable = StringCharacterMappingTable.aCharacterMapping() .addMultiCharacterMapping(src, dst) .build(); // when then assertThat(stringCharacterMappingTable.map(src01), is(dst01)); assertThat(stringCharacterMappingTable.map(src02), is(dst02)); assertThat(stringCharacterMappingTable.map(src03), is(dst03)); System.out.println(stringCharacterMappingTable); } @Test public void shouldMapToTargetCharset() { // given StringCharacterMappingTable stringCharacterMappingTable = StringCharacterMappingTable.aCharacterMapping() .restrictToCharsetUsingDefaultChar(EncodingCharset.ISO_8859_15, "A").build(); final String srcString = "abcdefg"; // when String mappedStr = stringCharacterMappingTable.map(srcString); // then assertThat(mappedStr, is(srcString)); } @Test public void shouldUseReplacementCharacterIfNotMapable() { // given StringCharacterMappingTable stringCharacterMappingTable = StringCharacterMappingTable.aCharacterMapping() .restrictToCharsetUsingDefaultChar(EncodingCharset.ISO_8859_15, "A").build(); final String srcString = "String with unmapable char:" + new String(Character.toChars(5122 /* ᐂ */)) + "XXX"; final String expectedString = "String with unmapable char:AXXX"; // when String mappedStr = stringCharacterMappingTable.map(srcString); // then assertThat(mappedStr, is(expectedString)); } @Test public void shouldUseMappingAndReplacement() { // given StringCharacterMappingTable stringCharacterMappingTable = StringCharacterMappingTable.aCharacterMapping() .restrictToCharsetUsingDefaultChar(EncodingCharset.ISO_8859_15, "·") .addCodepointMapping(5120, 65).build(); final String srcString = "String with unmapable char:" + new String(Character.toChars(5122 /* ᐂ */)) + " and mapable char: " + new String(Character.toChars(5120)); final String expectedString = "String with unmapable char:· and mapable char: A"; // when String mappedStr = stringCharacterMappingTable.map(srcString); // then assertThat(mappedStr, is(expectedString)); } @Test public void shouldCreateFormattedMappingTable() { // given final String src = "abc<>d"; final String dst = "ABC{}D"; StringCharacterMappingTable mappingTable = StringCharacterMappingTable.aCharacterMapping() .addMultiCharacterMapping(src, dst) .build(); // when Map<Integer, String> codepointAlternativeCharacters = new HashMap<>(); codepointAlternativeCharacters.put(">".codePointAt(0), ">"); codepointAlternativeCharacters.put("<".codePointAt(0), "<"); String formattedTable = mappingTable.toString("<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n", codepointAlternativeCharacters); // then assertThat( formattedTable, is("<tr><td>3c</td><td><</td><td>7b</td><td>{</td></tr>\n" + "<tr><td>3e</td><td>></td><td>7d</td><td>}</td></tr>\n" + "<tr><td>61</td><td>a</td><td>41</td><td>A</td></tr>\n" + "<tr><td>62</td><td>b</td><td>42</td><td>B</td></tr>\n" + "<tr><td>63</td><td>c</td><td>43</td><td>C</td></tr>\n" + "<tr><td>64</td><td>d</td><td>44</td><td>D</td></tr>\n")); } @Test public void shouldMapToOriginWithoutAnyGivenMapping() { // given StringCharacterMappingTable mapping = StringCharacterMappingTable.aCharacterMapping().build(); // when String result = mapping.map("inputString"); // then assertThat(result,is(CoreMatchers.notNullValue())); assertThat(result,is("inputString")); } }