/** * */ package org.voyanttools.trombone.storage.file; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; import java.util.List; import java.util.UUID; import org.apache.commons.io.FileUtils; import org.junit.Assert; import org.junit.Test; import org.voyanttools.trombone.model.Corpus; import org.voyanttools.trombone.model.IndexedDocument; import org.voyanttools.trombone.storage.Storage; import org.voyanttools.trombone.tool.build.RealCorpusCreator; import org.voyanttools.trombone.util.FlexibleParameters; import org.voyanttools.trombone.util.TestHelper; import net.lingala.zip4j.core.ZipFile; import net.lingala.zip4j.exception.ZipException; /** * @author sgs * */ public class TromboneMigration { @Test public void testTrombone3_0() throws IOException, ZipException { File base = new File(System.getProperty("java.io.tmpdir"), "_test_"+UUID.randomUUID()); FileStorage storage = new FileStorage(new File(base, FileStorage.DEFAULT_TROMBOME_DIRECTORY_NAME)); // create a dummy migrator to get the proper destination directory for unzipping AbstractFileMigrator dummyMigrator = new FileTrombone3_0Migrator(storage, ""); Assert.assertNull(null, dummyMigrator.getMigratedCorpusId()); File oldStorageDirectory = dummyMigrator.getSourceTromboneDirectory(); oldStorageDirectory.mkdir(); File file = TestHelper.getResource("migration/trombone3_0.zip"); new ZipFile(file).extractAll(oldStorageDirectory.getPath()); FileMigrator migrator; String id; Corpus corpus; // test the bundle of formats migrator = FileMigrationFactory.getMigrator(storage, "one"); assertTrue(migrator instanceof FileTrombone3_0Migrator); id = migrator.getMigratedCorpusId(); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(13, corpus.size()); for (IndexedDocument doc : corpus) { Assert.assertFalse(doc.getMetadata().getTitle().equals("rawbytes")); } // test handling of title and author metadata migrator = FileMigrationFactory.getMigrator(storage, "two"); assertTrue(migrator instanceof FileTrombone3_0Migrator); id = migrator.getMigratedCorpusId(); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(2, corpus.size()); assertEquals("", corpus.getDocument(0).getMetadata().getAuthor()); assertEquals("Defined", corpus.getDocument(1).getMetadata().getAuthor()); // test transfer of stoplist List<String> stopwords = storage.retrieveStrings("1458405208292sw", Storage.Location.object); assertEquals(1, stopwords.size()); assertEquals("test", stopwords.get(0)); storage.destroy(); FileUtils.deleteDirectory(base); } @Test public void testTrombone4_0() throws IOException, ZipException { File base = new File(System.getProperty("java.io.tmpdir"), "_test_"+UUID.randomUUID()); FileStorage storage = new FileStorage(new File(base, FileStorage.DEFAULT_TROMBOME_DIRECTORY_NAME)); // create a dummy migrator to get the proper destination directory for unzipping AbstractFileMigrator dummyMigrator = new FileTrombone4_0Migrator(storage, ""); Assert.assertNull(null, dummyMigrator.getMigratedCorpusId()); // unzip trombone 4.0 contents File oldStorageDirectory = dummyMigrator.getSourceTromboneDirectory(); oldStorageDirectory.mkdir(); File file = TestHelper.getResource("migration/trombone4_0.zip"); new ZipFile(file).extractAll(oldStorageDirectory.getPath()); FileMigrator migrator; String id; Corpus corpus; // test the bundle of formats migrator = FileMigrationFactory.getMigrator(storage, "1cb657d4f807a824536059c9ade0d907"); assertTrue(migrator instanceof FileTrombone4_0Migrator); id = migrator.getMigratedCorpusId(); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(15, corpus.size()); for (IndexedDocument doc : corpus) { Assert.assertFalse(doc.getMetadata().getTitle().equals("rawbytes")); } // test handling of title and author metadata migrator = FileMigrationFactory.getMigrator(storage, "824b82f75e5053a0f52a0a3db2654d15"); assertTrue(migrator instanceof FileTrombone4_0Migrator); id = migrator.getMigratedCorpusId(); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(1, corpus.size()); assertEquals("Il était une fois.", corpus.getDocument(0).getMetadata().getAuthor()); assertEquals("un texte intéressant et un test. ⚠️", corpus.getDocument(0).getMetadata().getTitle()); storage.destroy(); FileUtils.deleteDirectory(base); } @Test public void testTrombone4_1() throws IOException, ZipException { File base = new File(System.getProperty("java.io.tmpdir"), "_test_"+UUID.randomUUID()); FileStorage storage = new FileStorage(new File(base, FileStorage.DEFAULT_TROMBOME_DIRECTORY_NAME)); // create a dummy migrator to get the proper destination directory for unzipping FileTrombone4_1Migrator dummyMigrator = new FileTrombone4_1Migrator(storage, ""); Assert.assertNull(null, dummyMigrator.getMigratedCorpusId()); // unzip trombone 4.1 contents File oldStorageDirectory = dummyMigrator.getSourceTromboneDirectory(); oldStorageDirectory.mkdir(); File file = TestHelper.getResource("migration/trombone4_1.zip"); new ZipFile(file).extractAll(oldStorageDirectory.getPath()); FileMigrator migrator; String id; Corpus corpus; String corpusIdToMigrate; // test the bundle of formats corpusIdToMigrate = "d0be1ce35c9941b21af22260a47938e2"; migrator = FileMigrationFactory.getMigrator(storage, corpusIdToMigrate); assertTrue(migrator instanceof FileTrombone4_1Migrator); id = migrator.getMigratedCorpusId(); assertTrue(storage.getCorpusStorage().corpusExists(corpusIdToMigrate)); assertTrue(storage.getCorpusStorage().corpusExists(id)); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(15, corpus.size()); for (IndexedDocument doc : corpus) { Assert.assertFalse(doc.getMetadata().getTitle().equals("rawbytes")); } // test handling of title and author metadata corpusIdToMigrate = "e0a54420a5555aa00dacd1ccf0a2ba0e"; migrator = FileMigrationFactory.getMigrator(storage, corpusIdToMigrate); assertTrue(migrator instanceof FileTrombone4_1Migrator); id = migrator.getMigratedCorpusId(); assertTrue(storage.getCorpusStorage().corpusExists(corpusIdToMigrate)); assertTrue(storage.getCorpusStorage().corpusExists(id)); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(1, corpus.size()); assertEquals("Il était une fois.", corpus.getDocument(0).getMetadata().getTitle()); assertEquals("un texte intéressant et un test. ⚠️", corpus.getDocument(0).getMetadata().getAuthor()); // those should be using stored top-level original sources, now try if one of them has disappeared (only use corpus creation parameters) corpusIdToMigrate = "d0be1ce35c9941b21af22260a47938e2"; migrator = FileMigrationFactory.getMigrator(storage, corpusIdToMigrate); assertTrue(migrator instanceof FileTrombone4_1Migrator); // remove the top-level source zip directory File deleteDir = new File(dummyMigrator.getSourceTromboneDocumentsDirectory(), "d807e3732cc09d24783201aed49d5742"); assertTrue(deleteDir.exists()); FileUtils.deleteDirectory(deleteDir); assertFalse(deleteDir.exists()); // we have to modify the parameters to point to an existing file, not the one that was used when the zip was created FlexibleParameters corpusCreationParameters = ((FileTrombone4_1Migrator) migrator).getCorpusCreationParameters(); File newUploadFile = TestHelper.getResource("archive/chars.zip"); assertTrue(newUploadFile.exists()); corpusCreationParameters.setParameter("upload", newUploadFile.getAbsolutePath()); File newCorpusParametersFile = new File(((FileTrombone4_1Migrator) migrator).getSourceTromboneCorpusDirectory(), "parameters.xml"); assertTrue(newCorpusParametersFile.exists()); corpusCreationParameters.saveFlexibleParameters(newCorpusParametersFile); // now proceed id = migrator.getMigratedCorpusId(); assertTrue(storage.getCorpusStorage().corpusExists(id)); assertTrue(storage.getCorpusStorage().corpusExists(corpusIdToMigrate)); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(15, corpus.size()); for (IndexedDocument doc : corpus) { Assert.assertFalse(doc.getMetadata().getTitle().equals("rawbytes")); } // now try if the corpus creation parameters don't work out (so simple migration with very limited metadata newUploadFile = new File(UUID.randomUUID().toString()); assertFalse(newUploadFile.exists()); corpusCreationParameters.setParameter("upload", UUID.randomUUID().toString()); newCorpusParametersFile = new File(((FileTrombone4_1Migrator) migrator).getSourceTromboneCorpusDirectory(), "parameters.xml"); corpusCreationParameters.saveFlexibleParameters(newCorpusParametersFile); // now proceed id = migrator.getMigratedCorpusId(); assertTrue(storage.getCorpusStorage().corpusExists(id)); assertTrue(storage.getCorpusStorage().corpusExists(corpusIdToMigrate)); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(15, corpus.size()); for (IndexedDocument doc : corpus) { Assert.assertFalse(doc.getMetadata().getTitle().equals("rawbytes")); } // test migration of resources id = "7f96fa278a1cc64fc298ab808bcc2682"; assertFalse(storage.isStored(id, Storage.Location.object)); file = FileMigrationFactory.getStoredObjectFile(storage, id); assertTrue(file.exists()); assertTrue(storage.copyResource(file, id, Storage.Location.object)); assertTrue(storage.isStored(id, Storage.Location.object)); // test migration of non-existent resource id = "z"; assertFalse(storage.isStored(id, Storage.Location.object)); file = FileMigrationFactory.getStoredObjectFile(storage, id); assertNull(file); // test migration of recovered directory File recoveryFile = new File(base, oldStorageDirectory.getName()+".2"); FileUtils.copyDirectory(oldStorageDirectory, new File(base, oldStorageDirectory.getName()+".1")); FileUtils.copyDirectory(oldStorageDirectory, recoveryFile); recoveryFile.setLastModified(recoveryFile.lastModified()+1); // make sure it's the most recent storage.destroy(); FileUtils.deleteDirectory(base); } @Test public void testTrombone4_2() throws IOException, ZipException { File base = new File(System.getProperty("java.io.tmpdir"), "_test_"+UUID.randomUUID()); FileStorage storage = new FileStorage(new File(base, FileStorage.DEFAULT_TROMBOME_DIRECTORY_NAME)); // create a dummy migrator to get the proper destination directory for unzipping FileTrombone4_2Migrator dummyMigrator = new FileTrombone4_2Migrator(storage, ""); Assert.assertNull(null, dummyMigrator.getMigratedCorpusId()); // unzip trombone 4.2 contents File oldStorageDirectory = dummyMigrator.getSourceTromboneDirectory(); oldStorageDirectory.mkdir(); File file = TestHelper.getResource("migration/trombone4_2.zip"); new ZipFile(file).extractAll(oldStorageDirectory.getPath()); FileMigrator migrator; String id; Corpus corpus; String corpusIdToMigrate; // test the bundle of formats corpusIdToMigrate = "45ce972416fa9278974a42830668d7ff"; migrator = FileMigrationFactory.getMigrator(storage, corpusIdToMigrate); assertTrue(migrator instanceof FileTrombone4_2Migrator); id = migrator.getMigratedCorpusId(); assertTrue(storage.getCorpusStorage().corpusExists(corpusIdToMigrate)); assertTrue(storage.getCorpusStorage().corpusExists(id)); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(15, corpus.size()); for (IndexedDocument doc : corpus) { Assert.assertFalse(doc.getMetadata().getTitle().equals("rawbytes")); } // test handling of title and author metadata corpusIdToMigrate = "677eea3feacb2852718c1880602ddc8f"; migrator = FileMigrationFactory.getMigrator(storage, corpusIdToMigrate); assertTrue(migrator instanceof FileTrombone4_2Migrator); id = migrator.getMigratedCorpusId(); assertTrue(storage.getCorpusStorage().corpusExists(corpusIdToMigrate)); assertTrue(storage.getCorpusStorage().corpusExists(id)); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(1, corpus.size()); assertEquals("Il était une fois.", corpus.getDocument(0).getMetadata().getTitle()); assertEquals("un texte intéressant et un test. ⚠️", corpus.getDocument(0).getMetadata().getAuthor()); // those should be using stored top-level original sources, now try if one of them has disappeared (only use corpus creation parameters) corpusIdToMigrate = "45ce972416fa9278974a42830668d7ff"; migrator = FileMigrationFactory.getMigrator(storage, corpusIdToMigrate); assertTrue(migrator instanceof FileTrombone4_2Migrator); // remove the top-level source zip directory File deleteDir = new File(dummyMigrator.getSourceTromboneDocumentsDirectory(), "f279e4ef454f03de4e9a31b4c6032fef"); assertTrue(deleteDir.exists()); FileUtils.deleteDirectory(deleteDir); assertFalse(deleteDir.exists()); // we have to modify the parameters to point to an existing file, not the one that was used when the zip was created FlexibleParameters corpusCreationParameters = ((FileTrombone4_2Migrator) migrator).getCorpusCreationParameters(); File newUploadFile = TestHelper.getResource("archive/chars.zip"); assertTrue(newUploadFile.exists()); corpusCreationParameters.setParameter("upload", newUploadFile.getAbsolutePath()); File newCorpusParametersFile = new File(((FileTrombone4_2Migrator) migrator).getSourceTromboneCorpusDirectory(), "parameters.xml"); assertTrue(newCorpusParametersFile.exists()); corpusCreationParameters.saveFlexibleParameters(newCorpusParametersFile); // now proceed id = migrator.getMigratedCorpusId(); assertTrue(storage.getCorpusStorage().corpusExists(id)); assertTrue(storage.getCorpusStorage().corpusExists(corpusIdToMigrate)); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(15, corpus.size()); for (IndexedDocument doc : corpus) { Assert.assertFalse(doc.getMetadata().getTitle().equals("rawbytes")); } // now try if the corpus creation parameters don't work out (so simple migration with very limited metadata newUploadFile = new File(UUID.randomUUID().toString()); assertFalse(newUploadFile.exists()); corpusCreationParameters.setParameter("upload", UUID.randomUUID().toString()); newCorpusParametersFile = new File(((FileTrombone4_2Migrator) migrator).getSourceTromboneCorpusDirectory(), "parameters.xml"); corpusCreationParameters.saveFlexibleParameters(newCorpusParametersFile); // now proceed id = migrator.getMigratedCorpusId(); assertTrue(storage.getCorpusStorage().corpusExists(id)); assertTrue(storage.getCorpusStorage().corpusExists(corpusIdToMigrate)); corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(15, corpus.size()); for (IndexedDocument doc : corpus) { Assert.assertFalse(doc.getMetadata().getTitle().equals("rawbytes")); } // test migration of resources id = "0366879fcdc310ae2511e58ebb4ae64b"; assertFalse(storage.isStored(id, Storage.Location.object)); file = FileMigrationFactory.getStoredObjectFile(storage, id); assertTrue(file.exists()); assertTrue(storage.copyResource(file, id, Storage.Location.object)); assertTrue(storage.isStored(id, Storage.Location.object)); // test migration of non-existent resource id = "z"; assertFalse(storage.isStored(id, Storage.Location.object)); file = FileMigrationFactory.getStoredObjectFile(storage, id); assertNull(file); storage.destroy(); FileUtils.deleteDirectory(base); } @Test public void testTromboneCurrent() throws IOException, ZipException { File base = new File(System.getProperty("java.io.tmpdir"), "_test_"+UUID.randomUUID()); File current = new File(base, FileStorage.DEFAULT_TROMBOME_DIRECTORY_NAME); File recovery = new File(base, FileStorage.DEFAULT_TROMBOME_DIRECTORY_NAME+".1"); FileStorage storage = new FileStorage(current); RealCorpusCreator creator; FlexibleParameters parameters; parameters = new FlexibleParameters(); parameters.addParameter("string", "dark and stormy night in document one"); parameters.addParameter("tool", "StepEnabledIndexedCorpusCreator"); parameters.addParameter("noCache", 1); creator = new RealCorpusCreator(storage, parameters); creator.run(); String corpusIdToMigrate = creator.getStoredId(); storage.getLuceneManager().getIndexWriter().commit(); storage.getLuceneManager().getIndexWriter().close(); FileUtils.moveDirectory(current, recovery); assertTrue(recovery.exists()); assertFalse(current.exists()); storage = new FileStorage(current); FileMigrator migrator = FileMigrationFactory.getMigrator(storage, corpusIdToMigrate); assertTrue(migrator instanceof FileTromboneCurrentMigrator); assertEquals(((FileTromboneCurrentMigrator) migrator).getSourceTromboneDirectory().getPath(), recovery.getPath()); String id = migrator.getMigratedCorpusId(); assertTrue(storage.getCorpusStorage().corpusExists(corpusIdToMigrate)); assertTrue(storage.getCorpusStorage().corpusExists(id)); Corpus corpus = storage.getCorpusStorage().getCorpus(id); assertEquals(1, corpus.size()); Assert.assertTrue(corpus.getDocument(0).getMetadata().getTitle().equals("dark and stormy night in document one")); storage.destroy(); FileUtils.deleteDirectory(base); } }