package jannovar.annotation;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import jannovar.common.Constants;
import jannovar.common.VariantType;
import jannovar.exception.AnnotationException;
import jannovar.exception.JannovarException;
import jannovar.exception.VCFParseException;
import jannovar.exome.Variant;
import jannovar.genotype.GenotypeFactoryA;
import jannovar.genotype.SingleGenotypeFactory;
import jannovar.genotype.MultipleGenotypeFactory;
import jannovar.io.SerializationManager;
import jannovar.io.VCFLine;
import jannovar.reference.Chromosome;
import jannovar.reference.TranscriptModel;
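/**
* Tests for the annotation of duplication variants: each test parses one VCF
* line, annotates the resulting variant and checks the reported variant type
* and annotation string.
*
* @author mjaeger
*
*/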
public class DuplicationAnnotationTest implements Constants {
/**
 * Chromosomes built from the deserialized test transcripts; the tests look
 * them up with the chromosome number as a byte key. Filled in {@link #setUp()}.
 */
private static HashMap<Byte, Chromosome> chromosomeMap;
/** This is needed for the VCF line initialization. */
private static GenotypeFactoryA genofactory;
/**
 * Prepares the fixtures shared by all tests: deserializes the known-gene test
 * file, builds the chromosome map from it and registers the genotype factory
 * with {@link VCFLine} so that VCF lines can be parsed.
 *
 * @throws IOException       declared by the fixture signature (kept unchanged)
 * @throws JannovarException declared by the fixture signature (kept unchanged)
 */
@BeforeClass
public static void setUp() throws IOException, JannovarException {
    // Resolve the resource through this class instead of a sibling test class,
    // so the fixture does not depend on an unrelated test being present.
    java.net.URL url = DuplicationAnnotationTest.class.getResource(UCSCserializationTestFileName);
    // Fail with a readable message rather than a bare NullPointerException
    // when the serialized test file is not on the classpath.
    Assert.assertNotNull("Could not find test resource \"" + UCSCserializationTestFileName + "\"", url);
    String path = url.getPath();
    SerializationManager manager = new SerializationManager();
    ArrayList<TranscriptModel> kgList = manager.deserializeKnownGeneList(path);
    chromosomeMap = Chromosome.constructChromosomeMapWithIntervalTree(kgList);
    genofactory = new MultipleGenotypeFactory();
    VCFLine.setGenotypeFactory(genofactory);
}
/**
 * Drops this class's references to the static fixtures assigned in
 * {@link #setUp()} once all tests have run.
 */
@AfterClass
public static void releaseResources() {
    chromosomeMap = null;
    // setUp() assigns both static fields, so release both here.
    genofactory = null;
    // Only a hint to the JVM; kept from the original teardown.
    System.gc();
}
/**
 * <P>
 * Test for the in-frame duplication of a single triplet / one amino acid,
 * '+' strand.
 * </P>
 * Mutalyzer:
 * NM_001005495(OR2T3_v001):c.769_771dup
 * NM_001005495(OR2T3_i001):p.(Phe257dup)
 */
@Test
public void testDuplicationVar1() throws JannovarException {
    final String vcfRecord = "1 248637422 . C CTTC 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    final Variant variant = new VCFLine(vcfRecord).toVariant();
    final byte chr = 1;
    final int position = variant.get_position();
    final String refAllele = variant.get_ref();
    final String altAllele = variant.get_alt();

    final Chromosome chromosome = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", chromosome);

    final AnnotationList annotations = chromosome.getAnnotationList(position, refAllele, altAllele);
    final VariantType variantType = annotations.getVariantType();
    // The parsed variant must be in the "-" / inserted-sequence form.
    Assert.assertEquals(248637422, position);
    Assert.assertEquals("-", refAllele);
    Assert.assertEquals("TTC", altAllele);
    final String annotationText = annotations.getVariantAnnotation();
    Assert.assertEquals(VariantType.NON_FS_DUPLICATION, variantType);
    Assert.assertEquals("OR2T3(uc001iel.1:exon1:c.769_771dupTTC:p.F257dup)", annotationText);
}
/**
 * <P>
 * Test for the frameshift duplication of a single nucleotide in FRG1, which
 * is on the "+" strand.
 * </P>
 * <P>
 * annovar: FRG1:uc003izs.3:exon6:c.439_440insA:p.M147fs,
 * chr4:190878559->A
 * </P>
 * <P>
 * Jannovar says: FRG1(uc003izs.3:exon6:c.438dupA:p.M147fs), which was
 * recorded as the test failure<br>
 * expected {@code <...c003izs.3:exon6:c.43[9]dupA:p.M147fs)>}<br>
 * but was: {@code <...c003izs.3:exon6:c.43[8]dupA:p.M147fs)>}
 * </P>
 * <P>
 * uc003izs.3 is NM_004477.2. Mutalyzer says
 * NM_004477.2(FRG1_v001):c.439dup
 * NM_004477.2(FRG1_i001):p.(Met147Asnfs*8)
 * Raw variant 1: duplication from 630 to 630
 * </P>
 * <pre>
 * GAACCAGTCTTTCAAAATGGGAAAA - TGGCTTTGTTGGCCTCAAATAGCTG
 * GAACCAGTCTTTCAAAATGGGAAAA A TGGCTTTGTTGGCCTCAAATAGCTG
 * </pre>
 * <P>
 * Thus, 439 and not 438 is the correct number for the duplicated nucleotide.
 * Jannovar lists refvarstart as 630. This is the last "A" of a polyA tract in
 * the gene (see genbank L76159.1).
 * Jannovar lists refcdsstart as 192. This is the position of the start of the
 * start codon in FRG1 (L76159.1).
 * </P>
 */
@Test
public void testInsertionVar29y() throws AnnotationException, VCFParseException, JannovarException {
    final String vcfRecord = "4 190878559 . A AA 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    final Variant variant = new VCFLine(vcfRecord).toVariant();
    final byte chr = 4;
    final int position = variant.get_position();
    final String refAllele = variant.get_ref();
    final String altAllele = variant.get_alt();

    final Chromosome chromosome = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", chromosome);

    final AnnotationList annotations = chromosome.getAnnotationList(position, refAllele, altAllele);
    final VariantType variantType = annotations.getVariantType();
    final String annotationText = annotations.getVariantAnnotation();
    Assert.assertEquals(190878559, position);
    Assert.assertEquals("-", refAllele);
    Assert.assertEquals("A", altAllele);
    Assert.assertEquals(VariantType.FS_DUPLICATION, variantType);
    Assert.assertEquals("FRG1(uc003izs.3:exon6:c.439dupA:p.M147fs)", annotationText);
}
/**
 * <P>
 * Test for the in-frame duplication of a single triplet / one amino acid,
 * '+' strand.
 * </P>
 * Parses an insertion of AGA on chromosome 9 and expects it to be annotated
 * as the OLFM1 duplication c.325_327dupAGA (p.R109dup).
 */
@Test
public void testDuplicationVar9test() throws AnnotationException, VCFParseException, JannovarException {
    String s = "9 137968918 . A AAGA 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    VCFLine line = new VCFLine(s);
    Variant v = line.toVariant();
    byte chr = 9;
    int pos = v.get_position();
    String ref = v.get_ref();
    String alt = v.get_alt();
    Chromosome c = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", c);

    AnnotationList ann = c.getAnnotationList(pos, ref, alt);
    // The unused local holding ann.getAnnotationList() was removed; nothing read it.
    Assert.assertEquals(137968918, pos);
    Assert.assertEquals("-", ref);
    Assert.assertEquals("AGA", alt);
    VariantType varType = ann.getVariantType();
    String annot = ann.getVariantAnnotation();
    Assert.assertEquals(VariantType.NON_FS_DUPLICATION, varType);
    Assert.assertEquals("OLFM1(uc010naq.2:exon2:c.325_327dupAGA:p.R109dup)", annot);
}
/**
 * <P>
 * Test for the in-frame duplication of six nucleotides / two amino acids,
 * '+' strand.
 * </P>
 * Mutalyzer:
 * NM_001005495.1(OR2T3_v001):c.766_771dup
 * NM_001005495.1(OR2T3_i001):p.(Leu256_Phe257dup)
 * <P>
 * Recorded test failure:<br>
 * expected {@code <...6_771dupCTCTTC:p.L25[6_F257]dup)>}<br>
 * but was: {@code <...6_771dupCTCTTC:p.L25[4_F256]dup)>}
 * </P>
 */
@Test
public void testDuplicationVar2() throws JannovarException {
    final String vcfRecord = "1 248637422 . C CCTCTTC 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    final Variant variant = new VCFLine(vcfRecord).toVariant();
    final byte chr = 1;
    final int position = variant.get_position();
    final String refAllele = variant.get_ref();
    final String altAllele = variant.get_alt();

    final Chromosome chromosome = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", chromosome);

    final AnnotationList annotations = chromosome.getAnnotationList(position, refAllele, altAllele);
    final VariantType variantType = annotations.getVariantType();
    Assert.assertEquals(248637422, position);
    Assert.assertEquals("-", refAllele);
    Assert.assertEquals("CTCTTC", altAllele);
    final String annotationText = annotations.getVariantAnnotation();
    Assert.assertEquals(VariantType.NON_FS_DUPLICATION, variantType);
    Assert.assertEquals("OR2T3(uc001iel.1:exon1:c.766_771dupCTCTTC:p.L256_F257dup)", annotationText);
}
/**
 * <P>
 * Test for the in-frame duplication of 12 nucleotides / four amino acids
 * (the expected protein change spans L254 to F257), '+' strand.
 * </P>
 */
@Test
public void testDuplicationVar3() throws JannovarException {
    final String vcfRecord = "1 248637422 . C CCTGCTGCTCTTC 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    final Variant variant = new VCFLine(vcfRecord).toVariant();
    final byte chr = 1;
    final int position = variant.get_position();
    final String refAllele = variant.get_ref();
    final String altAllele = variant.get_alt();

    final Chromosome chromosome = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", chromosome);

    final AnnotationList annotations = chromosome.getAnnotationList(position, refAllele, altAllele);
    final VariantType variantType = annotations.getVariantType();
    Assert.assertEquals(248637422, position);
    Assert.assertEquals("-", refAllele);
    Assert.assertEquals("CTGCTGCTCTTC", altAllele);
    final String annotationText = annotations.getVariantAnnotation();
    Assert.assertEquals(VariantType.NON_FS_DUPLICATION, variantType);
    Assert.assertEquals("OR2T3(uc001iel.1:exon1:c.760_771dupCTGCTGCTCTTC:p.L254_F257dup)", annotationText);
}
/**
 * <P>
 * Test for the in-frame duplication of a single triplet / one amino acid,
 * '-' strand. The inserted AGT is expected in the annotation as its reverse
 * complement ACT.
 * </P>
 * Mutalyzer:
 * NM_022149.4(MAGEF1_v001):c.424_426dup
 * NM_022149.4(MAGEF1_i001):p.(Thr142dup)
 */
@Test
public void testDuplicationVar4() throws JannovarException {
    final String vcfRecord = "3 184429186 . A AAGT 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    final Variant variant = new VCFLine(vcfRecord).toVariant();
    final byte chr = 3;
    final int position = variant.get_position();
    final String refAllele = variant.get_ref();
    final String altAllele = variant.get_alt();

    final Chromosome chromosome = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", chromosome);

    final AnnotationList annotations = chromosome.getAnnotationList(position, refAllele, altAllele);
    final VariantType variantType = annotations.getVariantType();
    Assert.assertEquals(184429186, position);
    Assert.assertEquals("-", refAllele);
    Assert.assertEquals("AGT", altAllele);
    final String annotationText = annotations.getVariantAnnotation();
    Assert.assertEquals(VariantType.NON_FS_DUPLICATION, variantType);
    Assert.assertEquals("MAGEF1(uc003fpa.3:exon1:c.424_426dupACT:p.T142dup)", annotationText);
}
/**
 * <P>
 * Test for the in-frame duplication of six nucleotides / two amino acids,
 * '-' strand. The inserted TTTGTT is expected in the annotation as its
 * reverse complement AACAAA.
 * </P>
 * Mutalyzer:
 * NM_022149.4(MAGEF1_v001):c.439_444dup
 * NM_022149.4(MAGEF1_i001):p.(Asn147_Lys148dup)
 */
@Test
public void testDuplicationVar5() throws JannovarException {
    final byte chr = 3;
    final String vcfRecord = "3 184429171 . T TTTTGTT 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    final Variant variant = new VCFLine(vcfRecord).toVariant();
    final int position = variant.get_position();
    final String refAllele = variant.get_ref();
    final String altAllele = variant.get_alt();

    final Chromosome chromosome = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", chromosome);

    final AnnotationList annotations = chromosome.getAnnotationList(position, refAllele, altAllele);
    Assert.assertEquals(184429171, position);
    Assert.assertEquals("-", refAllele);
    Assert.assertEquals("TTTGTT", altAllele);
    final String annotationText = annotations.getVariantAnnotation();
    final VariantType variantType = annotations.getVariantType();
    Assert.assertEquals(VariantType.NON_FS_DUPLICATION, variantType);
    Assert.assertEquals("MAGEF1(uc003fpa.3:exon1:c.439_444dupAACAAA:p.N147_K148dup)", annotationText);
}
/**
 * <P>
 * Test for the in-frame duplication of 12 nucleotides / four amino acids
 * (the expected protein change spans N147 to K150), '-' strand. The inserted
 * TTTTAGTTTGTT is expected in the annotation as its reverse complement
 * AACAAACTAAAA.
 * </P>
 */
@Test
public void testDuplicationVar6() throws JannovarException {
    final byte chr = 3;
    final String vcfRecord = "3 184429171 . T TTTTTAGTTTGTT 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    final Variant variant = new VCFLine(vcfRecord).toVariant();
    final int position = variant.get_position();
    final String refAllele = variant.get_ref();
    final String altAllele = variant.get_alt();

    final Chromosome chromosome = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", chromosome);

    final AnnotationList annotations = chromosome.getAnnotationList(position, refAllele, altAllele);
    final VariantType variantType = annotations.getVariantType();
    Assert.assertEquals(184429171, position);
    Assert.assertEquals("-", refAllele);
    Assert.assertEquals("TTTTAGTTTGTT", altAllele);
    Assert.assertEquals(VariantType.NON_FS_DUPLICATION, variantType);
    final String annotationText = annotations.getVariantAnnotation();
    Assert.assertEquals("MAGEF1(uc003fpa.3:exon1:c.439_450dupAACAAACTAAAA:p.N147_K150dup)", annotationText);
}
/**
 * <P>
 * Test for the offset (+1) duplication of six nucleotides / two amino acids
 * shifting the stop codon, '+' strand.
 * </P>
 * Mutalyzer:
 * NM_001005495.1(OR2T3_v001):c.949_954dup
 * NM_001005495.1(OR2T3_i001):p.(*319Gluext*2)
 * I think mutalyzer is wrong here, the stop is right after the duplication.
 */
@Test
public void testDuplicationVar7() throws JannovarException {
    final String vcfRecord = "1 248637605 . G GGAAAAG 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    final Variant variant = new VCFLine(vcfRecord).toVariant();
    final int position = variant.get_position();
    final String refAllele = variant.get_ref();
    final String altAllele = variant.get_alt();
    // Here the chromosome key is taken from the parsed variant, not hard-coded.
    final byte chr = (byte) variant.get_chromosome();

    final Chromosome chromosome = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", chromosome);

    final AnnotationList annotations = chromosome.getAnnotationList(position, refAllele, altAllele);
    final VariantType variantType = annotations.getVariantType();
    Assert.assertEquals(248637605, position);
    Assert.assertEquals("-", refAllele);
    Assert.assertEquals("GAAAAG", altAllele);
    Assert.assertEquals(VariantType.NON_FS_DUPLICATION, variantType);
    final String annotationText = annotations.getVariantAnnotation();
    Assert.assertEquals("OR2T3(uc001iel.1:exon1:c.949_954dupGAAAAG:p.E317_K318dup)", annotationText);
}
/**
 * <P>
 * annovar: MAGEF1:uc003fpa.3:exon1:c.456_457insGGA:p.L152delinsLE,
 * chr3:184429154->TCC
 * </P>
 * uc003fpa.3:exon1:c.456_458dupGGA:p.L152delinsLG
 * Refseq: NM_022149
 * Mutalyzer:
 * Note that the position gets shifted downstream to become the most 3' position.
 * NM_022149.4(MAGEF1_v001):c.474_476dup
 * NM_022149.4(MAGEF1_i001):p.(Glu158dup)
 */
@Test
public void testInsertionVar25() throws JannovarException {
    final String vcfRecord = "3 184429154 . C CTCC 100 PASS QD=11.71; GT:GQ 0/1:99 0/0:99 0/1:99 0/0:99 0/1:99";
    final Variant variant = new VCFLine(vcfRecord).toVariant();
    final int position = variant.get_position();
    final String refAllele = variant.get_ref();
    final String altAllele = variant.get_alt();
    // Here the chromosome key is taken from the parsed variant, not hard-coded.
    final byte chr = (byte) variant.get_chromosome();

    final Chromosome chromosome = chromosomeMap.get(chr);
    Assert.assertNotNull("Could not identify chromosome \"" + chr + "\"", chromosome);

    final AnnotationList annotations = chromosome.getAnnotationList(position, refAllele, altAllele);
    final VariantType variantType = annotations.getVariantType();
    Assert.assertEquals(184429154, position);
    Assert.assertEquals("-", refAllele);
    Assert.assertEquals("TCC", altAllele);
    final String annotationText = annotations.getVariantAnnotation();
    Assert.assertEquals(VariantType.NON_FS_DUPLICATION, variantType);
    Assert.assertEquals("MAGEF1(uc003fpa.3:exon1:c.474_476dupGGA:p.E158dup)", annotationText);
}
}
/* eof. */