package medsavant.incidental; import com.healthmarketscience.sqlbuilder.BinaryCondition; import com.healthmarketscience.sqlbuilder.ComboCondition; import com.healthmarketscience.sqlbuilder.Condition; import com.healthmarketscience.sqlbuilder.UnaryCondition; import com.healthmarketscience.sqlbuilder.dbspec.basic.DbColumn; import java.rmi.RemoteException; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import medsavant.incidental.localDB.IncidentalDB; import org.ut.biolab.medsavant.client.login.LoginController; import org.ut.biolab.medsavant.client.project.ProjectController; import org.ut.biolab.medsavant.client.reference.ReferenceController; import org.ut.biolab.medsavant.shared.serverapi.VariantManagerAdapter; import org.ut.biolab.medsavant.MedSavantClient; import org.ut.biolab.medsavant.shared.db.TableSchema; import org.ut.biolab.medsavant.shared.format.BasicVariantColumns; import org.ut.biolab.medsavant.shared.model.SessionExpiredException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.ut.biolab.medsavant.client.util.ClientMiscUtils; import org.ut.biolab.medsavant.client.util.DataRetriever; import org.ut.biolab.medsavant.client.view.component.SearchableTablePanel; import org.ut.biolab.medsavant.shared.format.AnnotationFormat; import org.ut.biolab.medsavant.shared.format.BasicPatientColumns; import org.ut.biolab.medsavant.shared.format.CustomField; import org.ut.biolab.medsavant.shared.serverapi.PatientManagerAdapter; /** * Compute and store all incidental findings for a patient. * * @author rammar */ public class IncidentalFindings { private static final Log LOG = LogFactory.getLog(MedSavantClient.class); private final int DB_VARIANT_REQUEST_LIMIT= 5000; private final String JANNOVAR_EFFECT= "ANNOVAR Effect"; private final String JANNOVAR_GENE= "ANNOVAR Gene Symbol"; private final String STOPGAIN= "STOPGAIN"; private final String FRAMESHIFTS= "FS_%"; private final String SPLICING= "SPLICING"; private final String CLINVAR_COLUMN= "clinvar_20131105b, info"; private final String HGMD_COLUMN= "hgmd_pro_allmut, gene"; private String GENDER= null; private int INHERITANCE_INDEX= -1; private int CLASSIFICATION_INDEX= -1; private Map<String, String> dbAliasToColumn; private String dnaID; private int coverageThreshold; private double hetRatio; private double alleleFrequencyThreshold; private Map<String, String> zygosityMap; private String incidentalPanel; private List<Object[]> allVariants; private TableSchema ts; public List<String> header; private int effectIndex; private int geneSymbolIndex; private int af1000gIndex; private Pattern dp4Pattern= Pattern.compile(";?DP4=([^;]+);?", Pattern.CASE_INSENSITIVE); private Pattern truncationPattern= Pattern.compile("STOPGAIN|FS_\\w+|SPLICING", Pattern.CASE_INSENSITIVE); private Pattern geneSymbolPattern= Pattern.compile("^([^(]+)"); private Pattern formatFieldPattern= Pattern.compile(";?FORMAT=([^;]*AD[^;]*);?", Pattern.CASE_INSENSITIVE); // must contain "AD" in format private Pattern sampleInfoFieldPattern= Pattern.compile(";?SAMPLE_INFO=([^;]+);?", Pattern.CASE_INSENSITIVE); /** Find all mutations in disease genes and filter for relevance. * @param dnaID Patient's DNA ID * @param cov The minimum coverage for alt variants * @param ratio The minimum ratio of alt/total reads to be considered as possibly het * @param afThreshold Allele frequency threshold * @param afColumns The columns of the table corresponding to DBs to be used for the allele frequency cutoff * @param panel Which incidental panel to use. For example, "CGD" or "ACMG". */ public IncidentalFindings(String dnaID, int cov, double ratio, double afThreshold, List<Object> afColumns, String panel) { this.dnaID= dnaID; coverageThreshold= cov; hetRatio= ratio; alleleFrequencyThreshold= afThreshold; incidentalPanel= panel; allVariants= new ArrayList<Object[]>(DB_VARIANT_REQUEST_LIMIT); // initial capacity DB_VARIANT_REQUEST_LIMIT ts= ProjectController.getInstance().getCurrentVariantTableSchema(); dbAliasToColumn= getDbToHumanReadableMap(); // Get column aliases from column names header= getTableHeader(); effectIndex= header.indexOf(JANNOVAR_EFFECT); geneSymbolIndex= header.indexOf(JANNOVAR_GENE); // For variant DB lookup - zygosity values based on VariantRecord in org.ut.biolab.medsavant.shared.vcf zygosityMap= new HashMap<String, String>(); zygosityMap.put("HomoAlt", "hom"); zygosityMap.put("Hetero", "het"); try { // Get gender info PatientManagerAdapter pma= MedSavantClient.PatientManager; List<Object[]> allProjectPatients= pma.getBasicPatientInfo( LoginController.getInstance().getSessionID(), ProjectController.getInstance().getCurrentProjectID(), Integer.MAX_VALUE); for (Object[] row : allProjectPatients) { if (dnaID.equals((String) row[BasicPatientColumns.INDEX_OF_DNA_IDS])) { GENDER= (String) ClientMiscUtils.genderToString( (Integer) row[BasicPatientColumns.INDEX_OF_GENDER]); break; // no need to search further } } /* Add any relevant columns based on the data that has been appended. */ header.add("Inheritance"); INHERITANCE_INDEX= header.size() - 1; header.add("Classification"); CLASSIFICATION_INDEX= header.size() - 1; // Download and process variants storeVariants(afColumns); } catch (Exception ex) { System.err.println("IncidentalFindings error: " + ex.toString()); ex.printStackTrace(); } } /** * Searchable table output for development testing. * @param selectedViewColumns Columns preselected for SearchableTablePanel output */ public SearchableTablePanel getTableOutput(int[] selectedViewColumns) { DataRetriever<Object[]> dr= new DataRetriever<Object[]>() { @Override public List<Object[]> retrieve(int start, int limit) throws Exception { return allVariants; } @Override public int getTotalNum() { return allVariants.size(); } @Override public void retrievalComplete() { } }; Class[] STRING_ONLY_COLUMN_CLASSES= new Class[header.size()]; for (int i= 0; i != STRING_ONLY_COLUMN_CLASSES.length; ++i) STRING_ONLY_COLUMN_CLASSES[i]= String.class; // FOR NOW ONLY CALLING THESE STRINGS SearchableTablePanel t; // if the selected columns use incorrect/outdated indices, default to all columns try { if (selectedViewColumns == null) { t= new SearchableTablePanel("Results", header.toArray(new String[header.size()]), STRING_ONLY_COLUMN_CLASSES, new int[0], true, true, Integer.MAX_VALUE, false, SearchableTablePanel.TableSelectionType.ROW, Integer.MAX_VALUE, dr); } else { t= new SearchableTablePanel("Results", header.toArray(new String[header.size()]), STRING_ONLY_COLUMN_CLASSES, getHiddenColumns(selectedViewColumns, header.size()), true, true, Integer.MAX_VALUE, false, SearchableTablePanel.TableSelectionType.ROW, Integer.MAX_VALUE, dr); } } catch (Exception e) { t= new SearchableTablePanel("Results", header.toArray(new String[header.size()]), STRING_ONLY_COLUMN_CLASSES, new int[0], true, true, Integer.MAX_VALUE, false, SearchableTablePanel.TableSelectionType.ROW, Integer.MAX_VALUE, dr); } t.setResizeOff(); t.setExportButtonVisible(true); t.setExportButtonEnabled(true); t.setHelpButtonVisible(false); //t.setChooseColumnsButtonVisible(false); t.forceRefreshData(); // without this, the table is empty with just a header return t; } /** Get the number of variants returned for this Incidental Findings instance. */ public int getVariantCount() { return allVariants.size(); } /** * Filter and store all variants for this individual. * @param afColumns A string list of the column aliases corresponding to the allele frequency DBs */ private void storeVariants(List<Object> afColumns) throws SQLException, RemoteException, SessionExpiredException { VariantManagerAdapter vma= MedSavantClient.VariantManager; /* Put the conditions together with ANDs and ORs. */ ComboCondition cc= new ComboCondition(ComboCondition.Op.AND); Condition dnaCondition= BinaryCondition.equalTo(ts.getDBColumn(BasicVariantColumns.DNA_ID), dnaID); cc.addCondition(dnaCondition); /* Iterate through the selected allele freq DBs and add to the condition. */ for (Object columnAlias : afColumns) { String columnName= dbAliasToColumn.get((String) columnAlias); ComboCondition currentAFComboCondition= new ComboCondition(ComboCondition.Op.OR); currentAFComboCondition.addCondition( BinaryCondition.lessThan(ts.getDBColumn(columnName), Double.toString(alleleFrequencyThreshold), true)); currentAFComboCondition.addCondition( UnaryCondition.isNull(ts.getDBColumn(columnName))); cc.addCondition(currentAFComboCondition); } /* Report only variants that have truncation mutations or that are present in * a disease variant database. So far, Clinvar and HGMD. */ ComboCondition truncationOrDBComboCondition= new ComboCondition(ComboCondition.Op.OR); // Truncation mutations, if they exist if (dbAliasToColumn.get(JANNOVAR_EFFECT) != null) { ComboCondition truncationComboCondition= new ComboCondition(ComboCondition.Op.OR); truncationComboCondition.addCondition( BinaryCondition.like(ts.getDBColumn(dbAliasToColumn.get(JANNOVAR_EFFECT)), STOPGAIN)); truncationComboCondition.addCondition( BinaryCondition.like(ts.getDBColumn(dbAliasToColumn.get(JANNOVAR_EFFECT)), SPLICING)); truncationComboCondition.addCondition( BinaryCondition.like(ts.getDBColumn(dbAliasToColumn.get(JANNOVAR_EFFECT)), FRAMESHIFTS)); truncationOrDBComboCondition.addCondition(truncationComboCondition); } // Clinvar DB annotations, if they exist if (dbAliasToColumn.get(CLINVAR_COLUMN) != null) { Condition clinvarCondition= UnaryCondition.isNotNull( ts.getDBColumn(dbAliasToColumn.get(CLINVAR_COLUMN))); truncationOrDBComboCondition.addCondition(clinvarCondition); } // HGMD DB annotations, if they exist if (dbAliasToColumn.get(HGMD_COLUMN) != null) { Condition hgmdCondition= UnaryCondition.isNotNull( ts.getDBColumn(dbAliasToColumn.get(HGMD_COLUMN))); truncationOrDBComboCondition.addCondition(hgmdCondition); } cc.addCondition(truncationOrDBComboCondition); Condition[][] conditionMatrix= new Condition[1][1]; conditionMatrix[0][0]= cc; /* Get variants in chunks based on a request limit offset to save memory. */ int position= 0; List<Object[]> currentVariants= null; while (currentVariants == null || currentVariants.size() != 0) { currentVariants= vma.getVariants(LoginController.getInstance().getSessionID(), ProjectController.getInstance().getCurrentProjectID(), ReferenceController.getInstance().getCurrentReferenceID(), conditionMatrix, position, DB_VARIANT_REQUEST_LIMIT); allVariants.addAll(filterVariants(currentVariants)); position += DB_VARIANT_REQUEST_LIMIT; } /* Identify potential compound hets. */ identifyPotentialCompoundHet(); } /** Get gene symbol. * * @return String of gene symbol, null if pattern does not match hgvs text from DB. */ private String getGeneSymbol(Object[] row) { String geneSymbol= null; String hgvsText= (String) row[geneSymbolIndex]; Matcher geneSymbolMatcher= geneSymbolPattern.matcher(hgvsText); if (geneSymbolMatcher.find()) { geneSymbol= geneSymbolMatcher.group(1); } return geneSymbol; } /** Filter a list of variants and return only those variants that pass all * filters. */ private List<Object[]> filterVariants(List<Object[]> input) { List<Object[]> filtered= new LinkedList<Object[]>(); for (Object[] row : input) { List<String> query= getClassification(getGeneSymbol(row), (String) row[BasicVariantColumns.INDEX_OF_ZYGOSITY], incidentalPanel); String classification= query.get(0); String inheritance= query.get(1); if (inheritance != null && !inheritance.equals("") && coverageAndRatioPass(row, true)) { List<Object> listRow= new ArrayList<Object>(Arrays.asList(row)); listRow.add(inheritance); listRow.add(classification); // Add to output list filtered.add(listRow.toArray()); } } return filtered; } /** Checks if this variant encodes a truncation mutation. * Expects Jannovar-style annotations in the EFFECT column. Based on Jannovar * documentation (JannovarTutorial.pdf) the possibilities are: * * NONSYNONYMOUS * STOPGAIN * STOPLOSS * NON_FS_INSERTION * FS_INSERTION * NON_FS_SUBSTITUTION * NON_FS_DELETION * FS_SUBSTITUTION * ncRNA_EXONIC * ncRNA_SPLICING * UTR3 * UTR5 * SYNONYMOUS * INTRONIC * ncRNA_INTRONIC * UPSTREAM * DOWNSTREAM * INTERGENIC * ERROR * * 2 other critically important annotations not mentioned in the documentation * are: * * SPLICING * FS_DELETION * * @deprecated Do not use */ private boolean hasTruncationMutation(Object[] row) { boolean result= false; String effectText= (String) row[effectIndex]; Matcher truncationMatcher= truncationPattern.matcher(effectText); if (truncationMatcher.matches()) { // use matches() rather than find() here result= true; } return result; } /** * Checks to see if variant is in a clinical database. * Current DBs include Clinvar and HGMD. * * IMPORTANT! * CURRENTLY HARDCODED FOR DEMO PURPOSES, NEEDS TO BE INTEGRATED INTO BINARYCONDITION/ * COMBOCONDITION QUERY TO SERVER. * * @deprecated Do not use */ private boolean inClinicalDB(Object[] row) { boolean result= false; String clinvarText= (String) row[header.indexOf("clinvar_20131105b, info")]; String hgmdText= (String) row[header.indexOf("hgmd_pro_allmut, gene")]; if ((clinvarText != null && clinvarText.length() > 0) || (hgmdText != null && hgmdText.length() > 0)) { result= true; } return result; } /** * Checks that this variant passes coverage and heterozygote ratio thresholds. * If there is no coverage information present for a variant, it is reported * as specified by the if-absent parameter. * @param row The variant row which contains all VCF details * @param outputIfAbsent If AD is absent from FORMAT file and DP4 is absent from INFO field, decides whether to output variant * @return Returns if this variant passes the coverage and ratio cutoffs */ private boolean coverageAndRatioPass(Object[] row, boolean outputIfAbsent) { boolean result= false; String info_field= (String) row[BasicVariantColumns.INDEX_OF_CUSTOM_INFO]; Matcher dp4Matcher= dp4Pattern.matcher(info_field); Matcher formatFieldMatcher= formatFieldPattern.matcher(info_field); Matcher sampleInfoFieldMatcher= sampleInfoFieldPattern.matcher(info_field); /* Process DP4 or AD or AO text (from VCF INFO or Format columns) if present. */ if (dp4Matcher.find()) { // NOTE: need to run find() to get group() below String dp4Text= dp4Matcher.group(1); /* From the samtools definition of the DP4 field: * Number of: * 1) forward ref alleles; * 2) reverse ref; * 3) forward non-ref; * 4) reverse non-ref alleles, used in variant calling. * Sum can be smaller than DP because low-quality bases are not counted. * * URL: http://samtools.sourceforge.net/mpileup.shtml */ // Split on ":" and check if 1) alt >= threshold, 2) alt/total >= ratio_threshold String[] delimited= dp4Text.split(","); int refCount= Integer.parseInt(delimited[0]) + Integer.parseInt(delimited[1]); int altCount= Integer.parseInt(delimited[2]) + Integer.parseInt(delimited[3]); double ratio= ((double) altCount)/(altCount + refCount); if (altCount >= coverageThreshold && ratio >= hetRatio) { result= true; } } else if (formatFieldMatcher.find() && sampleInfoFieldMatcher.find()) { // NOTE: need to run find() to get group() below String formatFieldText= formatFieldMatcher.group(1); String sampleInfoFieldText= sampleInfoFieldMatcher.group(1); // Split on ":" and check if 1) alt >= threshold, 2) alt/total >= ratio_threshold String[] adDelimited= formatFieldText.split(":"); int adIndex= Arrays.asList(adDelimited).indexOf("AD"); String[] adCoverageDelimited= sampleInfoFieldText.split(":")[adIndex].split(","); int refCount= Integer.parseInt(adCoverageDelimited[0]); int altCount= Integer.parseInt(adCoverageDelimited[1]); double ratio= ((double) altCount)/(altCount + refCount); if (altCount >= coverageThreshold && ratio >= hetRatio) { result= true; } } else if (outputIfAbsent) { // check the default behaviour if coverage is absent result= true; } return result; } /** Print the table columns to see how it's formatted internally. * For development testing. */ private void printTableSchema(TableSchema ts) { List<DbColumn> listOfColumns= ts.getColumns(); int index= 0; for (DbColumn d : listOfColumns) { System.out.println(d.getColumnNameSQL() + "\t" + index++); } } /** Get the table header as a String list. */ private List<String> getTableHeader() { List<String> header= new LinkedList<String>(); try { AnnotationFormat[] afs = ProjectController.getInstance().getCurrentAnnotationFormats(); for (AnnotationFormat af : afs) { for (CustomField field : af.getCustomFields()) { header.add(field.getAlias()); // Fields are added in correct order } } } catch (Exception e) { LOG.error(e); } return header; } /** Get the variant classification. * Variant can be classified as: disease, complex, potential compound het * or carrier. * @return a list containing the disease classification and inheritance */ private List<String> getClassification(String geneSymbol, String zygosity, String panel) { String classification= null; String inheritance= null; String queryAddition= ""; if (panel.equals("ACMG")) { queryAddition= "AND C.gene in (SELECT gene FROM acmg) "; } // MUST use single quotes String sql= "SELECT D.classification, S.synonym, C.* " + "FROM CGD C, disease_classification D, CGD_synonym S " + "WHERE C.gene LIKE '" + geneSymbol + "' " + queryAddition + " AND C.inheritance = S.inheritance " + " AND S.synonym = D.inheritance " + " AND D.zygosity LIKE '" + zygosityMap.get(zygosity) + "' " + " AND (D.gender LIKE '" + GENDER + "' OR D.gender LIKE 'both') "; ResultSet rs; try { rs= IncidentalDB.executeQuery(sql); int rowCount= 0; while (rs.next()) { /* There should only be a single result from this query. */ if (rowCount > 1) System.err.println(">1 row found for query: " + sql); /* First and only element is the inheritance, as specified in sql above. */ List temp= IncidentalDB.getRowAsList(rs); classification= (String) temp.get(0); inheritance= (String) temp.get(1); rowCount++; } } catch (SQLException e) { System.out.println("This was just executed: " + sql); e.printStackTrace(); } List<String> output= new ArrayList<String>(); output.add(classification); output.add(inheritance); return output; } /** Get the header for the table using the column aliases. */ public Map<String, String> getDbToHumanReadableMap() { Map<String, String> dbAliasToNameMap = new HashMap<String, String>(); try { AnnotationFormat[] afs = ProjectController.getInstance().getCurrentAnnotationFormats(); for (AnnotationFormat af : afs) { for (CustomField field : af.getCustomFields()) { dbAliasToNameMap.put(field.getAlias(), field.getColumnName()); } } } catch (Exception e) { LOG.error(e); } return dbAliasToNameMap; } /** Marks all potential compound heterozygotes in the set of variants. */ private void identifyPotentialCompoundHet() { /* Iterate through all variants and look for the same gene with >1 pika * instance where it's marked as a carrier. */ Map<String, Integer> geneCount= new HashMap<String, Integer>(); // Get the count for all carriers for (Object[] row : allVariants) { String gene= getGeneSymbol(row); if (row[CLASSIFICATION_INDEX].equals("carrier")) { if (!geneCount.containsKey(gene)) geneCount.put(gene, 0); geneCount.put(gene, (Integer) geneCount.get(gene) + 1); } } // Mark compound hets for (Object[] row : allVariants) { String gene= getGeneSymbol(row); if (((String) row[CLASSIFICATION_INDEX]).equals("carrier") && ((Integer) geneCount.get(gene)) > 1) { row[CLASSIFICATION_INDEX]= "potential compound het"; } } } /** * Get inverse int[]. If presented with [0,3,4] and the header has 6 elements, * returns the inverse list [1,2,5]. * @precondition selected.length <= length */ public static int[] getHiddenColumns(int[] selected, int length) { int[] results= new int[length - selected.length]; int selectedIndex= 0; int resultsIndex= 0; for (int i= 0; i != length; ++i) { if (selectedIndex < selected.length && selected[selectedIndex] == i) { selectedIndex++; } else { results[resultsIndex++]= i; } } return results; } }