package org.seqcode.data.seqdata.tools;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.PreparedStatement;
import java.sql.SQLException;

import org.seqcode.data.connections.DatabaseConnectionManager;
import org.seqcode.data.connections.DatabaseException;
import org.seqcode.data.connections.UnknownRoleException;
import org.seqcode.data.core.MetadataLoader;
import org.seqcode.data.seqdata.SeqAlignment;
import org.seqcode.data.seqdata.SeqDataLoader;
import org.seqcode.data.seqdata.SeqExpt;
import org.seqcode.genome.Genome;
import org.seqcode.gseutils.Args;
import org.seqcode.gseutils.NotFoundException;

/**
 * Update existing SeqExpt and SeqAlignment descriptions from a file.
 * This imports a file like deepseq.list when populating a new installation
 * of the seqdata mysql database.
 *
 * @author mahony
 *
 * Usage: UpdateAlignmentsFromFile --list "filename"
 *
 * The assumed file is in the deepseq.list format (tab-separated), with the
 * following fields:
 *
 *  0) ReadDBID
 *  1) ExptType
 *  2) Lab
 *  3) ExptCondition
 *  4) ExptTarget
 *  5) CellLine
 *  6) Replicate
 *  7) Aligner
 *  8) Genome
 *  9) Permissions
 * 10) PubSource
 * 11) PublicDBID
 * 12) CollabExptID
 * 13) CollabAlignID
 * 14) ReadType
 * 15) AlignType
 * 16) ReadLength
 * 17) TotalReads
 * 18) AlignedHits
 * 19) UniquelyAlignedHits
 * 20) DBLoadedHits
 * 21) DBLoadedWeight
 * 22) DBLoadedType2Hits
 * 23) DBLoadedType2Weight
 * 24) DBLoadedPairs
 * 25) DBLoadedPairWeight
 * 26) ReadsFile
 * 27) AlignDir
 * 28) AlignFile
 * 29) IDXFile
 * 30) AlignParamFile
 * 31) ExptNote
 * 32) LoadDate
 * 33) ExptName
 *
 * ExptName (field 33) is split on ';' and its first three pieces are used as
 * the experiment name, the replicate and the alignment name respectively.
 */
public class UpdateAlignmentsFromFile {

    /** Highest column index read is 33 (ExptName), so a data row needs at least this many fields. */
    private static final int MIN_FIELDS = 34;

    /**
     * Reads the file given by {@code --list} and, for every data row, updates the
     * experiment and alignment records identified by the row's ReadDBID.
     * The header row (first field "ReadDBID"), rows starting with '#', and blank
     * rows are skipped. The DBID of each updated alignment is printed to stdout.
     *
     * @param args command-line arguments; {@code --list <file>} is required
     * @throws SQLException if a database operation fails
     * @throws IOException if the list file cannot be read
     * @throws NotFoundException if a referenced genome or record cannot be found
     * @throws IllegalArgumentException if the "seqdata" role is unknown, or a data
     *         row has too few columns or a malformed ExptName
     */
    public static void main(String args[]) throws SQLException, IOException, NotFoundException {
        String filename = Args.parseString(args,"list",null);
        if(filename==null){
            System.err.println("UpdateAlignmentsFromFile:\n" +
                    "\t--list <deepseq.list format file>\n");
            System.exit(1);
        }

        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename)));
        try {
            SeqDataLoader loader = new SeqDataLoader();
            try {
                MetadataLoader core = loader.getMetadataLoader();
                java.sql.Connection cxn = null;
                try {
                    cxn = DatabaseConnectionManager.getConnection("seqdata");
                    cxn.setAutoCommit(true);

                    //Iterate through the file
                    String line;
                    int lineNumber = 0;
                    while ((line = reader.readLine()) != null) {
                        lineNumber++;
                        //Blank (or tab-only) lines carry no record
                        if (line.trim().isEmpty()) {
                            continue;
                        }
                        String[] fields = line.split("\t");
                        //Skip the header line of the deepseq.list file and comment lines
                        if (fields[0].equals("ReadDBID") || fields[0].startsWith("#")) {
                            continue;
                        }
                        //String.split drops trailing empty fields, so a row whose ExptName
                        //is missing shows up here as a short row
                        if (fields.length < MIN_FIELDS) {
                            throw new IllegalArgumentException("Line " + lineNumber + " of " + filename
                                    + " has " + fields.length + " fields; expected at least " + MIN_FIELDS);
                        }
                        updateRecord(fields, loader, core, cxn);
                    }
                } catch (UnknownRoleException e) {
                    throw new IllegalArgumentException("Unknown role: seqdata" + e, e);
                } finally {
                    if (cxn != null) {
                        try {
                            cxn.close();
                        } catch (Exception ex) {
                            throw new DatabaseException("Couldn't close connection with role seqdata"+ ex);
                        }
                    }
                }
            } finally {
                //Release the loader even when a row fails
                loader.close();
            }
        } finally {
            //Release the input file even when a row fails
            reader.close();
        }
    }

    /**
     * Applies one data row: updates the experiment that owns the alignment with the
     * row's ReadDBID, then updates the alignment itself, then (if the AlignParamFile
     * exists on disk) loads its alignment parameters. Prints the alignment DBID to stdout.
     * Records are looked up by DBID because the names may be changed by this update.
     */
    private static void updateRecord(String[] fields, SeqDataLoader loader, MetadataLoader core,
            java.sql.Connection cxn)
            throws SQLException, IOException, NotFoundException, UnknownRoleException {
        //Variables
        Integer dbid = Integer.valueOf(fields[0]);
        String[] alignpieces = fields[33].split(";");
        if (alignpieces.length < 3) {
            throw new IllegalArgumentException("ExptName \"" + fields[33] + "\" for ReadDBID " + dbid
                    + " must have the form name;replicate;alignment");
        }
        String fullName = alignpieces[0] + ";" + alignpieces[1] + ";" + alignpieces[2];
        Genome genome = Genome.findGenome(fields[8]);
        String etypestring = fields[1];
        String labstring = fields[2];
        String conditionstring = fields[3];
        String targetstring = fields[4];
        String cellsstring = fields[5];
        String rtypestring = fields[14];
        String atypestring = fields[15];
        int readlength = Integer.parseInt(fields[16]);
        int numreads = parseReadCount(fields[17]);
        int numhits = Integer.parseInt(fields[20]);
        float totalweight = Float.parseFloat(fields[21]);
        int numtype2hits = Integer.parseInt(fields[22]);
        float totaltype2weight = Float.parseFloat(fields[23]);
        int numpairs = Integer.parseInt(fields[24]);
        float totalpairweight = Float.parseFloat(fields[25]);
        String permissions = fields[9];
        String collabexptid = fields[12];
        String collabalignid = fields[13];
        String publicsource = fields[10];
        String publicdbid = fields[11];
        String fqfile = fields[26];
        String aligndir = fields[27];
        String alignfile = fields[28];
        String idxfile = fields[29];
        String paramsfname = fields[30];
        String exptnote = fields[31];

        //Have to load alignment and experiment by DBID since the naming may change
        SeqAlignment alignment = loader.loadAlignment(dbid);
        if (alignment == null) {
            throw new DatabaseException("Can't find alignment "+dbid+" for " + alignpieces[2] + " for " + alignpieces[0]);
        }

        //Update experiment
        int exptID = alignment.getExpt().getDBID();
        SeqExpt expt = null;
        try {
            expt = loader.loadExperiment(exptID);
        } catch (NotFoundException e) {
            System.err.println("No experiment found for " + fullName);
            System.exit(1);
        }

        System.err.println("Updating experiment " + fullName);
        PreparedStatement exptUpdate = SeqExpt.createUpdateWithID(cxn);
        try {
            exptUpdate.setString(1, alignpieces[0]);
            exptUpdate.setString(2, alignpieces[1]);
            exptUpdate.setInt(3, genome.getSpeciesDBID());
            exptUpdate.setInt(4, core.loadExptType(etypestring, true, false).getDBID());
            exptUpdate.setInt(5, core.loadLab(labstring, true, false).getDBID());
            exptUpdate.setInt(6, core.loadExptCondition(conditionstring, true, false).getDBID());
            exptUpdate.setInt(7, core.loadExptTarget(targetstring, true, false).getDBID());
            exptUpdate.setInt(8, core.loadCellLine(cellsstring, true, false).getDBID());
            exptUpdate.setInt(9, core.loadReadType(rtypestring, true, false).getDBID());
            exptUpdate.setInt(10, readlength);
            exptUpdate.setInt(11, numreads);
            exptUpdate.setString(12, collabexptid);
            exptUpdate.setString(13, publicsource);
            exptUpdate.setString(14, publicdbid);
            exptUpdate.setString(15, fqfile);
            exptUpdate.setString(16, exptnote);
            exptUpdate.setInt(17, expt.getDBID());
            exptUpdate.execute();
        } finally {
            //One statement is created per row; close it here so rows don't accumulate open statements
            closeQuietly(exptUpdate);
        }

        //Re-load under the new name to confirm the update took effect
        try {
            expt = loader.loadExperiment(alignpieces[0], alignpieces[1]);
        } catch (NotFoundException e2) {
            throw new DatabaseException("Couldn't update experiment for " + alignpieces[0] + "," + alignpieces[1]);
        }

        //Alignment already loaded above
        try {
            PreparedStatement alignUpdate = SeqAlignment.createUpdateStatementWithID(cxn);
            try {
                System.err.println("Updating alignment " + fullName);
                alignUpdate.setInt(1, expt.getDBID());
                alignUpdate.setString(2, alignpieces[2]);
                alignUpdate.setInt(3, genome.getDBID());
                alignUpdate.setString(4, permissions);
                alignUpdate.setInt(5, core.loadAlignType(atypestring, true, false).getDBID());
                alignUpdate.setInt(6, numhits);
                alignUpdate.setFloat(7, totalweight);
                alignUpdate.setInt(8, numtype2hits);
                alignUpdate.setFloat(9, totaltype2weight);
                alignUpdate.setInt(10, numpairs);
                alignUpdate.setFloat(11, totalpairweight);
                alignUpdate.setString(12, aligndir);
                alignUpdate.setString(13, alignfile);
                alignUpdate.setString(14, idxfile);
                alignUpdate.setString(15, collabalignid);
                alignUpdate.setInt(16, dbid);
                alignUpdate.execute();
            } finally {
                closeQuietly(alignUpdate);
            }

            alignment = loader.loadAlignment(expt, alignpieces[2], genome);
            //Guard against a null result, as is done for the by-ID lookup above
            if (alignment == null) {
                throw new DatabaseException("Couldn't update alignment " + dbid + " for " + fullName);
            }

            File f = new File(paramsfname);
            if (f.exists()) {
                System.err.println("Reading alignment parameters from " + f);
                loader.addAlignmentParameters(alignment, f);
            }
        } catch (IOException e) {
            //A failure here is reported but does not stop the remaining rows
            System.err.println("Couldn't add alignment parameters");
            e.printStackTrace();
        }

        System.out.println(alignment.getDBID());
    }

    /**
     * Parses the TotalReads column. Paired reads are counted as one: for a value
     * containing '+', only the part before the first '+' is used.
     */
    private static int parseReadCount(String numreadsStr) {
        int plus = numreadsStr.indexOf('+');
        return Integer.parseInt(plus >= 0 ? numreadsStr.substring(0, plus) : numreadsStr);
    }

    /** Closes a statement, reporting (but not propagating) a failure so it cannot mask an earlier exception. */
    private static void closeQuietly(PreparedStatement stmt) {
        if (stmt == null) {
            return;
        }
        try {
            stmt.close();
        } catch (SQLException ex) {
            System.err.println("Couldn't close statement: " + ex);
        }
    }
}