package gov.nih.ncgc.bard.resourcemgr.extresource.go;

import gov.nih.ncgc.bard.resourcemgr.BardDBUtil;
import gov.nih.ncgc.bard.resourcemgr.BardExtResourceLoader;
import gov.nih.ncgc.bard.resourcemgr.IBardExtResourceLoader;
import gov.nih.ncgc.bard.resourcemgr.extresource.ontology.go.GONode;
import gov.nih.ncgc.bard.resourcemgr.extresource.ontology.go.GOQueryWorker;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;
import java.util.Vector;
import java.util.logging.Logger;

public class BardGOEntityLoader extends BardExtResourceLoader implements IBardExtResourceLoader {

    static final private Logger logger = Logger.getLogger(BardGOEntityLoader.class.getName());

    private String sqlSelectAssayTargetFromBiology =
            "select entity_id, ext_id from bard_biology where biology_dict_id = 1398 and entity='assay'";

    private String sqlSelectGOAssayTargetFromBiology =
            "select entity_id, ext_id from bard_biology where biology_dict_id = 1419 and entity='assay'";

    private String sqlInsertAssayGO =
            "insert into temp_go_assay (bard_assay_id, target_acc, current_acc, go_id, go_term, go_type, ev_code, implied, "
            + "go_assoc_db_ref, assoc_date)"
            + " values (?,?,?,?,?,?,?,?,?,?)";

    private String sqlUpdateAssayGoDBRefAndDate =
            "update temp_go_assay a join go_association b on a.target_acc=b.accession and a.go_id=b.term_acc and a.ev_code=b.evidence "
            + "set a.go_assoc_db_ref=b.db_ref, a.assoc_date=b.assoc_date where a.implied = 0";

    private String sqlUpdateProjectGoDBRefAndDate =
            "update temp_go_project a join go_association b on a.target_acc=b.accession and a.go_id=b.term_acc and a.ev_code=b.evidence "
            + "set a.go_assoc_db_ref=b.db_ref, a.assoc_date=b.assoc_date where a.implied = 0";

    private String sqlUpdateCompoundGoDBRefAndDate =
            "update temp_go_compound a join go_association b on a.target_acc=b.accession and a.go_id=b.term_acc and a.ev_code=b.evidence "
            + "set a.go_assoc_db_ref=b.db_ref, a.assoc_date=b.assoc_date where a.implied = 0";

    private String sqlSelectProjectTargets =
            "select bard_proj_id, accession from project_target where accession is not null order by bard_proj_id asc";

    private String sqlSelectProjectTargetFromBiology =
            "select entity_id, ext_id from bard_biology where biology_dict_id = 1398 and entity='project'";

    private String sqlSelectGOProjectTargetFromBiology =
            "select entity_id, ext_id from bard_biology where biology_dict_id = 1419 and entity='project'";

    private String sqlInsertProjectGO =
            "insert into temp_go_project (bard_proj_id, target_acc, current_acc, go_id, go_term, go_type, ev_code, implied, "
            + "go_assoc_db_ref, assoc_date)"
            + " values (?,?,?,?,?,?,?,?,?,?)";

    private String sqlInsertCompoundGO =
            "insert into temp_go_compound (cid, target_acc, go_id, go_term, go_type, ev_code, implied)"
            + " values (?,?,?,?,?,?,?)";

    private String sqlSelectCompoundTarget =
            "select cid, val from compound_annot where annot_key ='TARGETS'";

    private String sqlSelectCompoundTargetFromCompoundTarget =
            "select cid, target_acc from compound_target";

    PreparedStatement queryAccessionPS, insertGOPS;

    private long insertCnt;
    private long accessionCnt;
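    /**
     * Entry point called by the resource manager. When the service key contains
     * GO-ENTITY-REFRESH, rebuilds the GO-to-entity mapping tables: temp_go_assay and
     * temp_go_project are repopulated from protein targets and from direct GO
     * annotations in bard_biology, then swapped into production if they pass the
     * size-delta check.
     */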
    @Override
    public boolean load() {
        boolean loaded = false;
        log.info("In load() in BardGOEntityLoader. Reading service key.");
        try {
            if (service.getServiceKey().contains("GO-ENTITY-REFRESH")) {
                log.info("Starting GO Entity Refresh, first go_assay, then go_project.");

                // refresh go_assay from protein targets
                loadGOAssay();

                // load additional direct GO terms from bard_biology
                log.info("Starting GO Load from Biology");
                loadGoAssayFromBiology();
                log.info("Finished GO_ASSAY. Starting on GO_PROJECT.");

                // refresh go_project
                loadGOProject();
                log.info("Finished GO_PROJECT Load from Protein targets.");
                log.info("Starting GO PROJECT Load from Biology");
                loadGoProjectFromBiology();

                // swap temp tables to production
                BardDBUtil.swapTempTableToProductionIfPassesSizeDelta("temp_go_assay", "go_assay", 0.90, service.getDbURL());
                BardDBUtil.swapTempTableToProductionIfPassesSizeDelta("temp_go_project", "go_project", 0.90, service.getDbURL());

                loaded = true;
            }
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return loaded;
    }
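    /**
     * Rebuilds temp_go_assay from the protein targets recorded in bard_biology.
     * For each distinct UniProt accession, the current (possibly merged) accession
     * is resolved, the directly annotated GO nodes are collected, ancestor terms
     * are added as implied (ev_code GO_ANCESTOR_TERM), and one row per
     * (assay, GO term) pair is batch-inserted.
     */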
    public void loadGOAssay() {
        try {
            Connection conn = BardDBUtil.connect(service.getDbURL());
            conn.setAutoCommit(true);
            log.info("Assay load connection established");

            // set up the tables
            Statement stmt = conn.createStatement();
            stmt.execute("create table if not exists temp_go_assay like go_assay");
            stmt.execute("truncate table temp_go_assay");
            stmt.close();
            log.info("Initialized temp_go_assay");

            insertCnt = 0;

            GOQueryWorker worker = new GOQueryWorker();
            worker.prepareStatements(service.getDbURL());

            queryAccessionPS = conn.prepareStatement(sqlSelectAssayTargetFromBiology);
            insertGOPS = conn.prepareStatement(sqlInsertAssayGO);

            ResultSet rs = queryAccessionPS.executeQuery();

            HashSet<GONode> set = new HashSet<GONode>();
            long bardAssayID;
            String accession;
            String currAcc;
            logger.info("accession result set");

            Vector<Long> bardAssayIdV = new Vector<Long>();
            Vector<String> accV = new Vector<String>();

            // rather than collecting all GO nodes for all accessions up front,
            // process each accession's node set before building the next
            Hashtable<String, HashSet<Long>> accToBardIDHash = new Hashtable<String, HashSet<Long>>();
            // capture the current UniProt accession for each stored accession
            Hashtable<String, String> accToCurrAccHash = new Hashtable<String, String>();

            while (rs.next()) {
                bardAssayID = rs.getLong(1);
                accession = rs.getString(2).trim();

                // only go out to UniProt if we don't already have the current accession
                if (accToCurrAccHash.get(accession) == null) {
                    currAcc = getCurrentAccession(accession);
                    if (currAcc != null) {
                        accToCurrAccHash.put(accession, currAcc);
                        if (!accession.equals(currAcc)) {
                            log.info("Accession " + accession + " is not current. Updated accession = " + currAcc);
                        }
                    } else {
                        log.warning("Couldn't retrieve current uniprot for acc=" + accession + " bardAssayId=" + bardAssayID);
                        accToCurrAccHash.put(accession, accession);
                    }
                }

                if (accToBardIDHash.get(accession) == null) {
                    HashSet<Long> bardAssayV = new HashSet<Long>();
                    bardAssayV.add(bardAssayID);
                    accToBardIDHash.put(accession, bardAssayV);
                } else {
                    accToBardIDHash.get(accession).add(bardAssayID);
                }

                bardAssayIdV.add(bardAssayID);
                accV.add(accession);
            }
            rs.close();

            log.info("Collected Targets: Accession count=" + accV.size() + " Assay Count=" + bardAssayIdV.size());

            // collect all nodes into hashes keyed by go_id and go_acc; nodes are built
            // once and referenced as needed rather than rebuilt for each query
            worker.populateNodeHashes();

            Set<String> accKeys = accToBardIDHash.keySet();
            for (String accKey : accKeys) {
                set.clear();
                currAcc = accToCurrAccHash.get(accKey);
                log.info("process accession=" + accKey);

                // reset implied to false on all nodes
                worker.setAllNodeImplied(false);

                // get the accession's directly annotated nodes
                set.addAll(worker.getGONodesForAccessionUsingHash(currAcc));

                HashSet<GONode> newSet = new HashSet<GONode>();

                // walk up the hierarchy to collect each node's ancestors as implied terms
                for (GONode node : set) {
                    Vector<GONode> nodes = worker.getPredNodesFromHash(node);
                    for (GONode n : nodes) {
                        // don't overwrite a direct (primary) annotation; if absent, it's implied
                        if (!set.contains(n)) {
                            n.setImplied(true);
                            n.setEvCode("GO_ANCESTOR_TERM");
                            newSet.add(n);
                        }
                    }
                }
                newSet.addAll(set);

                for (long bardId : accToBardIDHash.get(accKey)) {
                    insertGOData(bardId, bardId, accKey, accToCurrAccHash.get(accKey), newSet);
                }
            }
            insertGOPS.executeBatch();
            log.info("Finished Temp Load");

            // set details of the association
            log.info("update association date and db ref in temp tables");
            stmt = conn.createStatement();
            stmt.executeUpdate(sqlUpdateAssayGoDBRefAndDate);

            conn.close();
            log.info("Done Load");
        } catch (SQLException sqle) {
            sqle.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
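    /**
     * Loads direct GO annotations for assays from bard_biology (dict id 1419) into
     * temp_go_assay. CAP-supplied GO ids are normalized to the GO:nnnnnnn form;
     * direct terms are tagged CAP_DIRECT_TERM and their ancestors CAP_ANCESTOR_TERM.
     */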
    private void loadGoAssayFromBiology() {
        try {
            log.info("Loading GO ASSAY from Biology GO");
            Connection conn = BardDBUtil.connect(service.getDbURL());
            conn.setAutoCommit(true);

            Statement stmt = conn.createStatement();
            ResultSet rs = stmt.executeQuery(sqlSelectGOAssayTargetFromBiology);

            Hashtable<String, Vector<Long>> goToBardExptIdHash = new Hashtable<String, Vector<Long>>();
            long bardExptId;
            String goId;
            Vector<Long> bardExptIdV;
            int goLength;

            while (rs.next()) {
                bardExptId = rs.getLong(1);
                goId = rs.getString(2);

                // normalize bare numeric ids to the canonical GO:nnnnnnn form
                if (!goId.startsWith("GO:")) {
                    goLength = goId.length();
                    for (int i = 0; i < 7 - goLength; i++) {
                        goId = "0" + goId;
                    }
                    goId = "GO:" + goId;
                }

                bardExptIdV = goToBardExptIdHash.get(goId);
                if (bardExptIdV == null) {
                    bardExptIdV = new Vector<Long>();
                    bardExptIdV.add(bardExptId);
                    goToBardExptIdHash.put(goId, bardExptIdV);
                } else {
                    bardExptIdV.add(bardExptId);
                }
            }
            stmt.close();
            log.info("Have collected GO_IDs for all bard_expt_id in bard_biology.");

            insertGOPS = conn.prepareStatement(sqlInsertAssayGO);

            Set<String> goIdSet = goToBardExptIdHash.keySet();
            GOQueryWorker worker = new GOQueryWorker();
            worker.prepareStatements(service.getDbURL());
            worker.populateNodeHashes();

            GONode node;
            HashSet<GONode> impliedV = new HashSet<GONode>();

            for (String directGoId : goIdSet) {
                worker.setAllNodeImplied(true);
                log.info("Processing direct GO for go_id=" + directGoId);

                node = worker.getNodeForGoAcc(directGoId);
                if (node == null) {
                    log.warning("GO BIOLOGY for assays update FAILED for CAP GO_ID (not a valid id)=" + directGoId);
                    continue;
                }

                node.setImplied(false);
                impliedV.addAll(worker.getPredNodesFromHash(node));
                for (GONode n : impliedV) {
                    n.setEvCode("CAP_ANCESTOR_TERM");
                }
                node.setEvCode("CAP_DIRECT_TERM");
                impliedV.add(node);

                bardExptIdV = goToBardExptIdHash.get(node.getGoAccession());
                for (long beid : bardExptIdV) {
                    insertGOData(beid, 0, "", "", impliedV);
                }
                insertGOPS.executeBatch();
                impliedV.clear();
            }

            conn.close();
            log.info("Done Load");
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Resolves an accession to its current primary UniProt accession by fetching
     * http://www.uniprot.org/uniprot/{acc}.txt and reading the first entry on the
     * AC line. Returns null if the entry cannot be retrieved.
     */
    public String getCurrentAccession(String acc) {
        String currAcc = null;
        String uniprotURLStr = "http://www.uniprot.org/uniprot/" + acc + ".txt";
        boolean stopSearch = false;
        try {
            URL uniprotURL = new URL(uniprotURLStr);
            InputStream is = (InputStream) uniprotURL.getContent();
            BufferedReader br = new BufferedReader(new InputStreamReader(is));
            String line;
            while ((line = br.readLine()) != null && !stopSearch) {
                if (line.startsWith("AC")) {
                    // AC lines look like "AC   P12345; Q67890;" and the first id is the current one
                    line = line.substring(3);
                    currAcc = line.split(";")[0].trim();
                    stopSearch = true;
                }
            }
            br.close();
            is.close();
        } catch (MalformedURLException e) {
            log.warning("Malformed uniprot url:" + uniprotURLStr);
            e.printStackTrace();
            return null;
        } catch (IOException e) {
            log.warning("IOException during retrieval of current accession from:" + uniprotURLStr);
            e.printStackTrace();
            return null;
        }
        return currAcc;
    }
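    /**
     * Rebuilds temp_go_project from project protein targets in bard_biology,
     * mirroring loadGOAssay(): direct GO annotations per accession plus implied
     * ancestor terms, one row per (project, GO term) pair.
     */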
    public void loadGOProject() {
        try {
            Connection conn = BardDBUtil.connect(service.getDbURL());
            conn.setAutoCommit(true);
            log.info("Project load connection established");

            // set up the tables
            Statement stmt = conn.createStatement();
            stmt.execute("create table if not exists temp_go_project like go_project");
            stmt.execute("truncate table temp_go_project");
            stmt.close();
            log.info("Initialized temp_go_project");

            insertCnt = 0;

            GOQueryWorker worker = new GOQueryWorker();
            worker.prepareStatements(service.getDbURL());

            queryAccessionPS = conn.prepareStatement(sqlSelectProjectTargetFromBiology);
            insertGOPS = conn.prepareStatement(sqlInsertProjectGO);

            ResultSet rs = queryAccessionPS.executeQuery();

            HashSet<GONode> set = new HashSet<GONode>();
            long projectID;
            String accession;
            String currAcc;
            logger.info("accession result set");

            Vector<String> accV = new Vector<String>();

            // rather than collecting all GO nodes for all accessions up front,
            // process each accession's node set before building the next
            Hashtable<String, HashSet<Long>> accToProjectHash = new Hashtable<String, HashSet<Long>>();
            // capture the current UniProt accession for each stored accession
            Hashtable<String, String> accToCurrAccHash = new Hashtable<String, String>();

            while (rs.next()) {
                projectID = rs.getLong(1);
                accession = rs.getString(2).trim();

                // only go out to UniProt if we don't already have the current accession
                if (accToCurrAccHash.get(accession) == null) {
                    currAcc = getCurrentAccession(accession);
                    if (currAcc != null) {
                        accToCurrAccHash.put(accession, currAcc);
                        if (!accession.equals(currAcc)) {
                            log.info("Accession " + accession + " is not current. Updated accession = " + currAcc);
                        }
                    } else {
                        log.warning("Couldn't retrieve current uniprot for acc=" + accession + " bardProjId=" + projectID);
                        accToCurrAccHash.put(accession, accession);
                    }
                }

                if (accToProjectHash.get(accession) == null) {
                    HashSet<Long> v = new HashSet<Long>();
                    v.add(projectID);
                    accToProjectHash.put(accession, v);
                } else {
                    accToProjectHash.get(accession).add(projectID);
                }
                accV.add(accession);
            }
            rs.close();

            log.info("Collected Targets: Accession count=" + accV.size() + " Project Count=" + accToProjectHash.size());

            // nodes are held in hashes keyed by go_id and go_acc so they are built once
            worker.populateNodeHashes();

            Set<String> accKeys = accToProjectHash.keySet();
            for (String accKey : accKeys) {
                set.clear();
                currAcc = accToCurrAccHash.get(accKey);
                log.info("process accession=" + accKey);

                // reset implied to false on all nodes
                worker.setAllNodeImplied(false);

                // get the accession's directly annotated nodes
                set.addAll(worker.getGONodesForAccessionUsingHash(currAcc));

                HashSet<GONode> newSet = new HashSet<GONode>();

                // walk up the hierarchy to collect each node's ancestors as implied terms
                for (GONode node : set) {
                    Vector<GONode> nodes = worker.getPredNodesFromHash(node);
                    for (GONode n : nodes) {
                        // don't overwrite a direct (primary) annotation; if absent, it's implied
                        if (!set.contains(n)) {
                            n.setImplied(true);
                            n.setEvCode("GO_ANCESTOR_TERM");
                            newSet.add(n);
                        }
                    }
                }
                newSet.addAll(set);

                for (long projID : accToProjectHash.get(accKey)) {
                    insertGODataForProject(projID, accKey, accToCurrAccHash.get(accKey), newSet);
                }
            }
            insertGOPS.executeBatch();
            log.info("Finished Temp Load");

            // set details of the association
            log.info("update association date and db ref in temp tables");
            stmt = conn.createStatement();
            stmt.executeUpdate(sqlUpdateProjectGoDBRefAndDate);

            conn.close();
            logger.info("Done Load");
        } catch (SQLException sqle) {
            sqle.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
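    /**
     * Loads direct GO annotations for projects from bard_biology (dict id 1419)
     * into temp_go_project; the project-side counterpart of loadGoAssayFromBiology().
     */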
    private void loadGoProjectFromBiology() {
        try {
            log.info("Loading GO PROJECT from Biology GO");
            Connection conn = BardDBUtil.connect(service.getDbURL());
            conn.setAutoCommit(true);

            Statement stmt = conn.createStatement();
            ResultSet rs = stmt.executeQuery(sqlSelectGOProjectTargetFromBiology);

            Hashtable<String, Vector<Long>> goToBardExptIdHash = new Hashtable<String, Vector<Long>>();
            long bardExptId;
            String goId;
            Vector<Long> bardExptIdV;
            int goLength;

            while (rs.next()) {
                bardExptId = rs.getLong(1);
                goId = rs.getString(2);

                // normalize bare numeric ids to the canonical GO:nnnnnnn form
                if (!goId.startsWith("GO:")) {
                    goLength = goId.length();
                    for (int i = 0; i < 7 - goLength; i++) {
                        goId = "0" + goId;
                    }
                    goId = "GO:" + goId;
                }

                bardExptIdV = goToBardExptIdHash.get(goId);
                if (bardExptIdV == null) {
                    bardExptIdV = new Vector<Long>();
                    bardExptIdV.add(bardExptId);
                    goToBardExptIdHash.put(goId, bardExptIdV);
                } else {
                    bardExptIdV.add(bardExptId);
                }
            }
            stmt.close();
            log.info("Have collected GO_IDs for all bard_expt_id in bard_biology (go_project update).");

            insertGOPS = conn.prepareStatement(sqlInsertProjectGO);

            Set<String> goIdSet = goToBardExptIdHash.keySet();
            GOQueryWorker worker = new GOQueryWorker();
            worker.prepareStatements(service.getDbURL());
            worker.populateNodeHashes();

            GONode node;
            HashSet<GONode> impliedV = new HashSet<GONode>();

            for (String directGoId : goIdSet) {
                worker.setAllNodeImplied(true);
                log.info("Processing direct GO for go_id=" + directGoId);

                node = worker.getNodeForGoAcc(directGoId);
                if (node == null) {
                    log.warning("GO BIOLOGY for projects update FAILED for CAP GO_ID (not a valid id)=" + directGoId);
                    continue;
                }

                node.setImplied(false);
                impliedV.addAll(worker.getPredNodesFromHash(node));
                for (GONode n : impliedV) {
                    n.setEvCode("CAP_ANCESTOR_TERM");
                }
                node.setEvCode("CAP_DIRECT_TERM");
                impliedV.add(node);

                bardExptIdV = goToBardExptIdHash.get(node.getGoAccession());
                for (long beid : bardExptIdV) {
                    insertGODataForProject(beid, "", "", impliedV);
                }
                insertGOPS.executeBatch();
                impliedV.clear();
            }

            conn.close();
            log.info("Done Load");
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
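    /**
     * Populates temp_go_compound from compound targets. Targets come from two
     * sources: the TARGETS compound annotation, where the accession is the fourth
     * |-delimited field of the value, and the compound_target table. Not invoked
     * from load(); kept for standalone or manual runs (see main()).
     */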
    public void loadGOCompound() {
        try {
            Connection conn = BardDBUtil.connect(service.getDbURL());
            conn.setAutoCommit(true);

            insertCnt = 0;

            GOQueryWorker worker = new GOQueryWorker();
            worker.prepareStatements(service.getDbURL());

            queryAccessionPS = conn.prepareStatement(sqlSelectCompoundTarget);
            insertGOPS = conn.prepareStatement(sqlInsertCompoundGO);

            ResultSet rs = queryAccessionPS.executeQuery();

            HashSet<GONode> set = new HashSet<GONode>();
            long cid;
            String accession;
            logger.info("accession result set");

            Vector<Long> cids = new Vector<Long>();
            Vector<String> accV = new Vector<String>();

            // rather than collecting all GO nodes for all accessions up front,
            // process each accession's node set before building the next
            Hashtable<String, HashSet<Long>> accToCidHash = new Hashtable<String, HashSet<Long>>();
            String[] toks;

            while (rs.next()) {
                cid = rs.getLong(1);
                toks = rs.getString(2).split("\\|");
                if (toks.length > 3) {
                    accession = toks[3].trim();
                    logger.info("Have Accession " + accession);
                } else {
                    continue;
                }
                logger.info("cid capture, cid=" + cid);

                if (accToCidHash.get(accession) == null) {
                    HashSet<Long> v = new HashSet<Long>();
                    v.add(cid);
                    accToCidHash.put(accession, v);
                } else {
                    accToCidHash.get(accession).add(cid);
                }
                cids.add(cid);
                accV.add(accession);
            }
            rs.close();

            // also populate based on the compound_target table
            queryAccessionPS = conn.prepareStatement(sqlSelectCompoundTargetFromCompoundTarget);
            rs = queryAccessionPS.executeQuery();
            while (rs.next()) {
                cid = rs.getLong(1);
                accession = rs.getString(2);
                logger.info("cid compound/target capture, cid=" + cid);

                if (accToCidHash.get(accession) == null) {
                    HashSet<Long> v = new HashSet<Long>();
                    v.add(cid);
                    accToCidHash.put(accession, v);
                } else {
                    accToCidHash.get(accession).add(cid);
                }
                cids.add(cid);
                accV.add(accession);
            }

            // nodes are held in hashes keyed by go_id and go_acc so they are built once
            worker.populateNodeHashes();

            Set<String> accKeys = accToCidHash.keySet();
            for (String accKey : accKeys) {
                set.clear();
                logger.info("process accession=" + accKey);

                // reset implied to false on all nodes
                worker.setAllNodeImplied(false);

                // get the accession's directly annotated nodes
                set.addAll(worker.getGONodesForAccessionUsingHash(accKey));

                HashSet<GONode> newSet = new HashSet<GONode>();

                // walk up the hierarchy to collect each node's ancestors as implied terms
                for (GONode node : set) {
                    Vector<GONode> nodes = worker.getPredNodesFromHash(node);
                    for (GONode n : nodes) {
                        // don't overwrite a direct (primary) annotation; if absent, it's implied
                        if (!set.contains(n)) {
                            n.setImplied(true);
                            n.setEvCode("GO_ANCESTOR_TERM");
                            newSet.add(n);
                        }
                    }
                }
                newSet.addAll(set);

                for (long c : accToCidHash.get(accKey)) {
                    insertGODataForCompound(c, accKey, newSet);
                }
            }
            insertGOPS.executeBatch();

            // set details of the association
            logger.info("update compound assoc dbref and date");
            Statement stmt = conn.createStatement();
            stmt.executeUpdate(sqlUpdateCompoundGoDBRefAndDate);

            conn.close();
            logger.info("Done Load");
        } catch (SQLException sqle) {
            sqle.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    private void insertGOData(long bardAssayID, long assayID, String accession, String currAcc, Set<GONode> nodeSet) throws SQLException {
        String ontologyType;
        for (GONode node : nodeSet) {
            insertCnt++;
            insertGOPS.setLong(1, bardAssayID);
            insertGOPS.setString(2, accession);
            insertGOPS.setString(3, currAcc);
            insertGOPS.setString(4, node.getGoAccession());
            insertGOPS.setString(5, node.getGoName());

            // collapse the ontology name to the single-letter GO aspect code
            ontologyType = node.getGoOntologyType();
            if (ontologyType.equals("biological_process")) ontologyType = "P";
            if (ontologyType.equals("molecular_function")) ontologyType = "F";
            if (ontologyType.equals("cellular_component")) ontologyType = "C";
            insertGOPS.setString(6, ontologyType);

            insertGOPS.setString(7, node.getEvCode());
            insertGOPS.setInt(8, node.isImplied() ? 1 : 0);
            // db_ref and assoc_date are filled in afterward by the go_association join update
            insertGOPS.setNull(9, java.sql.Types.VARCHAR);
            insertGOPS.setNull(10, java.sql.Types.VARCHAR);
            insertGOPS.addBatch();

            if (insertCnt % 20 == 0) {
                insertGOPS.executeBatch();
                insertGOPS.clearBatch();
            }
        }
        insertGOPS.executeBatch();
    }
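    /**
     * Batch-inserts one temp_go_project row per GO node for the given project.
     * Identical to insertGOData() except for the target table; db_ref and
     * assoc_date are left null and filled in by the later join update.
     */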
    private void insertGODataForProject(long projectID, String accession, String currAcc, Set<GONode> nodeSet) throws SQLException {
        String ontologyType;
        for (GONode node : nodeSet) {
            insertCnt++;
            insertGOPS.setLong(1, projectID);
            insertGOPS.setString(2, accession);
            insertGOPS.setString(3, currAcc);
            insertGOPS.setString(4, node.getGoAccession());
            insertGOPS.setString(5, node.getGoName());

            // collapse the ontology name to the single-letter GO aspect code
            ontologyType = node.getGoOntologyType();
            if (ontologyType.equals("biological_process")) ontologyType = "P";
            if (ontologyType.equals("molecular_function")) ontologyType = "F";
            if (ontologyType.equals("cellular_component")) ontologyType = "C";
            insertGOPS.setString(6, ontologyType);

            insertGOPS.setString(7, node.getEvCode());
            insertGOPS.setInt(8, node.isImplied() ? 1 : 0);
            insertGOPS.setNull(9, java.sql.Types.VARCHAR);
            insertGOPS.setNull(10, java.sql.Types.VARCHAR);
            insertGOPS.addBatch();

            if (insertCnt % 20 == 0) {
                insertGOPS.executeBatch();
                insertGOPS.clearBatch();
            }
        }
        insertGOPS.executeBatch();
    }

    private void insertGODataForCompound(long cid, String accession, Set<GONode> nodeSet) throws SQLException {
        String ontologyType;
        for (GONode node : nodeSet) {
            insertCnt++;
            insertGOPS.setLong(1, cid);
            insertGOPS.setString(2, accession);
            insertGOPS.setString(3, node.getGoAccession());
            insertGOPS.setString(4, node.getGoName());

            // collapse the ontology name to the single-letter GO aspect code
            ontologyType = node.getGoOntologyType();
            if (ontologyType.equals("biological_process")) ontologyType = "P";
            if (ontologyType.equals("molecular_function")) ontologyType = "F";
            if (ontologyType.equals("cellular_component")) ontologyType = "C";
            insertGOPS.setString(5, ontologyType);

            insertGOPS.setString(6, node.getEvCode());
            insertGOPS.setInt(7, node.isImplied() ? 1 : 0);
            insertGOPS.addBatch();

            if (insertCnt % 10 == 0) {
                insertGOPS.executeBatch();
                insertGOPS.clearBatch();
                logger.info("Insert Count = " + insertCnt);
            }
        }
        insertGOPS.executeBatch();
    }

    public static void main(String[] args) {
        BardGOEntityLoader loader = new BardGOEntityLoader();
        // loader.loadGOCompound();
        System.out.println("**" + loader.getCurrentAccession("D3Z2V4") + "**");
    }

    @Override
    public String getLoadStatusReport() {
        // no status reporting implemented
        return null;
    }
}