package gov.nih.ncgc.bard.resourcemgr.extresource.ontology.go; import gov.nih.ncgc.bard.resourcemgr.BardDBUtil; import gov.nih.ncgc.bard.resourcemgr.extresource.ontology.OntologyNode; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.HashSet; import java.util.Hashtable; import java.util.Set; import java.util.Stack; import java.util.Vector; import java.util.logging.Logger; public class GOQueryWorker { static final private Logger logger = Logger.getLogger(GOQueryWorker.class.getName()); //these will come from properties private String dbURL = "jdbc:mysql://protein.nhgri.nih.gov:3306/bard2"; private String driverName = "com.mysql.jdbc.Driver"; private Connection conn; private Hashtable <Integer, GONode> idNodeHash; private Hashtable <String, GONode> goAccNodeHash; public GOQueryWorker() { } public void prepareStatements(String dbURL) { try { conn = BardDBUtil.connect(dbURL); parentPS = conn.prepareStatement("select term1_id from go_term2term where term2_id = ?"); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public void populateIdNodeHash() throws SQLException { idNodeHash = new Hashtable <Integer, GONode> (); PreparedStatement ps = conn.prepareStatement("select id from go_term"); ResultSet rs = ps.executeQuery(); while(rs.next()) { idNodeHash.put(rs.getInt(1), this.getNodeForID(rs.getInt(1))); //logger.info("id node hash for id:"+rs.getInt(1)); } rs.close(); } public void populateNodeHashes() throws SQLException { populateIdNodeHash(); populateGoAccNodeHashFromIdHash(); } public void populateGoAccNodeHash() throws SQLException { goAccNodeHash = new Hashtable <String, GONode> (); PreparedStatement ps = conn.prepareStatement("select acc from go_term"); ResultSet rs = ps.executeQuery(); while(rs.next()) { goAccNodeHash.put(rs.getString(1), getNodeForGoAcc(rs.getString(1))); } rs.close(); } public void populateGoAccNodeHashFromIdHash() throws SQLException { goAccNodeHash = new Hashtable <String, GONode> (); Set <Integer> keys = idNodeHash.keySet(); GONode currNode; for(int key : keys) { currNode = idNodeHash.get(key); goAccNodeHash.put(currNode.getGoAccession(), currNode); //logger.info("pop go acc node hash from id hash "+currNode.getGoAccession()); } } public GONode appendChildNodesToNode(GONode node) { GONode root = node; return root; } public GONode appendParentNodesToNode(GONode node) { GONode root = node; try { Vector <Integer> visitedAccList = new Vector <Integer>(); Vector <Integer> goAccList = new Vector <Integer>(); Vector <Integer> resultList = new Vector <Integer>(); Vector <OntologyNode> resultNodes = new Vector <OntologyNode>(); PreparedStatement idStmt = conn.prepareStatement("select id from go_term where acc = ?"); idStmt.setString(1, node.getGoAccession()); int goID = 0; ResultSet idRS = idStmt.executeQuery(); if(idRS.next()) { goID = idRS.getInt(1); } else { return null; } PreparedStatement childQuery = conn.prepareStatement("select term1_id from go_term2term where term2_id = ?"); //initialize goAccList goAccList.add(goID); ResultSet childRS; //root node GONode currentNode = createAndPopulateNodeFromID(goID); root = currentNode; resultNodes.add(currentNode); int currentID; int listIndex = 0; //use a stack-pop approach to avoid recursion record overhead on stack memory while (goAccList.size() > 0) { // for(int listIndex = 0; listIndex < goAccList.size(); listIndex++) { goID = goAccList.get(0); //always get the first in queue currentNode = (GONode) resultNodes.get(0); childQuery.setInt(1, goID); childRS = childQuery.executeQuery(); resultList.clear(); while(childRS.next()) { currentID = childRS.getInt(1); resultList.add(currentID); } //the result list has the current id list //these are children of the current node in the go accession list //push the child nodes onto the goAccessionList goAccList.addAll(resultList); //register the current id as visited visitedAccList.add(goID); //pop the current id from the source list goAccList.remove(0); resultNodes.remove(0); //now add children addAncestorNodes(currentNode, resultList); //add children to resultNodes list if(currentNode.getParenetCount() > 0) resultNodes.addAll(currentNode.getParentNodes()); //} } } catch (SQLException sqle) { sqle.printStackTrace(); } return root; } private PreparedStatement parentPS; public Vector <GONode> getPredNodes(GONode node) { Vector <GONode> nodeVector = new Vector <GONode>(); // logger.info("In query worker. Getting Ancestor Nodes for "+node.getGoID()); HashSet <Integer> set = collectPredecessorNodesStack(node.getGoID()); for(Integer id : set) { nodeVector.add(this.createAndPopulateNodeFromID(id)); } return nodeVector; } public void setAllNodeImplied(boolean implied) { Set <Integer> keys = idNodeHash.keySet(); GONode currNode; for(int key : keys) { currNode = idNodeHash.get(key); currNode.setImplied(implied); } } public void clearDirectAssocNodes(boolean implied) { Set <Integer> keys = idNodeHash.keySet(); GONode currNode; for(int key : keys) { currNode = idNodeHash.get(key); currNode.clearDirectAssocNodes(); } } public Vector <GONode> getPredNodesFromHash(GONode node) { Vector <GONode> nodeVector = new Vector <GONode>(); // logger.info("In query worker. Getting Ancestor Nodes for "+node.getGoID()); HashSet <Integer> set = collectPredecessorNodesStack(node.getGoID()); for(Integer id : set) { nodeVector.add(idNodeHash.get(id)); } return nodeVector; } private void collectPredecessorNodes(int id, HashSet <Integer> set) { try { parentPS.setInt(1, id); // logger.info("collecting pred nodes for id = "+id+" set size = " + set.size()); ResultSet rs = parentPS.executeQuery(); HashSet <Integer> newSet = new HashSet <Integer> (); int rsInt; while(rs.next()) { rsInt = rs.getInt(1); if(rsInt != 36808) newSet.add(rsInt); } rs.close(); for(int currID: newSet) { //skip if the parents have been retrieved already if(id != 36808 && !set.contains(currID)) collectPredecessorNodes(currID, set); } set.addAll(newSet); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } private HashSet <Integer> collectPredecessorNodesStack(int id) { HashSet <Integer> newSet = new HashSet <Integer> (); try { int rsInt; Stack <Integer> visitSet = new Stack <Integer> (); visitSet.add(id); while(!visitSet.isEmpty()) { parentPS.setInt(1, visitSet.pop()); ResultSet rs = parentPS.executeQuery(); while(rs.next()) { rsInt = rs.getInt(1); if(rsInt != 36808) { //just add novel nodes if(!newSet.contains(rsInt)) visitSet.push(rsInt); newSet.add(rsInt); } } } } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } return newSet; } private GONode buildRootAndChildren(String goAccession) { GONode root = null; try { Vector <Integer> visitedAccList = new Vector <Integer>(); Vector <Integer> goAccList = new Vector <Integer>(); Vector <Integer> resultList = new Vector <Integer>(); Vector <OntologyNode> resultNodes = new Vector <OntologyNode>(); PreparedStatement idStmt = conn.prepareStatement("select id from go_term where acc = ?"); idStmt.setString(1, goAccession); int goID = 0; ResultSet idRS = idStmt.executeQuery(); if(idRS.next()) { goID = idRS.getInt(1); } else { return null; } PreparedStatement childQuery = conn.prepareStatement("select term2_id from go_term2term where term1_id = ?"); //initialize goAccList goAccList.add(goID); ResultSet childRS; //root node GONode currentNode = createAndPopulateNodeFromID(goID); root = currentNode; resultNodes.add(currentNode); int currentID; int listIndex = 0; //use a stack-pop approach to avoid recursion record overhead on stack memory while (goAccList.size() > 0) { // for(int listIndex = 0; listIndex < goAccList.size(); listIndex++) { goID = goAccList.get(0); //always get the first in queue currentNode = (GONode) resultNodes.get(0); childQuery.setInt(1, goID); childRS = childQuery.executeQuery(); resultList.clear(); while(childRS.next()) { currentID = childRS.getInt(1); resultList.add(currentID); } //the result list has the current id list //these are children of the current node in the go accession list //push the child nodes onto the goAccessionList goAccList.addAll(resultList); //register the current id as visited visitedAccList.add(goID); //pop the current id from the source list goAccList.remove(0); resultNodes.remove(0); //now add children addChildNodes(currentNode, resultList); //add children to resultNodes list if(currentNode.getChildCount() > 0) resultNodes.addAll(currentNode.getChildNodes()); //} } } catch (SQLException sqle) { sqle.printStackTrace(); } return root; } private void addChildNodes(GONode rootNode, Vector <Integer> resultList) { GONode node; for(int i = 0; i < resultList.size(); i++) { node = createAndPopulateNodeFromID(resultList.get(i)); if(node != null) rootNode.addChildNode(node); } } private void addAncestorNodes(GONode rootNode, Vector <Integer> resultList) { GONode node; for(int i = 0; i < resultList.size(); i++) { node = createAndPopulateNodeFromID(resultList.get(i)); if(node != null) rootNode.addParentNode(node); } } public Connection connect(String dbURL, String driverName) { conn = null; try { Class.forName(driverName); conn= DriverManager.getConnection(dbURL, "bard_manager", "bard_manager"); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } return conn; } public GONode createAndPopulateNodeFromID(int goID) { GONode node = null; try { PreparedStatement termQuery = conn.prepareStatement("select acc, name, term_type, is_obsolete from go_term where id = ?"); termQuery.setInt(1, goID); ResultSet rs = termQuery.executeQuery(); if(rs.next()) { node = new GONode(); node.setGoID(goID); node.setGoAccession(rs.getString(1)); node.setGoName(rs.getString(2)); node.setGoOntologyType(rs.getString(3)); node.setObsolete(rs.getBoolean(4)); } rs.close(); PreparedStatement assocQuery = conn.prepareStatement("select count(*) from go_association where term_acc = ?"); assocQuery.setString(1, node.getGoAccession()); rs = assocQuery.executeQuery(); if(rs.next()) { node.setAssociationCount(rs.getInt(1)); } else { node.setAssociationCount(0); } } catch (SQLException sqle) { sqle.printStackTrace(); } return node; } public boolean populateNodeFromID(int goID, GONode node) { boolean populated = false; try { PreparedStatement termQuery = conn.prepareStatement("select acc, name, term_type, is_obsolete from go_term where id = ?"); termQuery.setInt(1, goID); ResultSet rs = termQuery.executeQuery(); if(rs.next()) { node.setGoID(goID); node.setGoAccession(rs.getString(1)); node.setGoName(rs.getString(2)); node.setGoOntologyType(rs.getString(3)); node.setObsolete(rs.getBoolean(4)); populated = true; } } catch (SQLException sqle) { sqle.printStackTrace(); } return populated; } public GONode getNodeForID(int goID) { GONode node = null; try { PreparedStatement termQuery = conn.prepareStatement("select acc, name, term_type, is_obsolete from go_term where id = ?"); termQuery.setInt(1, goID); ResultSet rs = termQuery.executeQuery(); if(rs.next()) { node = new GONode(); node.setGoID(goID); node.setGoAccession(rs.getString(1)); node.setGoName(rs.getString(2)); node.setGoOntologyType(rs.getString(3)); node.setObsolete(rs.getBoolean(4)); } } catch (SQLException sqle) { sqle.printStackTrace(); } return node; } public GONode getNodeForGoAcc(String goAcc) { GONode node = null; try { PreparedStatement termQuery = conn.prepareStatement("select id, name, term_type, is_obsolete from go_term where acc = ?"); termQuery.setString(1, goAcc); ResultSet rs = termQuery.executeQuery(); if(rs.next()) { node = new GONode(); node.setGoAccession(goAcc); node.setGoID(rs.getInt(1)); node.setGoName(rs.getString(2)); node.setGoOntologyType(rs.getString(3)); node.setObsolete(rs.getBoolean(4)); } } catch (SQLException sqle) { sqle.printStackTrace(); } return node; } public Vector <String> collectUniqueProductAcc(GONode root) { Vector <String> accList = new Vector <String> (); Vector <OntologyNode> nodes = collectDescendentNodes(root); System.out.println("node count="+nodes.size()); try { PreparedStatement ps = conn.prepareStatement("select accession from go_association where term_acc = ?"); ResultSet rs; for(int i = 0; i < nodes.size(); i++) { ps.setString(1, ((GONode)(nodes.get(i))).getGoAccession()); rs = ps.executeQuery(); while(rs.next()) { accList.add(rs.getString(1)); } } } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } return accList; } public Vector <OntologyNode> collectDescendentNodes(OntologyNode root) { Vector <OntologyNode> nodeList = new Vector <OntologyNode> (); getDecendents(root, nodeList); return nodeList; } private void getDecendents(OntologyNode node, Vector <OntologyNode> nodeList) { Vector <OntologyNode> children = node.getChildNodes(); if(children != null) { for(int i = 0; i < children.size(); i++) { getDecendents(children.get(i), nodeList); } } if(!nodeList.contains(node)) nodeList.add(node); } public HashSet <GONode> getGONodesForAccession(String accession) { String sql = "select a.term_acc, a.term_type, a.evidence, b.id, b.name, a.db_ref, a.assoc_date from go_association a, " + "go_term b where a.accession = ? and a.term_acc=b.acc"; HashSet <GONode> nodes = new HashSet <GONode> (); try { PreparedStatement ps = conn.prepareStatement(sql); ps.setString(1, accession); ResultSet rs = ps.executeQuery(); GONode node; while(rs.next()) { node = new GONode(); node.setGoAccession(rs.getString(1)); node.setGoOntologyType(rs.getString(2)); node.setEvCode(rs.getString(3)); node.setGoID(rs.getInt(4)); node.setGoName(rs.getString(5)); node.setImplied(false); //build unique list of nodes for nodes.add(node); } rs.close(); //this would append parents, let's not make this automatic // Set <GONode> set = nodeHash.keySet(); // HashSet <GONode> uniqueSet = new HashSet <GONode> (); // for(GONode currNode : set) { // this.appendParentNodesToNode(currNode); // } } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } return nodes; } public HashSet <GONode> getGONodesForAccessionUsingHash(String accession) { String sql = "select a.term_acc, a.evidence, a.db_ref, a.assoc_date from go_association a where a.accession = ?"; HashSet <GONode> nodes = new HashSet <GONode> (); try { PreparedStatement ps = conn.prepareStatement(sql); ps.setString(1, accession); ResultSet rs = ps.executeQuery(); GONode node; String acc; while(rs.next()) { node = null; acc = rs.getString(1); node = goAccNodeHash.get(acc); if(node != null) { node.setEvCode(rs.getString(2)); node.setImplied(false); //build unique list of nodes for nodes.add(node); } else { logger.info("No go node for accession = **"+acc+"**"); } } rs.close(); //this would append parents, let's not make this automatic // Set <GONode> set = nodeHash.keySet(); // HashSet <GONode> uniqueSet = new HashSet <GONode> (); // for(GONode currNode : set) { // this.appendParentNodesToNode(currNode); // } } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } return nodes; } public static void main(String [] args) { GOQueryWorker worker = new GOQueryWorker(); worker.connect("jdbc:mysql://maxwell.ncats.nih.gov:3306/bard3", "com.mysql.jdbc.Driver"); worker.prepareStatements("jdbc:mysql://maxwell.ncats.nih.gov:3306/bard3"); GONode node = worker.createAndPopulateNodeFromID(35212); System.out.println(node); Vector <GONode> nodes = worker.getPredNodes(node); for(GONode n : nodes) { System.out.println(n); } System.out.println(nodes.size()); //GONode node = worker.buildRootAndChildren("GO:0006281"); //node.printTree(0); // worker.collectUniqueProductAcc(node); // // Vector <String> accV = worker.collectUniqueProductAcc(node); // System.out.println("unique accessions = " + accV.size()); // // String query = "select * from assay_target where accession in ("; // // Vector <String> v2 = new Vector <String>(); // for(int i = 0; i < accV.size(); i++) { // if(!v2.contains(accV.get(i))) { // v2.add(accV.get(i)); // query += "'"+accV.get(i)+"', "; // } // } // // System.out.println("unique accessions = " + v2.size()); // // query = query.substring(0, query.length()-3); // query += ")"; // // System.out.println(query); } }