package gov.nih.ncgc.bard.resourcemgr.extresource.pubchem;
import gov.nih.ncgc.bard.capextract.resultextract.BardExptDataResponse;
import gov.nih.ncgc.bard.resourcemgr.BardDBUtil;
import gov.nih.ncgc.bard.resourcemgr.BardExtResourceLoader;
import gov.nih.ncgc.bard.resourcemgr.BardExternalResource;
import gov.nih.ncgc.bard.resourcemgr.IBardExtResourceLoader;
import java.io.IOException;
import java.sql.Blob;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.Properties;
import java.util.Set;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
public class CIDSIDMappingLoader extends BardExtResourceLoader implements
IBardExtResourceLoader {
private long mapSize = 0;
private long deltaMapSize = 0;
@Override
public boolean load() {
boolean loaded = false;;
if(service != null) {
log.info("Loading CID SID Mapping");
//fetch file
fetchExternalResource();
//rebuild CID_SID
deltaMapSize = rebuildCIDSIDViaTempLoad();
//set response message
statusText = "Completed CID SID Mapping Refresh. Mapping entries have increased by "+deltaMapSize;
//backfill missing cids in data
long updates = backfillNullCIDsInData();
statusText += " ("+updates+" data CIDs backfilled)";
loaded = true;
} else {
return false;
}
return loaded;
}
@Override
public String getLoadStatusReport() {
// TODO Auto-generated method stub
return null;
}
@Override
public void setLoaderProps(Properties loaderProps) {
// TODO Auto-generated method stub
}
private long rebuildCIDSIDViaTempLoad() {
long deltaMapCnt = 0;
long initCnt = 0;
long newMapCnt = 0;
try {
initCnt = BardDBUtil.getTableRowCount("cid_sid", service.getDbURL());
conn = BardDBUtil.connect(service.getDbURL());
BardExternalResource resource = service.getExtResources().get(0);
//unzip the file
String cidsidGZIPPath = service.getLocalResPath()+"/"+resource.getFileName();
String cidsidPath = cidsidGZIPPath.replace(".gz", "");
gunZip(cidsidGZIPPath, cidsidPath);
//make the temp_cid_sid and truncate
Statement stmt = conn.createStatement();
stmt.execute("create table if not exists temp_cid_sid like cid_sid");
stmt.execute("truncate table temp_cid_sid");
//load the temp table
stmt.execute("load data infile '"+cidsidPath+"' into table temp_cid_sid (cid, sid, rel_type)");
newMapCnt = BardDBUtil.getTableRowCount("temp_cid_sid", service.getDbURL());
deltaMapCnt = newMapCnt - initCnt;
//swap tables iff larger or nearly the same size. Allow from some contraction due to deleted substances.
BardDBUtil.swapTempTableToProductionIfPassesSizeDelta("temp_cid_sid", "cid_sid", 0.95, service.getDbURL());
conn.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return deltaMapCnt;
}
private long backfillNullCIDsInData() {
long updates = 0;
try {
conn = BardDBUtil.connect(service.getDbURL());
conn.setAutoCommit(false);
Statement stmt = conn.createStatement();
stmt.setFetchSize(Integer.MIN_VALUE);
ResultSet rs = stmt.executeQuery("select expt_data_id, sid from bard_experiment_data " +
"where cid=0");
Hashtable <Long, ArrayList<Long>> sidToDataIdListTable = new Hashtable <Long, ArrayList<Long>>();
ArrayList<Long> idList;
while(rs.next()) {
idList = sidToDataIdListTable.get(rs.getLong(2));
if(idList == null) {
idList = new ArrayList<Long>();
idList.add(rs.getLong(1));
sidToDataIdListTable.put(rs.getLong(2), idList);
} else {
idList.add(rs.getLong(1));
}
}
rs.close();
stmt.close();
//now query to find the sids that have cids.
Set <Long> sids = sidToDataIdListTable.keySet();
ArrayList <Long> smartSIDs = new ArrayList<Long>();
Hashtable <Long, Long> sidCidHash = new Hashtable<Long,Long>();
PreparedStatement ps = conn.prepareStatement("select cid from cid_sid where sid = ? and rel_type=1");
long cid;
for(Long sid : sids) {
ps.setLong(1, sid);
rs = ps.executeQuery();
if(rs.next()) {
cid = rs.getLong(1);
if(cid != 0) {
sidCidHash.put(sid,cid);
}
}
}
ps.close();
//sids to work on, they are missing cids in data and now have cids
sids = sidCidHash.keySet();
PreparedStatement psDataUpdate = conn.prepareStatement("update bard_experiment_data set cid = ? where expt_data_id=?");
PreparedStatement psGetJson = conn.prepareStatement("select json_response from bard_experiment_result where expt_data_id=?");
PreparedStatement psResultUpdate = conn.prepareStatement("update bard_experiment_result set json_response=? where expt_data_id=?");
ObjectMapper mapper = new ObjectMapper();
BardExptDataResponse response;
Blob blob;
boolean ready = false;
byte [] buffer;
for(Long sid : sids) {
idList = sidToDataIdListTable.get(sid);
for(Long dataId : idList) {
ready = false;
psDataUpdate.setLong(2, dataId);
psDataUpdate.setLong(1, sidCidHash.get(sid));
psDataUpdate.executeUpdate();
//now the fun part, updating the json
psGetJson.setLong(1, dataId);
rs = psGetJson.executeQuery();
if(rs.next()) {
blob = rs.getBlob(1);
buffer = blob.getBytes(1, (int) blob.length());
String s = new String(buffer);
try {
response = mapper.readValue(buffer, BardExptDataResponse.class);
if(response.getCid() == null || response.getCid() == 0) {
response.setCid(sidCidHash.get(sid));
psResultUpdate.setLong(2, dataId);
psResultUpdate.setString(1, mapper.writeValueAsString(response));
psResultUpdate.executeUpdate();
ready = true;
}
} catch (JsonParseException e) {
e.printStackTrace();
continue;
} catch (JsonMappingException e) {
e.printStackTrace();
continue;
} catch (IOException e) {
e.printStackTrace();
continue;
}
if(ready) {
conn.commit();
updates++;
} else {
conn.rollback();
}
} else {
ready = false;
log.warning("Failed to backfill expt_data_id="+dataId+" no result found to modify.");
}
}
}
} catch (ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return updates;
}
}