package org.regenstrief.linkage.util;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.regenstrief.linkage.MatchResult;
import org.regenstrief.linkage.MatchVector;
import org.regenstrief.linkage.Record;
import org.regenstrief.linkage.analysis.RecordFrequencies;
import org.regenstrief.linkage.analysis.VectorTable;
public class MUSyntheticRecordGenerator extends SyntheticRecordGenerator {
double p;
Random rand;
long count;
VectorTable vt;
private List<String> primary_demographics;
private List<String> dependent_demographics;
private ScorePair sp;
public MUSyntheticRecordGenerator(MatchingConfig mc, RecordFrequencies rf, double p){
super(mc, rf);
this.p = p;
count = 0;
rand = new Random();
vt = new VectorTable(mc);
sp = new ScorePair(mc);
primary_demographics = new ArrayList<String>();
dependent_demographics = new ArrayList<String>();
generateDemographicsLists();
}
@Override
public MatchResult getRecordPair() {
Record[] ret = new Record[2];
ret[0] = new Record(count++, "synthetic");
ret[1] = new Record(count++, "synthetic");
boolean true_match = false;
double d = rand.nextDouble();
if(d <= p){
true_match = true;
}
MatchVector mv = generateMatchVector(true_match);
Iterator<String> p_it = primary_demographics.iterator();
while(p_it.hasNext()){
String p_demographic = p_it.next();
MatchingConfigRow mcr = mc.getMatchingConfigRowByName(p_demographic);
if(true_match){
setRandomDemographic(mv, ret[0], p_demographic);
double agreement = rand.nextDouble();
if(agreement < mcr.getAgreement()){
ret[1].addDemographic(p_demographic, ret[0].getDemographic(p_demographic));
String rank_demographic = p_demographic + DEMOGRAPHIC_RANK_SUFFIX;
ret[1].addDemographic(rank_demographic, ret[0].getDemographic(rank_demographic));
} else {
setRandomDisagreementDemographic(mv, ret[1], p_demographic, ret[0].getDemographic(p_demographic));
}
} else {
// set random demographics for both records
setRandomDemographic(mv, ret[0], p_demographic);
setRandomDemographic(mv, ret[1], p_demographic);
}
}
Iterator<String> d_it = dependent_demographics.iterator();
while(d_it.hasNext()){
String demographic = d_it.next();
MatchingConfigRow mcr = mc.getMatchingConfigRowByName(demographic);
if(true_match){
setRandomDemographic(mv, ret[0], demographic);
double agreement = rand.nextDouble();
if(agreement < mcr.getAgreement()){
ret[1].addDemographic(demographic, ret[0].getDemographic(demographic));
String rank_demographic = demographic + DEMOGRAPHIC_RANK_SUFFIX;
ret[1].addDemographic(rank_demographic, ret[0].getDemographic(rank_demographic));
} else {
setRandomDisagreementDemographic(mv, ret[1], demographic, ret[0].getDemographic(demographic));
}
} else {
// set random demographics for both records
setRandomDemographic(mv, ret[0], demographic);
setRandomDemographic(mv, ret[1], demographic);
}
}
// create match vector that describes the generated records
MatchResult mr = sp.scorePair(ret[0], ret[1]);
// double score, double incl_score, double true_prob, double false_prob, double sensitivity, double specificity, MatchVector match_vct, ScoreVector score_vct, Record r1, Record r2, MatchingConfig mc){
//MatchResult mr = new MatchResult(vt.getScore(mv), vt.getInclusiveScore(mv), vt.getMatchVectorTrueProbability(mv), vt.getMatchVectorFalseProbability(mv), vt.getSensitivity(mv), vt.getSpecificity(mv), mv, vt.getScoreVector(mv), ret[0], ret[1], mc);
mr.setMatch(true_match);
return mr;
}
private MatchVector generateMatchVector(boolean true_match){
MatchVector ret = new MatchVector();
Iterator<MatchingConfigRow> it = mc.getMatchingConfigRows().iterator();
while(it.hasNext()){
MatchingConfigRow mcr = it.next();
String demographic = mcr.getName();
}
return ret;
}
private void generateDemographicsLists(){
//Iterator<MatchingConfigRow> it = mc.getIncludedColumns().iterator();
Iterator<MatchingConfigRow> it = mc.getMatchingConfigRows().iterator();
while(it.hasNext()){
String demographic = it.next().getName();
String context = rf1.getContext(demographic);
if(context != null && !primary_demographics.contains(context)){
primary_demographics.add(context);
dependent_demographics.add(demographic);
if(dependent_demographics.contains(context)){
dependent_demographics.remove(context);
}
}
if(!dependent_demographics.contains(demographic) && !primary_demographics.contains(demographic)){
dependent_demographics.add(demographic);
}
}
}
}