/**
*
* Copyright (C) 2013 Vanderbilt University <csaba.toth, b.malin @vanderbilt.edu>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.openhie.openempi.blocking.privacypreserving;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.openhie.openempi.configuration.ConfigurationRegistry;
import org.openhie.openempi.configuration.PrivacyPreservingBlockingField;
import org.openhie.openempi.configuration.PrivacyPreservingBlockingSettings;
import org.openhie.openempi.context.Context;
import org.openhie.openempi.matching.fellegisunter.FellegiSunterParameters;
import org.openhie.openempi.model.ColumnInformation;
import org.openhie.openempi.model.LeanRecordPair;
import org.openhie.openempi.model.Person;
import org.openhie.openempi.model.Dataset;
import org.openhie.openempi.recordlinkage.configuration.PrivacySettings;
import org.openhie.openempi.service.PersonQueryService;
/**
 * Base class for privacy-preserving blocking services that build their blocking key from
 * randomly chosen Bloom filter bit positions of the configured blocking fields.
 *
 * Subclasses decide where the randomness comes from by implementing
 * {@link #getRandomSource()}.
 */
public abstract class BlockingWithRandomBitsServiceBase extends PrivacyPreservingBlockingBase
{
    /**
     * Supplies the random number generator used to pick the blocking field and the bit
     * position for every selected bit.
     *
     * @return the random source; it is fetched once per {@code getRecordPairs} call
     */
    public abstract Random getRandomSource();

    /**
     * Not read or written anywhere in this class.
     * NOTE(review): presumably consumed by subclasses or callers - confirm before changing.
     */
    public Person modelPerson = null;

    /**
     * Runs the configured number of blocking runs. For each run, a fresh list of
     * (field index, bit index) selections is drawn at random and handed, together with the
     * shared output list and the shared pair-id set, to the overloaded
     * {@code getRecordPairs(selectedBits, ...)} (not defined in this class; presumably
     * inherited from {@code PrivacyPreservingBlockingBase} - TODO confirm).
     *
     * The bit index of a selection is drawn from {@code [0, M)}, where M is the Bloom filter
     * M parameter recorded for the right-hand column of the chosen field, or the configured
     * default M when the column records none (0). A mismatch between the left and right
     * column parameters is logged as an error but does not stop processing.
     *
     * @param blockingServiceCustomParameters not used by this implementation
     * @param matchingServiceTypeName not used by this implementation
     * @param matchingServiceCustomParameters not used by this implementation
     * @param leftTableName table name used to look up the left dataset's column information
     * @param rightTableName table name used to look up the right dataset's column information
     * @param pairs list passed through unchanged to every run
     * @param emOnly flag passed through unchanged to every run
     * @param fellegiSunterParameters parameters passed through unchanged to every run
     * @throws IllegalArgumentException from {@link Random#nextInt(int)} when bits are requested
     *         but no blocking field is configured, or the resolved Bloom filter size is not positive
     */
    public void getRecordPairs(Object blockingServiceCustomParameters, String matchingServiceTypeName,
            Object matchingServiceCustomParameters, String leftTableName, String rightTableName,
            List<LeanRecordPair> pairs, boolean emOnly, FellegiSunterParameters fellegiSunterParameters) {
        PrivacyPreservingBlockingSettings ppbs = Context.getConfiguration().getPrivacyPreservingBlockingSettings();
        List<PrivacyPreservingBlockingField> ppbFields = ppbs.getPrivacyPreservingBlockingFields();
        int numberOfRandomBits = ppbs.getNumberOfBlockingBits();
        int numberOfFields = ppbFields.size();
        Random r = getRandomSource();

        PersonQueryService personQueryService = Context.getPersonQueryService();
        Dataset leftDataset = personQueryService.getDatasetByTableName(leftTableName);
        List<ColumnInformation> leftColumnInformation = leftDataset.getColumnInformation();
        Dataset rightDataset = personQueryService.getDatasetByTableName(rightTableName);
        List<ColumnInformation> rightColumnInformation = rightDataset.getColumnInformation();

        // The configured default size is loop-invariant, so it is resolved at most once and
        // only if some selected column actually lacks its own M parameter.
        Integer defaultBloomFilterSize = null;

        // Shared across all runs and handed to each of them.
        Set<String> idPairHash = new HashSet<String>();
        for (int runIndex = 0; runIndex < ppbs.getNumberOfRuns(); runIndex++) {
            List<BloomFilterBitStat> selectedBits = new ArrayList<BloomFilterBitStat>();
            for (int i = 0; i < numberOfRandomBits; i++) {
                int randomFieldIndex = r.nextInt(numberOfFields);
                PrivacyPreservingBlockingField ppbField = ppbFields.get(randomFieldIndex);

                // get the function parameters from the persisted import configuration and check match
                ColumnInformation leftCI =
                        findLastColumnByFieldName(leftColumnInformation, ppbField.getLeftFieldName());
                int leftBloomFilterK = (leftCI == null) ? 0 : leftCI.getBloomFilterKParameter();
                int leftBloomFilterM = (leftCI == null) ? 0 : leftCI.getBloomFilterMParameter();

                ColumnInformation rightCI =
                        findLastColumnByFieldName(rightColumnInformation, ppbField.getRightFieldName());
                int rightBloomFilterK = (rightCI == null) ? 0 : rightCI.getBloomFilterKParameter();
                int rightBloomFilterM = (rightCI == null) ? 0 : rightCI.getBloomFilterMParameter();

                if (leftBloomFilterK != rightBloomFilterK || leftBloomFilterM != rightBloomFilterM) {
                    log.error("Left and right bloom filter parameters doesn't match: " +
                            ppbField.getLeftFieldName() + " - " + leftBloomFilterK + ", " + leftBloomFilterM + ", " +
                            ppbField.getRightFieldName() + " - " + rightBloomFilterK + ", " + rightBloomFilterM, null);
                }

                // The right-hand column's M wins; fall back to the configured default otherwise.
                int bloomFilterSize;
                if (rightBloomFilterM != 0) {
                    bloomFilterSize = rightBloomFilterM;
                } else {
                    if (defaultBloomFilterSize == null) {
                        defaultBloomFilterSize = lookupDefaultBloomFilterSize();
                    }
                    bloomFilterSize = defaultBloomFilterSize;
                }

                int randomBitIndex = r.nextInt(bloomFilterSize);
                selectedBits.add(new BloomFilterBitStat(randomFieldIndex, randomBitIndex));
            }
            getRecordPairs(selectedBits, pairs, idPairHash, leftTableName, rightTableName, emOnly, fellegiSunterParameters);
        }
    }

    /** Returns the last column whose field name equals {@code fieldName}, or {@code null} if none does. */
    private static ColumnInformation findLastColumnByFieldName(List<ColumnInformation> columns, String fieldName) {
        ColumnInformation match = null;
        for (ColumnInformation column : columns) {
            if (column.getFieldName().equals(fieldName)) {
                match = column;
            }
        }
        return match;
    }

    /** Reads the default Bloom filter M from the record linkage protocol settings entry. */
    private static int lookupDefaultBloomFilterSize() {
        PrivacySettings privacySettings =
                (PrivacySettings)Context.getConfiguration().lookupConfigurationEntry(ConfigurationRegistry.RECORD_LINKAGE_PROTOCOL_SETTINGS);
        return privacySettings.getBloomfilterSettings().getDefaultM();
    }
}