/**
*
* Copyright (C) 2013 Vanderbilt University <csaba.toth, b.malin @vanderbilt.edu>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.openhie.openempi.transformation.function.bloomfilter;
public class NgramSequencer implements NgramIterator
{
public static final char ngramPrefix = '%'; // prefix, preferably shouldn't occur in the source string
public static final char ngramPostfix = '$'; // prefix, preferably shouldn't occur in the source string
private StringBuilder source = null; // for speeding up string manipulations
private int ngramSize = 0; // 2 = bigram, 3 = bigram, etc
private int index = 0; // which n-gram we are at
private boolean padding = false;
public void init(String source, int ngramSize, boolean padding) throws IndexOutOfBoundsException
{
if (ngramSize < 1)
throw new IndexOutOfBoundsException("n-gram size must be bigger than 0!");
this.source = new StringBuilder();
for(int i = 0; i < ngramSize - 1; i++)
this.source.append(ngramPrefix);
this.source.append(source);
for(int i = 0; i < ngramSize - 1; i++)
this.source.append(ngramPostfix);
this.ngramSize = ngramSize;
this.padding = padding;
index = 0;
}
public boolean hasNext() {
return (index <= source.length() - ngramSize);
}
public String next() {
int currentindex = index++;
if (!padding && (currentindex < ngramSize || currentindex > source.length() - 2 * ngramSize + 1)) {
if (currentindex < ngramSize) {
return source.substring(ngramSize - 1, ngramSize + currentindex);
} else if (currentindex > source.length() - 2 * ngramSize + 1) {
return source.substring(currentindex, source.length() - ngramSize + 1);
}
}
return source.substring(currentindex, currentindex + ngramSize);
}
public void remove() {
source = null;
}
}