/*
* Copyright (c) 2011 LinkedIn, Inc
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.flaptor.indextank.suggest;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.log4j.Logger;
import com.flaptor.util.Execute;
import com.google.common.base.Functions;
import com.google.common.base.Preconditions;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
@Deprecated
class PopularityIndex {
/** Logger for this class; its name is whatever {@code Execute.whoAmI()} returns. */
private static final Logger logger = Logger.getLogger(Execute.whoAmI());
/** Name of the file, inside the backup directory, that holds the serialized index. */
static final String MAIN_FILE_NAME = "popularityIndex";
/** Child slots per node: one for each character accepted by {@code mf} (0-9 and a-z). */
private static final int TREE_SIZE = 36;
/** Maximum number of suggestions a single node keeps. */
private static final int SUGGESTION_LIMIT = 3;
/** Directory the index file is read from and written to. */
private final File backupDir;
/** Root node of the tree; it corresponds to the empty prefix. */
private Leaf root;
/**
 * Creates an index backed by the given directory.
 *
 * @param backupDir directory holding the serialized index file; must be non-null,
 *        readable and writable
 * @param load when {@code true} the tree is deserialized from {@code MAIN_FILE_NAME}
 *        inside {@code backupDir}; when {@code false} an empty tree is created
 * @throws IOException if the stored index cannot be opened or read
 * @throws IllegalArgumentException if the directory lacks read or write permission
 * @throws IllegalStateException if a class of the stored object graph cannot be resolved
 */
public PopularityIndex(File backupDir, boolean load) throws IOException {
    Preconditions.checkNotNull(backupDir);
    checkDirArgument(backupDir);
    this.backupDir = backupDir;

    if (load) {
        ObjectInputStream in = null;
        try {
            File stored = new File(this.backupDir, MAIN_FILE_NAME);
            in = new ObjectInputStream(new BufferedInputStream(new FileInputStream(stored)));
            root = (Leaf) in.readObject();
            logger.info("State loaded.");
        } catch (ClassNotFoundException e) {
            // The file was written by an incompatible build; surface it as a state error.
            throw new IllegalStateException(e);
        } finally {
            Execute.close(in);
        }
    } else {
        logger.info("Starting a new(empty) PopularityIndex.");
        root = new Leaf();
    }
}
/**
 * Writes the current index to the backup directory by delegating to {@code syncToDisk()}.
 *
 * @throws IOException if writing the index file fails
 */
public void dump() throws IOException {
syncToDisk();
}
/**
 * Writes the whole tree to {@code MAIN_FILE_NAME} in the backup directory using
 * Java serialization. The method is synchronized on this instance and returns only
 * after the data has been flushed.
 *
 * @throws IOException if the file cannot be opened or written
 */
private synchronized void syncToDisk() throws IOException {
    logger.info("Starting dump to disk.");
    ObjectOutputStream out = null;
    try {
        File target = new File(backupDir, MAIN_FILE_NAME);
        out = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(target)));
        out.writeObject(root);
        // Flush before reporting success so write errors are raised here.
        out.flush();
        logger.info("Dump to disk completed.");
    } finally {
        Execute.close(out);
    }
}
/**
 * Verifies that the backup directory can be both read and written.
 * Read permission is checked first; the first failing check is logged at error
 * level and reported to the caller.
 *
 * @param backupDir the directory to validate; must not be null
 * @throws IllegalArgumentException if the directory is not readable or not writable
 */
private static void checkDirArgument(File backupDir) {
    Preconditions.checkNotNull(backupDir);

    String problem = null;
    if (!backupDir.canRead()) {
        problem = "Don't have read permission over the backup directory(" + backupDir.getAbsolutePath() + ").";
    } else if (!backupDir.canWrite()) {
        problem = "Don't have write permission over the backup directory(" + backupDir.getAbsolutePath() + ").";
    }

    if (problem != null) {
        logger.error(problem);
        throw new IllegalArgumentException(problem);
    }
}
/**
 * Lower-cases the input so that insertions and lookups are case-insensitive.
 * <p>
 * The conversion uses {@link Locale#ROOT} rather than the JVM default locale.
 * With a locale-sensitive conversion (for example Turkish) an ASCII {@code 'I'}
 * becomes a dotless {@code 'ı'}, which the mapping function rejects as an
 * unsupported character.
 *
 * @param str the raw string; must not be null
 * @return the lower-cased string
 */
private String normalize(String str) {
    return str.toLowerCase(Locale.ROOT);
}
/**
 * Returns the suggestions stored at the node reached by the (normalized) prefix,
 * ordered from the highest counter to the lowest.
 * <p>
 * An empty list is returned when the prefix contains an unsupported character,
 * when no node exists for the prefix, or when the node holds no suggestions.
 * Callers should not rely on the returned list being modifiable.
 *
 * @param str the prefix to look up; must not be null
 * @return the suggested strings, possibly empty, never null
 */
public List<String> getMostPopular(String str) {
    final String prefix = normalize(str);

    Leaf hit;
    try {
        hit = search(prefix, root);
    } catch (UnsupportedCharacterException e) {
        if (logger.isDebugEnabled()) {
            logger.debug("String with unsupported character, returning no suggestions. Problematic string is \"" + prefix + "\", character is \"" + e.getCharacter() + "\"");
        }
        return new ArrayList<String>(0);
    }

    if (hit == null) {
        return ImmutableList.of();
    }

    List<Leaf> ranked = ImmutableList.of();
    if (hit.suggestions != null) {
        // Leaf's natural order puts the highest counter first.
        ranked = Ordering.natural().sortedCopy(hit.suggestions);
    }
    return Lists.transform(ranked, Functions.toStringFunction());
}
/**
 * Records one occurrence of the given string in the index.
 * The string is normalized first; if it contains a character the tree cannot
 * represent, it is skipped and a debug message is logged.
 *
 * @param str the string to record; must not be null
 */
public void add(String str) {
    final String normalized = normalize(str);
    try {
        add(0, root, normalized);
    } catch (UnsupportedCharacterException e) {
        if (!logger.isDebugEnabled()) {
            return;
        }
        logger.debug("Trying to store string with unsupported character, skipping it. String is \"" + normalized + "\", character is \"" + e.getCharacter() + "\"");
    }
}
/**
 * Follows {@code chars} one character at a time starting from {@code leaf}.
 *
 * @param chars the characters to follow; an empty sequence yields {@code leaf} itself
 * @param leaf the node to start from
 * @return the node reached after consuming every character, or {@code null} as soon
 *         as a character has no child node
 * @throws UnsupportedCharacterException if a character reached during the walk has
 *         no slot in the tree
 */
private Leaf search(CharSequence chars, Leaf leaf) throws UnsupportedCharacterException {
    Leaf node = leaf;
    final int length = chars.length();
    for (int i = 0; i < length; i++) {
        int slot = mf(chars.charAt(i));
        Leaf child = node.map[slot];
        if (child == null) {
            return null;
        }
        node = child;
    }
    return node;
}
/**
 * Recursively walks the path for {@code originalStr} below {@code leaf}, creating
 * missing child nodes on the way down. When the whole string has been consumed the
 * counter of the final node is incremented. On the way back up, that final node is
 * offered as a suggestion to every node on the path, for as long as each node
 * accepts it.
 * <p>
 * Nodes created for earlier characters stay in the tree even if a later character
 * turns out to be unsupported.
 *
 * @param position index of the next character of {@code originalStr} to consume
 * @param leaf the node reached after consuming the first {@code position} characters
 * @param originalStr the complete string being stored (the public {@code add}
 *        passes it already normalized)
 * @return the node for the complete string, or {@code null} once a node on the
 *         way back up has declined it as a suggestion
 * @throws UnsupportedCharacterException if the character at {@code position} or any
 *         later one has no slot in the tree
 */
private Leaf add(int position, Leaf leaf, String originalStr) throws UnsupportedCharacterException {
if (position == originalStr.length()) {
// consumed the entire sequence, increment count
leaf.counter.incrementAndGet();
// remember the full string the first time this node is used as a string end
if (leaf.str == null) {
leaf.str = originalStr;
}
return leaf;
}
int point = mf(originalStr.charAt(position));
if (leaf.map[point] == null) {
// if we need to create the child leaf, we lock the parent
synchronized (leaf) {
// if it's still null, we create it, now locked
if (leaf.map[point] == null) {
leaf.map[point] = new Leaf();
}
}
}
// increase the prefix counter
//leaf.prefixCounter.incrementAndGet();
Leaf s = add(position+1, leaf.map[point], originalStr);
// s is the final node of the string; stop propagating it upwards once this node declines it
if (s != null) {
if (!leaf.offerSuggestion(s)) {
s = null;
}
}
return s;
}
/**
 * One link of the doubly linked suggestion list kept by a node.
 * Iterating an entry yields its own suggestion followed by those of every entry
 * reachable through {@code next}.
 */
private static class SuggestionEntry implements Iterable<Leaf>, Serializable {
    private static final long serialVersionUID = 1L;

    /** Following entry in the list, or null at the tail. */
    SuggestionEntry next = null;
    /** Preceding entry in the list, or null at the head. */
    SuggestionEntry prev = null;
    /** The node this entry points at. */
    Leaf suggestion;

    SuggestionEntry(Leaf suggestion) {
        this.suggestion = suggestion;
    }

    @Override
    public Iterator<Leaf> iterator() {
        return new AbstractIterator<Leaf>() {
            /** Entry whose suggestion is returned by the next call. */
            SuggestionEntry cursor = SuggestionEntry.this;

            @Override
            protected Leaf computeNext() {
                if (cursor == null) {
                    return endOfData();
                }
                Leaf value = cursor.suggestion;
                cursor = cursor.next;
                return value;
            }
        };
    }
}
/**
 * A node of the tree. Besides its children it keeps a counter for the string that
 * ends at this node and a short list of the most popular strings below it.
 * The natural order sorts nodes from the highest counter to the lowest.
 */
private static class Leaf implements Comparable<Leaf>, Serializable {
    private static final long serialVersionUID = 1L;

    /** Number of times the string ending at this node has been added. */
    AtomicInteger counter = new AtomicInteger(0);
    /** Children, indexed by the value the mapping function gives each character. */
    Leaf[] map = new Leaf[TREE_SIZE];
    /** Head of the doubly linked suggestion list, or null when there is none. */
    SuggestionEntry suggestions = null;
    /** The string ending at this node; null until that string has been added. */
    String str;

    /**
     * Offers a node as a suggestion for this node. The node is accepted when it is
     * already listed, when the list has room, or when its counter is higher than
     * that of some listed suggestion. After an insertion that exceeds
     * {@code SUGGESTION_LIMIT}, the entry with the lowest counter is dropped.
     *
     * @param s the candidate node
     * @return {@code true} if the candidate was already listed or was inserted,
     *         {@code false} if it was declined
     */
    public boolean offerSuggestion(Leaf s) {
        // Scan without the lock, so the common "already listed" case stays cheap.
        int listed = 0;
        boolean beatsExisting = false;
        for (SuggestionEntry e = suggestions; e != null; e = e.next) {
            if (e.suggestion == s) {
                return true;
            }
            if (e.suggestion.counter.get() < s.counter.get()) {
                beatsExisting = true;
            }
            listed++;
        }
        if (!beatsExisting && listed >= SUGGESTION_LIMIT) {
            return false;
        }

        synchronized (this) {
            // Re-check under the lock: another thread may have inserted the same
            // candidate after the scan above, and the list must not hold duplicates.
            for (SuggestionEntry e = this.suggestions; e != null; e = e.next) {
                if (e.suggestion == s) {
                    return true;
                }
            }

            // Add the new suggestion at the head.
            SuggestionEntry added = new SuggestionEntry(s);
            added.next = this.suggestions;
            if (added.next != null) {
                added.next.prev = added;
            }
            this.suggestions = added;

            // Find the entry with the lowest counter; ties keep the earliest entry.
            SuggestionEntry worst = added;
            int total = 0;
            for (SuggestionEntry e = added; e != null; e = e.next) {
                total++;
                if (e.suggestion.counter.get() < worst.suggestion.counter.get()) {
                    worst = e;
                }
            }

            // Unlink the worst entry if the list grew past the limit.
            if (total > SUGGESTION_LIMIT) {
                if (worst == this.suggestions) {
                    if (worst.next != null) {
                        worst.next.prev = null;
                    }
                    this.suggestions = worst.next;
                } else {
                    if (worst.prev != null) {
                        worst.prev.next = worst.next;
                    }
                    if (worst.next != null) {
                        worst.next.prev = worst.prev;
                    }
                }
            }
        }
        return true;
    }

    /**
     * Orders nodes by descending counter. Each counter is read once and compared
     * explicitly instead of subtracted, so the result cannot be skewed by
     * integer overflow.
     */
    @Override
    public int compareTo(Leaf o) {
        int mine = counter.get();
        int theirs = o.counter.get();
        if (theirs < mine) {
            return -1;
        }
        return theirs > mine ? 1 : 0;
    }

    /** Returns the string ending at this node, which may be null. */
    @Override
    public String toString() {
        return str;
    }
}
/**
 * Mapping function from a character to its child slot.
 * Digits {@code '0'}-{@code '9'} map to 0-9 and lower-case letters
 * {@code 'a'}-{@code 'z'} map to 10-35.
 *
 * @param c the character to map
 * @return the slot index for {@code c}
 * @throws UnsupportedCharacterException if {@code c} is neither an ASCII digit nor
 *         an ASCII lower-case letter
 */
private static int mf(char c) throws UnsupportedCharacterException {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'z') {
        // Letters come right after the ten digit slots.
        return c - 'a' + 10;
    }
    throw new UnsupportedCharacterException("Unsupported character: \"" + c + "\"", c);
}
/**
 * Checked exception raised when a character has no slot in the tree.
 * It carries the offending character so callers can report it.
 */
private static class UnsupportedCharacterException extends Exception {
private static final long serialVersionUID = 1L;
/** The character that could not be mapped. */
private final char character;
/**
 * @param msg the exception message
 * @param character the character that could not be mapped
 */
public UnsupportedCharacterException(String msg, char character) {
super(msg);
this.character = character;
}
/** @return the character that could not be mapped */
public char getCharacter() {
return character;
}
}
/**
 * Writes a node and all of its descendants to the stream, parent before children.
 * A node contributes a record (its string via {@code writeUTF}, then its counter
 * via {@code writeInt}) only when it has a string; children are visited in slot order.
 *
 * @param l the node to write
 * @param dos the destination stream
 * @throws IOException if writing to the stream fails
 */
private static void writeNewFormatLeaf(Leaf l, DataOutputStream dos) throws IOException {
    if (l.str != null) {
        dos.writeUTF(l.str);
        dos.writeInt(l.counter.get());
    }
    for (Leaf child : l.map) {
        if (child == null) {
            continue;
        }
        writeNewFormatLeaf(child, dos);
    }
}
/**
 * Exports the index to {@code file} as a flat sequence of records, one per stored
 * string: the string written with {@code writeUTF} followed by its counter written
 * with {@code writeInt}.
 *
 * @param file the destination file; it is created or overwritten
 * @throws IOException if the file cannot be opened or written
 */
public void writeNewFormat(File file) throws IOException {
    DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file)));
    try {
        writeNewFormatLeaf(this.root, dos);
        // Flush explicitly, as syncToDisk does, so that a failure while draining the
        // buffer is raised here as an IOException instead of only inside the close helper.
        dos.flush();
    } finally {
        Execute.close(dos);
    }
}
}