/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.query.op;
import java.util.Map;
import java.util.Map.Entry;
import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.util.BundleColumnBinder;
import com.addthis.bundle.util.ValueUtil;
import com.addthis.bundle.value.ValueFactory;
import com.addthis.hydra.data.query.AbstractRowOp;
import com.addthis.hydra.data.util.KeyHistogram;
import io.netty.channel.ChannelProgressivePromise;
/**
* <p>This query operation <span class="hydra-summary">builds a histogram for a column</span>.
* <p>The syntax of the operation is "histo=[column],[scale]". The scale represents the
* base value in the exponential scale of buckets and defaults to 10 when it is omitted.
* For example a scale value of 10 would yield buckets: 1-9, 10-99, 100-999, etc.
* The result of this operation is a table with two columns. Column 0 stores the lower
* bound of the bucket and column 1 stores the number of elements within the bucket.
*
* @user-reference
* @hydra-name histo
*/
public class OpHistogram extends AbstractRowOp {

    /** Bucket scale applied when the operation arguments only name a column. */
    private static final int DEFAULT_SCALE = 10;

    /** Scale handed to the {@link KeyHistogram}; second argument of the operation. */
    private final int scale;

    /** Index of the source column whose values are bucketed; first argument of the operation. */
    private final int column;

    /** Accumulates one update per row passed to {@link #rowOp(Bundle)}. */
    KeyHistogram histo;

    /** Column binder obtained from the first input row; {@code null} until a row arrives. */
    BundleColumnBinder binder;

    /** Bundle created from the first input row and used to create output rows; {@code null} until a row arrives. */
    Bundle rowFactory;

    /**
     * usage: column, scale
     * <p/>
     * column defines the column source for the bucket value.
     * scale determines the power value for bucket sizing and defaults to 10 when omitted.
     *
     * @param args         comma separated integers: the column index, optionally followed by the scale
     * @param queryPromise promise passed through to the {@link AbstractRowOp} constructor
     * @throws IllegalArgumentException if {@code args} does not contain a column index
     */
    public OpHistogram(String args, ChannelProgressivePromise queryPromise) {
        super(queryPromise);
        int[] v = csvToInts(args);
        if (v.length < 1) {
            // IllegalArgumentException is still a RuntimeException, so existing catch sites keep working.
            throw new IllegalArgumentException("missing required column");
        }
        column = v[0];
        scale = v.length > 1 ? v[1] : DEFAULT_SCALE;
        histo = new KeyHistogram().setScale(scale).init();
    }

    /**
     * Feeds the configured column of {@code row} into the histogram.
     * <p/>
     * The binder and the row factory are captured lazily from the first row seen.
     *
     * @param row the incoming row
     * @return always {@code null}; the rows this operation sends downstream are
     *         produced by {@link #sendComplete()}
     */
    @Override
    public Bundle rowOp(Bundle row) {
        if (binder == null) {
            binder = getSourceColumnBinder(row);
            rowFactory = row.createBundle();
        }
        long value = ValueUtil.asNumberOrParse(binder.getColumn(row, column)).asLong().getLong();
        histo.update(0, value);
        return null;
    }

    /**
     * Sends one two-column row per histogram entry (entry key, then entry value) to the
     * next operation and then completes this operation.
     * <p/>
     * Emission stops early as soon as {@code opPromise} reports that it is done.
     */
    @Override
    public void sendComplete() {
        // binder and rowFactory are only assigned in rowOp(); if no row was ever received
        // there is nothing to build output rows from, so go straight to completion instead
        // of dereferencing null.
        if (binder != null && rowFactory != null) {
            Map<Long, Long> map = histo.getSortedHistogram();
            for (Entry<Long, Long> e : map.entrySet()) {
                if (opPromise.isDone()) {
                    break;
                }
                Bundle row = rowFactory.createBundle();
                binder.appendColumn(row, ValueFactory.create(e.getKey()));
                binder.appendColumn(row, ValueFactory.create(e.getValue()));
                getNext().send(row);
            }
        }
        super.sendComplete();
    }
}