/*
* Licensed to the Ted Dunning under one or more contributor license
* agreements. See the NOTICE file that may be
* distributed with this work for additional information
* regarding copyright ownership. Ted Dunning licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.mapr.synth.samplers;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.*;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.mapr.synth.FancyTimeFormatter;
import org.apache.mahout.math.jet.random.Gamma;
import java.text.ParseException;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* The changer sampler emulates data evolution over time. The idea is that you give a schema for the
* base record. Fields in the base record will be changed at random and the value of the record will
* be recorded in a list. In addition to the fields in the record, there will be a set of change flags,
* one per field, each set to 1 when that field changed and left as 0 otherwise. The time of each change
* is also recorded.
* <p>
* The final result is the list of all record states and change flags.
*/
public class Changer extends FieldSampler {
    // A rate is a number optionally followed by "/unit" (for example "5/m"). The '-' in the
    // character class is there for exponents such as 1e-3; negative values are rejected later.
    private static final Pattern RATE_PATTERN = Pattern.compile("([0-9.e\\-]+)(?:/([smhdw]))?");
    // A time interval is a number optionally followed directly by a unit (for example "3d").
    private static final Pattern TIME_PATTERN = Pattern.compile("([0-9.e\\-]+)([smhdw])?");

    /**
     * Converts a quantity expressed in some time unit into milliseconds.
     */
    public static abstract class MilliConverter {
        /**
         * @param x A quantity in the unit handled by this converter.
         * @return The same quantity expressed in milliseconds.
         */
        public abstract double toMillis(double x);

        /**
         * Converts a quantity to milliseconds given a unit abbreviation.
         *
         * @param unit One of s, m, h, d or w. A null or unrecognized unit is treated as seconds.
         * @param x    The quantity to convert.
         * @return The quantity expressed in milliseconds.
         */
        public static double toMillis(String unit, double x) {
            if (unit == null) {
                unit = "s";
            }
            MilliConverter converter = Changer.unitMap.get(unit);
            if (converter == null) {
                converter = unitMap.get("s");
            }
            return converter.toMillis(x);
        }
    }

    /**
     * A converter for units that are a fixed number of milliseconds long.
     */
    private static final class FixedRatioConverter extends MilliConverter {
        private final long millisPerUnit;

        private FixedRatioConverter(long millisPerUnit) {
            this.millisPerUnit = millisPerUnit;
        }

        @Override
        public double toMillis(double x) {
            return millisPerUnit * x;
        }
    }

    // unit abbreviation -> converter for that unit
    private static final Map<String, MilliConverter> unitMap = ImmutableMap.<String, MilliConverter>of(
            "s", new FixedRatioConverter(TimeUnit.SECONDS.toMillis(1)),
            "m", new FixedRatioConverter(TimeUnit.MINUTES.toMillis(1)),
            "h", new FixedRatioConverter(TimeUnit.HOURS.toMillis(1)),
            "d", new FixedRatioConverter(TimeUnit.DAYS.toMillis(1)),
            "w", new FixedRatioConverter(7 * TimeUnit.DAYS.toMillis(1))
    );

    // the samplers for the fields of the base record and, in the same order, their names
    private List<FieldSampler> fields;
    private List<String> fieldNames;

    // Kept so that existing configurations that set a prefix still load. Change flags are
    // reported in their own "changes" object, so the prefix does not influence the output.
    private String prefix = "change-";

    private JsonNodeFactory nodeFactory = JsonNodeFactory.withExactBigDecimals(false);
    private Random gen = new Random();

    // by default, events cover the 100 days leading up to now (both in epoch milliseconds)
    private double end = System.currentTimeMillis();
    private double start = System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(100, TimeUnit.DAYS);

    // these are used to sample which field to change
    private Gamma x, y;

    private double meanInterval = 1000; // interval - offset will have this mean
    private double minInterval = 0; // no interval can be less than this

    private FancyTimeFormatter df = new FancyTimeFormatter("yyyy-MM-dd");

    /**
     * Creates a changer over the given base record schema.
     *
     * @param fields The samplers that produce the fields of the base record.
     */
    public Changer(@JsonProperty("values") List<FieldSampler> fields) {
        this.fields = fields;
        fieldNames = Lists.newArrayList();
        for (FieldSampler field : fields) {
            fieldNames.add(field.getName());
        }
        x = new Gamma(1, 1, gen);
        y = new Gamma(3, 1, gen);
    }

    /**
     * Sets the prefix for change flag names. The value is stored but does not affect the
     * records produced by {@link #sample()}.
     *
     * @param prefix The prefix to remember.
     */
    @SuppressWarnings("unused")
    public void setPrefix(String prefix) {
        this.prefix = prefix;
    }

    /**
     * Controls which fields are changed most often. The field to change is picked by scaling
     * the ratio x / (x + y) of two gamma variates by the number of fields. A positive skew
     * is used as the shape of y (larger values favor fields early in the list); a negative skew
     * uses its magnitude as the shape of x (favoring fields late in the list).
     *
     * @param skew The skew. Zero is not a meaningful value since a gamma shape must be positive.
     */
    @SuppressWarnings("UnusedDeclaration")
    public void setSkew(double skew) {
        if (skew < 0) {
            // the shape parameter of a gamma distribution must be positive, so use the magnitude
            x = new Gamma(-skew, 1, gen);
            y = new Gamma(1, 1, gen);
        } else {
            x = new Gamma(1, 1, gen);
            y = new Gamma(skew, 1, gen);
        }
    }

    /**
     * Determines the rate at which simulated events arrive. This rate can be a number in which case
     * it is interpreted as a number of events per second. The rate can also be a string like 5/m
     * which means 5 events per minute. The supported units are seconds (s), minutes (m), hours (h),
     * days (d) and weeks (w).
     *
     * @param rate The rate at which events arrive.
     * @throws IllegalArgumentException If the rate is malformed, negative or infinite.
     */
    @SuppressWarnings("UnusedDeclaration")
    public void setRate(String rate) {
        Matcher m = RATE_PATTERN.matcher(rate);
        if (!m.matches()) {
            throw new IllegalArgumentException(String.format("Invalid rate argument: %s", rate));
        }
        // group(1) is the number, group(2) is either empty or a unit abbreviation letter
        double interval = MilliConverter.toMillis(m.group(2), 1) / Double.parseDouble(m.group(1));
        // A negative interval would make time run backwards and a zero interval would stall it,
        // either of which keeps sample() from ever reaching the end time. An infinite interval
        // (rate of zero) is fine and simply produces no events.
        if (!(interval > 0)) {
            throw new IllegalArgumentException(String.format("Invalid rate argument: %s", rate));
        }
        this.meanInterval = interval;
    }

    /**
     * Sets a lower bound on the time between events. This bound is enforced by generating events
     * with an exponential distribution and then adding this offset. The offset is a number that
     * may be followed by a unit (s, m, h, d or w); without a unit it is taken to be in seconds.
     *
     * @param offset The minimum separation between events
     * @throws IllegalArgumentException If the offset is malformed or negative.
     */
    @SuppressWarnings("UnusedDeclaration")
    public void setOffset(String offset) {
        Matcher m = TIME_PATTERN.matcher(offset);
        if (!m.matches()) {
            throw new IllegalArgumentException(String.format("Invalid time interval argument: %s", offset));
        }
        // group(1) is the number, group(2) is either empty (default to s) or d or some such.
        double interval = MilliConverter.toMillis(m.group(2), Double.parseDouble(m.group(1)));
        // a negative separation could move time backwards between events
        if (!(interval >= 0)) {
            throw new IllegalArgumentException(String.format("Invalid time interval argument: %s", offset));
        }
        this.minInterval = interval;
    }

    /**
     * Sets the format to be used in outputting event times. Standard Java date formatting rules apply. The
     * default format is yyyy-MM-dd. Another popular option is "yyyy-MM-dd HH:mm:ss.SS X".
     *
     * As a special treat, "s" can be used for seconds since epoch and "Q" can be used for milliseconds since
     * the epoch.
     *
     * @param format The preferred data format.
     */
    @SuppressWarnings("unused")
    public void setFormat(String format) {
        df = new FancyTimeFormatter(format);
    }

    /**
     * Sets the starting time for events. The first event happens one random interval after this
     * time. Note that the format for the starting time will be the default format unless the
     * format argument precedes this attribute.
     *
     * @param start The start time for the sequence
     * @throws ParseException If the start time does not match the current format.
     */
    @SuppressWarnings("UnusedDeclaration")
    public void setStart(String start) throws ParseException {
        this.start = df.parse(start).getTime();
    }

    /**
     * Sets the ending time for events. This will be after the time of any event we generate.
     *
     * @param end The upper bound for event time
     * @throws ParseException If the end time does not match the current format.
     */
    @SuppressWarnings("UnusedDeclaration")
    public void setEnd(String end) throws ParseException {
        this.end = df.parse(end).getTime();
    }

    /**
     * Generates one change history. An initial record is sampled and then, at random times
     * between the start and end times, one field at a time is re-sampled. After each change
     * the complete record is appended to the history together with the change flags.
     *
     * @return An array with one object per change, each having "time", "values" and "changes".
     */
    @Override
    public JsonNode sample() {
        ArrayNode history = new ArrayNode(nodeFactory);
        if (fields.isEmpty()) {
            // nothing can change, and there is no valid field index to pick
            return history;
        }

        Map<String, JsonNode> current = Maps.newLinkedHashMap();
        Map<String, JsonNode> changes = Maps.newLinkedHashMap();
        // the initial state is built the same way as later changes so that flat fields
        // appear under the same keys throughout the history
        for (int i = 0; i < fields.size(); i++) {
            resample(i, 0, current, changes);
        }

        double t = start + nextGap();
        while (t < end) {
            Date now = new Date((long) t);
            List<String> touched = resample(pickField(), 1, current, changes);
            history.add(asJson(df.format(now), current, changes));

            // flags only mark the record in which the change happened
            for (String key : touched) {
                changes.put(key, IntNode.valueOf(0));
            }
            t += minInterval + nextGap();
        }
        return history;
    }

    /**
     * Samples a new value for one field and stores it in the current record. A flat field
     * contributes each member of its sampled object as a separate entry; any other field is
     * stored under its own name.
     *
     * @return The keys that were written, which are also the keys whose flags were set.
     */
    private List<String> resample(int index, int flag, Map<String, JsonNode> current, Map<String, JsonNode> changes) {
        FieldSampler field = fields.get(index);
        JsonNode value = field.sample();

        List<String> touched = Lists.newArrayList();
        if (field.isFlat()) {
            Iterator<String> names = value.fieldNames();
            while (names.hasNext()) {
                String key = names.next();
                current.put(key, value.get(key));
                touched.add(key);
            }
        } else {
            String key = fieldNames.get(index);
            current.put(key, value);
            touched.add(key);
        }

        for (String key : touched) {
            changes.put(key, IntNode.valueOf(flag));
        }
        return touched;
    }

    /**
     * Draws an exponentially distributed interval with mean meanInterval, in milliseconds.
     */
    private double nextGap() {
        return -meanInterval * Math.log(1 - gen.nextDouble());
    }

    /**
     * Snapshots the current record and change flags into a new JSON object.
     */
    private ObjectNode asJson(String now, Map<String, JsonNode> current, Map<String, JsonNode> changes) {
        ObjectNode r = new ObjectNode(nodeFactory);
        r.put("time", now);
        ObjectNode values = r.putObject("values");
        ObjectNode flags = r.putObject("changes");
        for (Map.Entry<String, JsonNode> entry : current.entrySet()) {
            values.set(entry.getKey(), entry.getValue());
        }
        for (Map.Entry<String, JsonNode> entry : changes.entrySet()) {
            flags.set(entry.getKey(), entry.getValue());
        }
        return r;
    }

    /**
     * Picks the index of the field to change by scaling the ratio of two gamma variates.
     */
    private int pickField() {
        double xValue = x.nextDouble();
        double yValue = y.nextDouble();
        double beta = xValue / (xValue + yValue);
        int n = fieldNames.size();
        int index = (int) Math.floor(beta * n);
        // beta rounds to exactly 1 when yValue is negligible next to xValue, which would
        // otherwise give an index one past the last field
        return Math.max(0, Math.min(n - 1, index));
    }
}