package org.simpleflatmapper.csv;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.infra.Blackhole;
import org.simpleflatmapper.csv.parser.CellConsumer;
import org.simpleflatmapper.csv.parser.TextFormat;
public class UnescapeBenchmark {
/*
Sample JMH results (Mode avgt = average time per operation, reported in ns/op):
Benchmark (type) (value) Mode Cnt Score Error Units
UnescapeBenchmark.unescape 1 "unescape no escaped quote" avgt 50 17.402 ± 0.238 ns/op
UnescapeBenchmark.unescape 1 "unescape one ""escaped quote""" avgt 50 25.521 ± 0.595 ns/op
UnescapeBenchmark.unescape 1 """a""a""a""a""a""a""a""a""a""" avgt 50 24.265 ± 0.435 ns/op
UnescapeBenchmark.unescape 2 "unescape no escaped quote" avgt 50 19.401 ± 0.503 ns/op
UnescapeBenchmark.unescape 2 "unescape one ""escaped quote""" avgt 50 27.177 ± 0.602 ns/op
UnescapeBenchmark.unescape 2 """a""a""a""a""a""a""a""a""a""" avgt 50 27.313 ± 0.445 ns/op
*/
/**
 * Common shape of the unescape strategies compared by this benchmark.
 */
interface Unescaper {

    /**
     * Processes the cell stored in {@code chars[start, end)} and hands the
     * result to {@code cellConsumer}.
     *
     * @param chars        buffer holding the raw cell
     * @param start        index of the first character of the cell
     * @param end          index one past the last character of the cell
     * @param cellConsumer receiver of the processed cell
     */
    void newCell(
            char[] chars,
            int start,
            int end,
            CellConsumer cellConsumer);
}
/**
 * Measures one unescape call on the parameterised cell value.
 *
 * <p>The unescapers in this file compact the {@code char[]} they receive in
 * place whenever it contains an embedded escape character (see
 * {@code removeEscapeChars}). Feeding them the shared {@code param.content}
 * array directly would therefore let the first invocation rewrite the input,
 * and every later invocation would measure an already-collapsed buffer
 * rather than the value named by the {@code value} parameter. A fresh copy
 * is taken for each invocation so the measured input is always the pristine
 * one; the copy adds a small constant cost that is the same for every
 * {@code type}.
 *
 * @param param     benchmark state holding the input and the unescaper under test
 * @param blackhole unused here; results are consumed through {@code param.cellConsumer}
 */
@Benchmark
public void unescape(UnescapeParam param, Blackhole blackhole) {
    // Work on a private copy: the unescaper mutates its input buffer.
    final char[] chars = param.content.clone();
    param.unescaper.newCell(chars, 0, chars.length, param.cellConsumer);
}
/**
 * Benchmark state: selects the unescaper implementation and the cell value
 * under test, and prepares the objects the benchmark method needs.
 */
@State(Scope.Benchmark)
public static class UnescapeParam {

    // NOTE(review): presumably separator ',' and quote/escape '"' — confirm against TextFormat.
    public static final TextFormat TEXT_FORMAT = new TextFormat(',', '"', false);

    /** Characters of {@link #value}; filled in by {@link #setUp(Blackhole)}. */
    public char[] content;

    /** Implementation selected by {@link #type}; filled in by {@link #setUp(Blackhole)}. */
    public Unescaper unescaper;

    /** 1 selects {@code UnescapeCellPreProcessor1}, 2 selects {@code UnescapeCellPreProcessor2}. */
    @Param(value = { "1", "2"})
    public int type;

    /** Raw quoted cell text handed to the unescaper. */
    @Param(value = { "\"unescape no escaped quote\"", "\"unescape one \"\"escaped quote\"\"\"", "\"\"\"a\"\"a\"\"a\"\"a\"\"a\"\"a\"\"a\"\"a\"\"a\"\"\"" })
    public String value;

    /** Consumer that forwards every produced cell to the blackhole. */
    public BlackholeCellConsumer cellConsumer;

    /**
     * Builds the unescaper, the input buffer and the consumer.
     *
     * @param blackhole sink used by the cell consumer
     * @throws IllegalArgumentException if {@link #type} is neither 1 nor 2
     */
    @Setup
    public void setUp(Blackhole blackhole) {
        switch (type) {
            case 1:
                unescaper = new UnescapeCellPreProcessor1(TEXT_FORMAT);
                break;
            case 2:
                unescaper = new UnescapeCellPreProcessor2(TEXT_FORMAT);
                break;
            default:
                // Fail during setup instead of leaving unescaper null and hitting
                // a NullPointerException inside the measured method.
                throw new IllegalArgumentException("Unsupported unescaper type: " + type);
        }
        content = value.toCharArray();
        cellConsumer = new BlackholeCellConsumer(blackhole);
    }
}
/**
 * Cell consumer that passes everything it receives to a JMH blackhole.
 */
static class BlackholeCellConsumer implements CellConsumer {

    public final Blackhole blackhole;

    BlackholeCellConsumer(Blackhole blackhole) {
        this.blackhole = blackhole;
    }

    /**
     * Consumes the buffer reference, the offset and the length, in that order.
     */
    @Override
    public void newCell(char[] chars, int offset, int length) {
        final Blackhole sink = this.blackhole;
        sink.consume(chars);
        sink.consume(offset);
        sink.consume(length);
    }
}
public static class UnescapeCellPreProcessor1 implements Unescaper {
private final TextFormat textFormat;
public UnescapeCellPreProcessor1(TextFormat textFormat) {
this.textFormat = textFormat;
}
public final void newCell(char[] chars, int start, int end, CellConsumer cellConsumer) {
int strStart = start;
int strEnd = end;
int escapeChar = textFormat.escapeChar;
if (strStart < strEnd && chars[strStart] == escapeChar) {
strStart ++;
strEnd = unescape(chars, strStart, strEnd, escapeChar);
}
cellConsumer.newCell(chars, strStart, strEnd - strStart);
}
private int unescape(final char[] chars, final int start, final int end, final int escapeChar) {
for(int i = start; i < end - 1; i ++) {
if (chars[i] == escapeChar) {
return removeEscapeChars(chars, end, i, escapeChar);
}
}
if (start < end && chars[end - 1] == escapeChar) {
return end - 1;
}
return end;
}
private int removeEscapeChars(final char[] chars, final int end, final int firstEscapeChar, final int escapeChar) {
int destIndex = firstEscapeChar;
boolean escaped = true;
for(int sourceIndex = firstEscapeChar + 1;sourceIndex < end; sourceIndex++) {
char c = chars[sourceIndex];
if (c != escapeChar || escaped) {
chars[destIndex++] = c;
escaped = false;
} else {
escaped = true;
}
}
return destIndex;
}
}
public static class UnescapeCellPreProcessor2 implements Unescaper {
private final TextFormat textFormat;
public UnescapeCellPreProcessor2(TextFormat textFormat) {
this.textFormat = textFormat;
}
public final void newCell(char[] chars, int start, int end, CellConsumer cellConsumer) {
int strStart = start;
int strEnd = end;
int escapeChar = textFormat.escapeChar;
if (strStart < strEnd && chars[strStart] == escapeChar) {
strStart ++;
strEnd = unescape(chars, strStart, strEnd, escapeChar);
}
cellConsumer.newCell(chars, strStart, strEnd - strStart);
}
private int unescape(final char[] chars, final int start, final int end, final int escapeChar) {
int indexOfEscapeChars = findChar(chars, start, end, escapeChar);
if (indexOfEscapeChars >= end - 1) {
return indexOfEscapeChars;
} else {
return removeEscapeChars(chars, end, indexOfEscapeChars, escapeChar);
}
}
private int findChar(char[] chars, int start, int end, int escapeChar) {
for(int i = start; i < end; i++) {
if (chars[i] == escapeChar) return i;
}
return end;
}
private int removeEscapeChars(final char[] chars, final int end, final int firstEscapeChar, final int escapeChar) {
int destIndex = firstEscapeChar;
boolean escaped = true;
for(int sourceIndex = firstEscapeChar + 1;sourceIndex < end; sourceIndex++) {
char c = chars[sourceIndex];
if (c != escapeChar || escaped) {
chars[destIndex++] = c;
escaped = false;
} else {
escaped = true;
}
}
return destIndex;
}
}
}