Skip to content

Add fast floating-point parsing, generation support #314

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,8 @@ public CsvGenerator(IOContext ctxt, int jsonFeatures, int csvFeatures,
_ioContext = ctxt;
_formatFeatures = csvFeatures;
_schema = schema;
_writer = new CsvEncoder(ctxt, csvFeatures, out, schema);
boolean useFastDoubleWriter = StreamWriteFeature.USE_FAST_DOUBLE_WRITER.enabledIn(jsonFeatures);
_writer = new CsvEncoder(ctxt, csvFeatures, out, schema, useFastDoubleWriter);
_writeContext = null; // just to make sure it won't be used
_tokenWriteContext = SimpleTokenWriteContext.createRootContext(null);
_writer.setOutputEscapes(CsvCharacterEscapes.fromCsvFeatures(csvFeatures).getEscapeCodesForAscii());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,7 @@ private final void _parseSlowFloatValue(boolean exactNumber)
_numTypesValid = NR_BIGDECIMAL;
} else {
// Otherwise double has to do
_numberDouble = _textBuffer.contentsAsDouble();
_numberDouble = _textBuffer.contentsAsDouble(_owner.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
_numTypesValid = NR_DOUBLE;
}
} catch (NumberFormatException nex) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.fasterxml.jackson.dataformat.csv.impl;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.io.CharTypes;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.dataformat.csv.CsvGenerator;
Expand Down Expand Up @@ -116,6 +117,11 @@ public class CsvEncoder
*/
protected boolean _cfgEscapeControlCharWithEscapeChar;

/**
* Whether {@code float} and {@code double} values are converted to text with
* the "fast" writer: this flag is passed as the second argument of
* {@code NumberOutput.toString()} when floating-point values are appended.
*
* @since 2.14
*/
protected boolean _cfgUseFastDoubleWriter;

protected final char _cfgQuoteCharEscapeChar;

/**
Expand Down Expand Up @@ -193,10 +199,18 @@ public class CsvEncoder
/**********************************************************
*/


/**
* Creates an encoder with the fast double writer disabled; delegates to the
* five-argument constructor, passing {@code false} for {@code useFastDoubleWriter}.
*
* @deprecated Since 2.14, use
*   {@link #CsvEncoder(IOContext, int, Writer, CsvSchema, boolean)} instead
*/
@Deprecated //since 2.14
public CsvEncoder(IOContext ctxt, int csvFeatures, Writer out, CsvSchema schema)
{
this(ctxt, csvFeatures, out, schema, false);
}

public CsvEncoder(IOContext ctxt, int csvFeatures, Writer out, CsvSchema schema, boolean useFastDoubleWriter)
{
_ioContext = ctxt;
_csvFeatures = csvFeatures;
_cfgUseFastDoubleWriter = useFastDoubleWriter;
_cfgOptimalQuoting = CsvGenerator.Feature.STRICT_CHECK_FOR_QUOTING.enabledIn(csvFeatures);
_cfgIncludeMissingTail = !CsvGenerator.Feature.OMIT_MISSING_TAIL_COLUMNS.enabledIn(_csvFeatures);
_cfgAlwaysQuoteStrings = CsvGenerator.Feature.ALWAYS_QUOTE_STRINGS.enabledIn(csvFeatures);
Expand Down Expand Up @@ -235,6 +249,7 @@ public CsvEncoder(CsvEncoder base, CsvSchema newSchema)
{
_ioContext = base._ioContext;
_csvFeatures = base._csvFeatures;
_cfgUseFastDoubleWriter = base._cfgUseFastDoubleWriter;
_cfgOptimalQuoting = base._cfgOptimalQuoting;
_cfgIncludeMissingTail = base._cfgIncludeMissingTail;
_cfgAlwaysQuoteStrings = base._cfgAlwaysQuoteStrings;
Expand Down Expand Up @@ -586,7 +601,7 @@ protected void appendValue(long value) throws IOException

protected void appendValue(float value) throws IOException
{
String str = NumberOutput.toString(value);
String str = NumberOutput.toString(value, _cfgUseFastDoubleWriter);
final int len = str.length();
if ((_outputTail + len) >= _outputEnd) { // >= to include possible comma too
_flushBuffer();
Expand All @@ -599,7 +614,7 @@ protected void appendValue(float value) throws IOException

protected void appendValue(double value) throws IOException
{
String str = NumberOutput.toString(value);
String str = NumberOutput.toString(value, _cfgUseFastDoubleWriter);
final int len = str.length();
if ((_outputTail + len) >= _outputEnd) { // >= to include possible comma too
_flushBuffer();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,25 @@ public final static boolean inLongRange(char[] digitChars, int offset, int len,
return true;
}

public final static double parseDouble(String numStr) throws NumberFormatException
{
return Double.parseDouble(numStr);
/**
* Parses given String as a {@code double} without the fast parser; equivalent
* to calling {@code parseDouble(s, false)}.
*
* @param s a string representing a number to parse
* @return closest matching double
* @throws NumberFormatException if string cannot be represented by a double
* @deprecated Since 2.14, use {@link #parseDouble(String, boolean)}
*/
@Deprecated //since 2.14
public static double parseDouble(final String s) throws NumberFormatException {
return parseDouble(s, false);
}

/**
* Parses given String as a {@code double} by delegating to
* {@code com.fasterxml.jackson.core.io.NumberInput.parseDouble(String, boolean)}.
*
* @param s a string representing a number to parse
* @param useFastParser whether to use {@link com.fasterxml.jackson.core.io.doubleparser}
* @return closest matching double
* @throws NumberFormatException if string cannot be represented by a double
* @since 2.14
*/
public static double parseDouble(final String s, final boolean useFastParser) throws NumberFormatException {
return com.fasterxml.jackson.core.io.NumberInput.parseDouble(s, useFastParser);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,40 @@ public static String toString(long value)
}
*/

public static String toString(double value)
{
return Double.toString(value);
/**
* Converts a {@code double} to its textual form with the fast writer disabled;
* equivalent to {@code toString(v, false)}.
*
* @param v double value to convert
* @return double as a string
*/
public static String toString(final double v) {
return toString(v, false);
}

public static String toString(float value)
{
return Float.toString(value);
/**
* Converts a {@code double} to its textual form by delegating to
* {@code com.fasterxml.jackson.core.io.NumberOutput.toString(double, boolean)}.
*
* @param v double value to convert
* @param useFastWriter whether to use Schubfach algorithm to write output (default false)
* @return double as a string
* @since 2.14
*/
public static String toString(final double v, final boolean useFastWriter) {
return com.fasterxml.jackson.core.io.NumberOutput.toString(v, useFastWriter);
}

/**
* Converts a {@code float} to its textual form with the fast writer disabled;
* equivalent to {@code toString(v, false)}.
*
* @param v float value to convert
* @return float as a string
*/
public static String toString(final float v) {
return toString(v, false);
}

/**
* Converts a {@code float} to its textual form by delegating to
* {@code com.fasterxml.jackson.core.io.NumberOutput.toString(float, boolean)}.
*
* @param v float value to convert
* @param useFastWriter whether to use Schubfach algorithm to write output (default false)
* @return float as a string
* @since 2.14
*/
public static String toString(final float v, final boolean useFastWriter) {
return com.fasterxml.jackson.core.io.NumberOutput.toString(v, useFastWriter);
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,13 +312,27 @@ public BigDecimal contentsAsDecimal()
/**
* Convenience method for converting contents of the buffer
* into a Double value.
* @deprecated use {@link #contentsAsDouble(boolean)}
*/
public double contentsAsDouble()
throws NumberFormatException
{
@Deprecated //since 2.14
public double contentsAsDouble() throws NumberFormatException {
    // Delegate to the replacement overload with the fast parser switched off,
    // instead of calling the deprecated single-argument NumberInput.parseDouble().
    // That method itself forwards with useFastParser=false, so the parsed value
    // and the NumberFormatException behavior are unchanged.
    return contentsAsDouble(false);
}

/**
* Convenience method for converting contents of the buffer
* into a {@code double} value, optionally using the fast parser.
*
* @param useFastParser whether to use {@link com.fasterxml.jackson.core.io.doubleparser}
* @return Buffered text value parsed as a {@code double}, if possible
*
* @throws NumberFormatException if contents are not a valid Java number
* @since 2.14
*/
public double contentsAsDouble(final boolean useFastParser) throws NumberFormatException {
return NumberInput.parseDouble(contentsAsString(), useFastParser);
}

public boolean looksLikeInt() {
final char[] ch = contentsAsArray();
final int len = ch.length;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package com.fasterxml.jackson.dataformat.csv;

import com.fasterxml.jackson.core.StreamReadFeature;
import com.fasterxml.jackson.core.StreamWriteFeature;

import java.io.StringReader;
import java.io.StringWriter;

public class FeaturesTest extends ModuleTestBase
Expand All @@ -26,4 +30,31 @@ public void testFactoryFeatures() throws Exception
assertTrue(g.canUseSchema(CsvSchema.emptySchema()));
g.close();
}

/**
 * Verifies that the fast double parser/writer features, when enabled directly
 * on a {@link CsvFactory}, are reported as enabled by the factory and by the
 * parser and generator it creates.
 */
public void testFactoryFastFeatures() throws Exception
{
    CsvFactory f = new CsvFactory();
    f.enable(StreamReadFeature.USE_FAST_DOUBLE_PARSER.mappedFeature());
    assertTrue(f.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER.mappedFeature()));
    f.enable(StreamWriteFeature.USE_FAST_DOUBLE_WRITER.mappedFeature());
    assertTrue(f.isEnabled(StreamWriteFeature.USE_FAST_DOUBLE_WRITER.mappedFeature()));

    CsvParser parser = f.createParser(new StringReader(""));
    assertTrue(parser.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
    // release the parser, matching how the sibling test closes what it creates
    parser.close();

    CsvGenerator generator = f.createGenerator(new StringWriter());
    assertTrue(generator.isEnabled(StreamWriteFeature.USE_FAST_DOUBLE_WRITER));
    // release the generator as well
    generator.close();
}

/**
 * Verifies that the fast double parser/writer features, when enabled through
 * {@code CsvFactory.builder()}, are reported as enabled by the built factory
 * and by the parser and generator it creates.
 */
public void testFactoryBuilderFastFeatures() throws Exception
{
    CsvFactory f = CsvFactory.builder()
            .enable(StreamReadFeature.USE_FAST_DOUBLE_PARSER)
            .enable(StreamWriteFeature.USE_FAST_DOUBLE_WRITER)
            .build();
    assertTrue(f.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER.mappedFeature()));
    assertTrue(f.isEnabled(StreamWriteFeature.USE_FAST_DOUBLE_WRITER.mappedFeature()));

    CsvParser parser = f.createParser(new StringReader(""));
    assertTrue(parser.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
    // release the parser, matching how the sibling test closes what it creates
    parser.close();

    CsvGenerator generator = f.createGenerator(new StringWriter());
    assertTrue(generator.isEnabled(StreamWriteFeature.USE_FAST_DOUBLE_WRITER));
    // release the generator as well
    generator.close();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ private void _testSimpleExplicit(ObjectReader r, boolean useBytes) throws Except
if (useBytes) {
user = r.readValue(INPUT);
} else {
user = r.readValue(INPUT.getBytes("UTF-8"));
user = r.readValue(utf8(INPUT));
}
assertEquals("Bob", user.firstName);
assertEquals("Robertson", user.lastName);
Expand All @@ -72,7 +72,7 @@ public void testSimpleExplicitWithBOM() throws Exception {

// first, UTF-8 BOM:
b.write(new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF});
b.write("Bob,Robertson,MALE,AQIDBAU=,false\n".getBytes("UTF-8"));
b.write(utf8("Bob,Robertson,MALE,AQIDBAU=,false\n"));
b.close();

user = r.readValue(b.toByteArray());
Expand Down Expand Up @@ -139,7 +139,7 @@ private void _testMapsWithLinefeeds(boolean useBytes) throws Exception {
MappingIterator<Map<String, String>> mi;

if (useBytes) {
mi = or.readValues(CSV.getBytes("UTF-8"));
mi = or.readValues(utf8(CSV));
} else {
mi = or.readValues(CSV);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.fasterxml.jackson.dataformat.csv.deser;

import com.fasterxml.jackson.core.StreamReadFeature;
import com.fasterxml.jackson.dataformat.csv.CsvFactory;

/**
 * Runs the tests inherited from {@link StreamingCSVReadTest} against a
 * {@link CsvFactory} that has {@link StreamReadFeature#USE_FAST_DOUBLE_PARSER}
 * enabled.
 */
public class FastParserStreamingCSVReadTest extends StreamingCSVReadTest {
    // Shared, class-level factory: built once instead of once per test instance,
    // and named distinctly from the CSV_F the parent's csvFactory() returns.
    private static final CsvFactory FAST_CSV_F = CsvFactory.builder()
            .enable(StreamReadFeature.USE_FAST_DOUBLE_PARSER)
            .build();

    /**
     * @return factory with the fast double parser enabled
     */
    @Override
    protected CsvFactory csvFactory() {
        return FAST_CSV_F;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public void testSimpleObjectWithHeader() throws IOException
{
CsvFactory f = new CsvFactory();
DataFormatDetector detector = new DataFormatDetector(f);
byte[] doc = "name,place,town\nBob,home,Denver\n".getBytes("UTF-8");
byte[] doc = utf8("name,place,town\nBob,home,Denver\n");
DataFormatMatcher matcher = detector.findFormat(doc);
// should have match
assertTrue(matcher.hasMatch());
Expand All @@ -23,7 +23,7 @@ public void testSimpleObjectWithHeader() throws IOException
assertSame(f, matcher.getMatch());

// and also something that does NOT look like CSV
doc = "{\"a\":3}".getBytes("UTF-8");
doc = utf8("{\"a\":3}");
matcher = detector.findFormat(doc);
assertFalse(matcher.hasMatch());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;

import org.junit.Assert;

Expand Down Expand Up @@ -80,7 +81,7 @@ public static class CloseTrackerOutputStream extends ByteArrayInputStream {
private boolean closed;

public CloseTrackerOutputStream(String s) {
super(s.getBytes());
super(s.getBytes(StandardCharsets.UTF_8));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ public class StreamingCSVReadTest extends ModuleTestBase
.setUseHeader(false)
.build();

/**
* Supplies the factory used when this test creates parsers; subclasses may
* override it to run the same tests against a differently configured factory.
*
* @return factory to create parsers from
*/
protected CsvFactory csvFactory() {
return CSV_F;
}

public void testIntRead() throws Exception
{
_testInts(1, 59, -8);
Expand Down Expand Up @@ -194,9 +198,9 @@ private CsvParser _parser(String csv, boolean useBytes, CsvSchema schema)
{
CsvParser p;
if (useBytes) {
p = CSV_F.createParser(new ByteArrayInputStream(csv.getBytes("UTF-8")));
p = csvFactory().createParser(new ByteArrayInputStream(utf8(csv)));
} else {
p = CSV_F.createParser(csv);
p = csvFactory().createParser(csv);
}
p.setSchema(schema);
return p;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import com.fasterxml.jackson.core.JsonGenerator;

import com.fasterxml.jackson.core.StreamWriteFeature;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.fasterxml.jackson.databind.node.ObjectNode;
Expand Down Expand Up @@ -113,12 +114,25 @@ public void testExplicitWithFloat() throws Exception
.build();

float amount = 1.89f;
//this value loses precision when converted
assertFalse(Double.toString((double)amount).equals("1.89"));
assertFalse(Double.toString(amount).equals("1.89"));
String result = MAPPER.writer(schema).writeValueAsString(new Entry2("abc", amount));
assertEquals("abc,1.89\n", result);
}

/**
 * Same scenario as the plain float test, but serialized through a mapper that
 * has {@code StreamWriteFeature.USE_FAST_DOUBLE_WRITER} enabled: the float
 * column must still be written as {@code 1.89}.
 */
public void testExplicitWithFastFloat() throws Exception
{
    final float amount = 1.89f;
    // Sanity check: once widened to double, the value no longer prints as "1.89"
    assertFalse("1.89".equals(Double.toString(amount)));

    final CsvSchema schema = CsvSchema.builder()
            .addColumn("id")
            .addColumn("amount")
            .build();
    final CsvMapper mapper = CsvMapper.builder()
            .enable(StreamWriteFeature.USE_FAST_DOUBLE_WRITER)
            .build();
    final ObjectWriter writer = mapper.writer(schema);

    assertEquals("abc,1.89\n", writer.writeValueAsString(new Entry2("abc", amount)));
}

public void testExplicitWithQuoted() throws Exception
{
CsvSchema schema = CsvSchema.builder()
Expand Down