Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions bolt/core/QueryConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,10 @@ class QueryConfig {

static constexpr const char* kEnableSonicJsonToMap = "sonic.json_to_map";

/// Config key controlling the json_to_map fallback for input that fails to
/// parse. When the setting is true, json_to_map escapes raw (unescaped)
/// control characters in the input and retries the parse; when it is false,
/// the retry is skipped and the parse failure stands.
static constexpr const char* kJsonToMapEscapeControlChars =
"json_to_map_escape_control_chars";

static constexpr const char* kEnableSonicIsJsonScalar =
"sonic.is_json_scalar";

Expand Down Expand Up @@ -1683,6 +1687,10 @@ class QueryConfig {
return get<bool>(kEnableSonicJsonToMap, true);
}

/// Returns the boolean stored under kJsonToMapEscapeControlChars
/// ("json_to_map_escape_control_chars"). `true` is passed as the second
/// argument to get<bool> (presumably the value used when the key is unset —
/// confirm against get<T>).
bool jsonToMapEscapeControlChars() const {
return get<bool>(kJsonToMapEscapeControlChars, true);
}

/// Returns the boolean stored under kEnableSonicIsJsonScalar
/// ("sonic.is_json_scalar"). `true` is passed as the second argument to
/// get<bool> (presumably the value used when the key is unset — confirm
/// against get<T>).
bool enableSonicIsJsonScalar() const {
return get<bool>(kEnableSonicIsJsonScalar, true);
}
Expand Down
20 changes: 12 additions & 8 deletions bolt/functions/sparksql/JsonToMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,22 +68,25 @@ class JsonToMapFunction : public exec::VectorFunction {
const TypePtr& outputType,
exec::EvalCtx& context,
VectorPtr& result) const override {
folly::call_once(initUseSonic_, [&] {
useSonic_ =
context.execCtx()->queryCtx()->queryConfig().enableSonicJsonParse();
});
const auto& queryConfig = context.execCtx()->queryCtx()->queryConfig();
folly::call_once(
initUseSonic_, [&] { useSonic_ = queryConfig.enableSonicJsonParse(); });

const auto escapeControlChars = queryConfig.jsonToMapEscapeControlChars();

if (useSonic_) {
applySonic(rows, args, outputType, context, result);
applySonic(rows, args, outputType, escapeControlChars, context, result);
} else {
applySimdJson(rows, args, outputType, context, result);
applySimdJson(
rows, args, outputType, escapeControlChars, context, result);
}
}

void applySimdJson(
const SelectivityVector& rows,
std::vector<VectorPtr>& args,
const TypePtr& outputType,
bool escapeControlChars,
exec::EvalCtx& context,
VectorPtr& result) const {
BaseVector::ensureWritable(
Expand Down Expand Up @@ -141,7 +144,7 @@ class JsonToMapFunction : public exec::VectorFunction {

padded_data = current;
bool ok = parseInto(padded_data);
if (!ok) {
if (!ok && escapeControlChars) {
// On failure, escape raw control chars and retry (only the rare
// failure path; valid JSON is unaffected).
padded_data = escapeUnescapedControlChars(current);
Expand All @@ -166,6 +169,7 @@ class JsonToMapFunction : public exec::VectorFunction {
const SelectivityVector& rows,
std::vector<VectorPtr>& args,
const TypePtr& outputType,
bool escapeControlChars,
exec::EvalCtx& context,
VectorPtr& result) const {
BaseVector::ensureWritable(
Expand All @@ -191,7 +195,7 @@ class JsonToMapFunction : public exec::VectorFunction {
kParseIntegerAsRaw | kParseOverflowNumAsNumStr;
doc.Parse<kParseFlags>(current);
std::string escaped;
if (doc.HasParseError()) {
if (doc.HasParseError() && escapeControlChars) {
// On failure, escape raw control chars and retry (matching jsoniter);
// `escaped` must outlive the member iteration below.
escaped = escapeUnescapedControlChars(current);
Expand Down
19 changes: 19 additions & 0 deletions bolt/functions/sparksql/tests/JsonToMapTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ class JsonToMapTest : public SparkFunctionBaseTest {
expr, makeRowVector({makeFlatVector<StringView>({inputs[0]})}));
}

/// Overrides the kJsonToMapEscapeControlChars query config on queryCtx_.
///
/// Takes a copy of the current raw configs, sets the flag in that copy and
/// installs the copy back, so every other entry in the copy is carried over
/// unchanged.
///
/// @param escapeControlChars Value to store for the flag.
void setJsonToMapEscapeControlChars(bool escapeControlChars) {
  auto config = queryCtx_->queryConfig().rawConfigsCopy();
  // Spell the boolean out explicitly. std::to_string has no bool overload, so
  // std::to_string(flag) promotes to int and stores "1"/"0", which only works
  // if the config parser accepts numeric booleans.
  config[core::QueryConfig::kJsonToMapEscapeControlChars] =
      escapeControlChars ? "true" : "false";
  queryCtx_->testingOverrideConfigUnsafe(std::move(config));
}

void testJsonToMap(
const std::vector<StringView>& inputs,
const std::vector<std::pair<StringView, std::optional<StringView>>>&
Expand Down Expand Up @@ -147,6 +154,7 @@ TEST_F(JsonToMapTest, unescapedControlChars) {
// escaped, but the reference Hive UDF (backed by com.jsoniter) accepts raw
// control chars and keeps them verbatim in the values. json_to_map must
// match that lenient behavior instead of returning SQL NULL.
setJsonToMapEscapeControlChars(true);
{
// Raw newlines embedded in several values; they must be preserved.
StringView json =
Expand Down Expand Up @@ -178,6 +186,17 @@ TEST_F(JsonToMapTest, unescapedControlChars) {
}
}

TEST_F(JsonToMapTest, unescapedControlCharsRetryConfig) {
  // A JSON object whose string value contains a raw (unescaped) newline.
  const StringView rawNewlineJson("{\"k\":\"a\nb\"}");

  // Retry disabled: the input is rejected and the single row is a NULL map.
  setJsonToMapEscapeControlChars(false);
  const auto nullMapRow =
      makeNullableMapVector<StringView, StringView>({std::nullopt});
  assertEqualVectors(nullMapRow, evaluateJsonToMap({rawNewlineJson}));

  // Retry enabled: the same input parses and the newline is kept in the value.
  setJsonToMapEscapeControlChars(true);
  testJsonToMap({rawNewlineJson}, {{"k", StringView("a\nb")}});
}

TEST_F(JsonToMapTest, numericValuesPreserveOriginalText) {
// Numeric values must be returned as their original JSON token, matching the
// Hive reference UDF (com.jsoniter). They must NOT be round-tripped through a
Expand Down