Skip to content

Commit d6ce56e

Browse files
GH-49470: [C++][Gandiva] Fix crashes in substring_index and truncate with extreme integer values (#49471)
### Rationale for this change Two Gandiva functions crash when called with extreme integer parameter values: 1. `substring_index(VARCHAR, VARCHAR, INT)` crashes with SIGBUS when count is `INT_MIN` 2. `truncate(BIGINT, INT)` crashes with SIGSEGV when scale is `INT_MAX` or `INT_MIN` ### What changes are included in this PR? **substring_index fix** (`gdv_string_function_stubs.cc`): - Replace `abs(cnt)` with safe `int64_t` computation to avoid undefined behavior when `cnt == INT_MIN` **truncate fix** (`precompiled/extended_math_ops.cc`): - Return input unchanged for positive scales (no-op for integers) - Return 0 for scales < -38 to prevent out-of-bounds access in `GetScaleMultiplier` ### Are these changes tested? Yes. Added coverage for `INT_MAX`/`INT_MIN` values in `gdv_function_stubs_test.cc` and `extended_math_ops_test.cc`. ### Are there any user-facing changes? No. **This PR contains a "Critical Fix".** These changes fix crashes caused by: - `abs(INT_MIN)` triggering undefined behavior (integer overflow) in `substring_index` - Out-of-bounds array access in `GetScaleMultiplier` when `truncate` receives extreme scale values * GitHub Issue: #49470 Authored-by: Dmitry Chirkov <dmitry.chirkov@dremio.com> Signed-off-by: Rossi Sun <zanmato1984@gmail.com>
1 parent a315b96 commit d6ce56e

File tree

4 files changed

+48
-9
lines changed

4 files changed

+48
-9
lines changed

cpp/src/gandiva/gdv_function_stubs_test.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
#include <gmock/gmock.h>
2222
#include <gtest/gtest.h>
2323

24+
#include <limits>
25+
2426
#include "arrow/util/logging.h"
2527
#include "gandiva/execution_context.h"
2628

@@ -570,6 +572,21 @@ TEST(TestGdvFnStubs, TestSubstringIndex) {
570572
out_str = gdv_fn_substring_index(ctx_ptr, "路学\\L", 8, "\\", 1, -1, &out_len);
571573
EXPECT_EQ(std::string(out_str, out_len), "L");
572574
EXPECT_FALSE(ctx.has_error());
575+
576+
// Large counts return full string when delimiter not found enough times
577+
out_str = gdv_fn_substring_index(ctx_ptr, "a.b.c", 5, ".", 1, -1000, &out_len);
578+
EXPECT_EQ(std::string(out_str, out_len), "a.b.c");
579+
EXPECT_FALSE(ctx.has_error());
580+
581+
out_str = gdv_fn_substring_index(ctx_ptr, "a.b.c", 5, ".", 1,
582+
std::numeric_limits<int32_t>::max(), &out_len);
583+
EXPECT_EQ(std::string(out_str, out_len), "a.b.c");
584+
EXPECT_FALSE(ctx.has_error());
585+
586+
out_str = gdv_fn_substring_index(ctx_ptr, "a.b.c", 5, ".", 1,
587+
std::numeric_limits<int32_t>::min(), &out_len);
588+
EXPECT_EQ(std::string(out_str, out_len), "a.b.c");
589+
EXPECT_FALSE(ctx.has_error());
573590
}
574591

575592
TEST(TestGdvFnStubs, TestUpper) {

cpp/src/gandiva/gdv_string_function_stubs.cc

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -428,14 +428,17 @@ const char* gdv_fn_substring_index(int64_t context, const char* txt, int32_t txt
428428
}
429429
}
430430

431-
if (static_cast<int32_t>(abs(cnt)) <= static_cast<int32_t>(occ.size()) && cnt > 0) {
431+
// Use int64_t to avoid undefined behavior with abs(INT_MIN)
432+
int64_t abs_cnt = (cnt < 0) ? -static_cast<int64_t>(cnt) : static_cast<int64_t>(cnt);
433+
int64_t occ_size = static_cast<int64_t>(occ.size());
434+
435+
if (abs_cnt <= occ_size && cnt > 0) {
432436
memcpy(out, txt, occ[cnt - 1]);
433437
*out_len = occ[cnt - 1];
434438
return out;
435-
} else if (static_cast<int32_t>(abs(cnt)) <= static_cast<int32_t>(occ.size()) &&
436-
cnt < 0) {
437-
int32_t sz = static_cast<int32_t>(occ.size());
438-
int32_t temp = static_cast<int32_t>(abs(cnt));
439+
} else if (abs_cnt <= occ_size && cnt < 0) {
440+
int64_t sz = occ_size;
441+
int64_t temp = abs_cnt;
439442

440443
memcpy(out, txt + occ[sz - temp] + pat_len, txt_len - occ[sz - temp] - pat_len);
441444
*out_len = txt_len - occ[sz - temp] - pat_len;

cpp/src/gandiva/precompiled/extended_math_ops.cc

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -386,16 +386,22 @@ gdv_int64 get_power_of_10(gdv_int32 exp) {
386386

387387
FORCE_INLINE
388388
gdv_int64 truncate_int64_int32(gdv_int64 in, gdv_int32 out_scale) {
389+
// For int64 (no fractional digits), positive scale is a no-op
390+
if (out_scale >= 0) {
391+
return in;
392+
}
393+
// GetScaleMultiplier only supports scales 0-38
394+
if (out_scale < -38) {
395+
return 0;
396+
}
397+
389398
bool overflow = false;
390399
arrow::BasicDecimal128 decimal = gandiva::decimalops::FromInt64(in, 38, 0, &overflow);
391400
arrow::BasicDecimal128 decimal_with_outscale =
392401
gandiva::decimalops::Truncate(gandiva::BasicDecimalScalar128(decimal, 38, 0), 38,
393402
out_scale, out_scale, &overflow);
394-
if (out_scale < 0) {
395-
out_scale = 0;
396-
}
397403
return gandiva::decimalops::ToInt64(
398-
gandiva::BasicDecimalScalar128(decimal_with_outscale, 38, out_scale), &overflow);
404+
gandiva::BasicDecimalScalar128(decimal_with_outscale, 38, 0), &overflow);
399405
}
400406

401407
FORCE_INLINE

cpp/src/gandiva/precompiled/extended_math_ops_test.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include <gtest/gtest.h>
2323

2424
#include <cmath>
25+
#include <limits>
2526

2627
#include "gandiva/execution_context.h"
2728
#include "gandiva/precompiled/types.h"
@@ -208,6 +209,18 @@ TEST(TestExtendedMathOps, TestTruncate) {
208209
EXPECT_EQ(truncate_int64_int32(-1234, -2), -1200);
209210
EXPECT_EQ(truncate_int64_int32(8124674407369523212, 0), 8124674407369523212);
210211
EXPECT_EQ(truncate_int64_int32(8124674407369523212, -2), 8124674407369523200);
212+
213+
// Positive scales are no-op for int64 (no fractional digits)
214+
EXPECT_EQ(truncate_int64_int32(12345, std::numeric_limits<int32_t>::max()), 12345);
215+
EXPECT_EQ(truncate_int64_int32(-12345, std::numeric_limits<int32_t>::max()), -12345);
216+
EXPECT_EQ(truncate_int64_int32(12345, 100), 12345);
217+
EXPECT_EQ(truncate_int64_int32(12345, 39), 12345);
218+
219+
// Scales beyond [-38, 0) truncate all digits
220+
EXPECT_EQ(truncate_int64_int32(12345, std::numeric_limits<int32_t>::min()), 0);
221+
EXPECT_EQ(truncate_int64_int32(12345, -100), 0);
222+
EXPECT_EQ(truncate_int64_int32(12345, -39), 0);
223+
EXPECT_EQ(truncate_int64_int32(-99999, -39), 0);
211224
}
212225

213226
TEST(TestExtendedMathOps, TestTrigonometricFunctions) {

0 commit comments

Comments
 (0)