diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 04d974f641ec..86eb1dcf1dcd 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -107,6 +107,7 @@ jobs: -e ARROW_FLIGHT=ON \ -e ARROW_FLIGHT_SQL=ON \ -e ARROW_GCS=ON \ + -e nlohmann_json_SOURCE=BUNDLED \ -e Protobuf_SOURCE=BUNDLED \ -e gRPC_SOURCE=BUNDLED \ ubuntu-ruby diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 7703046c75cd..8b085c8db773 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -173,8 +173,10 @@ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin # # The following dependencies will be downloaded due to missing/invalid packages # provided by the distribution: +# - Abseil is old and we require a version that has CRC32C # - opentelemetry-cpp-dev is not packaged -ENV ARROW_ACERO=ON \ +ENV absl_SOURCE=BUNDLED \ + ARROW_ACERO=ON \ ARROW_AZURE=ON \ ARROW_BUILD_STATIC=ON \ ARROW_BUILD_TESTS=ON \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 79b64dbc2a47..1e2f3e8f8f1a 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -272,6 +272,7 @@ else -DgRPC_SOURCE=${gRPC_SOURCE:-} \ -DGTest_SOURCE=${GTest_SOURCE:-} \ -Dlz4_SOURCE=${lz4_SOURCE:-} \ + -Dnlohmann_json_SOURCE=${nlohmann_json_SOURCE:-} \ -Dopentelemetry-cpp_SOURCE=${opentelemetry_cpp_SOURCE:-} \ -DORC_SOURCE=${ORC_SOURCE:-} \ -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \ diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 8742bcb55d90..a263475b45ae 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -629,14 +629,6 @@ else() "${THIRDPARTY_MIRROR_URL}/cares-${ARROW_CARES_BUILD_VERSION}.tar.gz") endif() -if(DEFINED ENV{ARROW_CRC32C_URL}) - set(CRC32C_SOURCE_URL "$ENV{ARROW_CRC32C_URL}") -else() - set_urls(CRC32C_SOURCE_URL - 
"https://github.com/google/crc32c/archive/${ARROW_CRC32C_BUILD_VERSION}.tar.gz" - ) -endif() - if(DEFINED ENV{ARROW_GBENCHMARK_URL}) set(GBENCHMARK_SOURCE_URL "$ENV{ARROW_GBENCHMARK_URL}") else() @@ -753,7 +745,7 @@ else() ARROW_PROTOBUF_STRIPPED_BUILD_VERSION) # strip the leading `v` set_urls(PROTOBUF_SOURCE_URL - "https://github.com/protocolbuffers/protobuf/releases/download/${ARROW_PROTOBUF_BUILD_VERSION}/protobuf-all-${ARROW_PROTOBUF_STRIPPED_BUILD_VERSION}.tar.gz" + "https://github.com/protocolbuffers/protobuf/releases/download/${ARROW_PROTOBUF_BUILD_VERSION}/protobuf-${ARROW_PROTOBUF_STRIPPED_BUILD_VERSION}.tar.gz" "${THIRDPARTY_MIRROR_URL}/protobuf-${ARROW_PROTOBUF_BUILD_VERSION}.tar.gz") endif() @@ -1890,12 +1882,66 @@ if(ARROW_WITH_THRIFT) list(GET Thrift_VERSION_LIST 2 Thrift_VERSION_PATCH) endif() +# ---------------------------------------------------------------------- +# Abseil defined here so it can be called from build_protobuf() + +function(build_absl) + list(APPEND CMAKE_MESSAGE_INDENT "ABSL: ") + message(STATUS "Building Abseil from source using FetchContent") + set(ABSL_VENDORED + TRUE + PARENT_SCOPE) + + if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) + string(APPEND CMAKE_CXX_FLAGS " -include stdint.h") + endif() + + fetchcontent_declare(absl + ${FC_DECLARE_COMMON_OPTIONS} OVERRIDE_FIND_PACKAGE + URL ${ABSL_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ABSL_BUILD_SHA256_CHECKSUM}") + + prepare_fetchcontent() + + # Unity build causes symbol redefinition errors (e.g. kDigits in + # time_zone_fixed.cc and time_zone_posix.cc anonymous namespaces). + set(CMAKE_UNITY_BUILD OFF) + # We have to enable Abseil install to add Abseil targets to an export set. + # But we don't install Abseil by EXCLUDE_FROM_ALL. 
+ set(ABSL_ENABLE_INSTALL ON) + fetchcontent_makeavailable(absl) + + if(CMAKE_VERSION VERSION_LESS 3.28) + set_property(DIRECTORY ${absl_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) + endif() + + if(APPLE) + # This is due to upstream absl::cctz issue + # https://github.com/abseil/abseil-cpp/issues/283 + find_library(CoreFoundation CoreFoundation) + # When ABSL_ENABLE_INSTALL is ON, the real target is "time" not "absl_time" + # Cannot use set_property on alias targets (absl::time is an alias) + set_property(TARGET time + APPEND + PROPERTY INTERFACE_LINK_LIBRARIES ${CoreFoundation}) + endif() + list(POP_BACK CMAKE_MESSAGE_INDENT) +endfunction() + # ---------------------------------------------------------------------- # Protocol Buffers (required for ORC, Flight and Substrait libraries) function(build_protobuf) list(APPEND CMAKE_MESSAGE_INDENT "Protobuf: ") message(STATUS "Building Protocol Buffers from source using FetchContent") + + # Protobuf requires Abseil. Build Abseil first with OVERRIDE_FIND_PACKAGE + # so that protobuf doesn't build its own copy and we can reuse it on google-cloud-cpp + # if it's also being built. + if(NOT TARGET absl::strings) + build_absl() + endif() + set(PROTOBUF_VENDORED TRUE PARENT_SCOPE) @@ -1911,8 +1957,7 @@ function(build_protobuf) fetchcontent_declare(protobuf ${FC_DECLARE_COMMON_OPTIONS} OVERRIDE_FIND_PACKAGE URL ${PROTOBUF_SOURCE_URL} - URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}" - SOURCE_SUBDIR cmake) + URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}") prepare_fetchcontent() @@ -1930,6 +1975,11 @@ function(build_protobuf) string(REPLACE "-ffat-lto-objects" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") set(protobuf_BUILD_TESTS OFF) + # Always build protobuf as a static library regardless of BUILD_SHARED_LIBS. + # Without this, FetchContent inherits BUILD_SHARED_LIBS from the parent + # project and may build protobuf as a DLL, causing unresolved symbols + # when linking arrow.dll on MSVC. 
+ set(protobuf_BUILD_SHARED_LIBS OFF) if(MSVC AND NOT ARROW_USE_STATIC_CRT) set(protobuf_MSVC_STATIC_RUNTIME OFF) endif() @@ -3054,46 +3104,6 @@ endfunction() # ---------------------------------------------------------------------- # Dependencies for Arrow Flight RPC -function(build_absl) - list(APPEND CMAKE_MESSAGE_INDENT "ABSL: ") - message(STATUS "Building Abseil from source using FetchContent") - set(ABSL_VENDORED - TRUE - PARENT_SCOPE) - - if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0) - string(APPEND CMAKE_CXX_FLAGS " -include stdint.h") - endif() - - fetchcontent_declare(absl - ${FC_DECLARE_COMMON_OPTIONS} OVERRIDE_FIND_PACKAGE - URL ${ABSL_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ABSL_BUILD_SHA256_CHECKSUM}") - - prepare_fetchcontent() - - # We have to enable Abseil install to add Abseil targets to an export set. - # But we don't install Abseil by EXCLUDE_FROM_ALL. - set(ABSL_ENABLE_INSTALL ON) - fetchcontent_makeavailable(absl) - - if(CMAKE_VERSION VERSION_LESS 3.28) - set_property(DIRECTORY ${absl_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) - endif() - - if(APPLE) - # This is due to upstream absl::cctz issue - # https://github.com/abseil/abseil-cpp/issues/283 - find_library(CoreFoundation CoreFoundation) - # When ABSL_ENABLE_INSTALL is ON, the real target is "time" not "absl_time" - # Cannot use set_property on alias targets (absl::time is an alias) - set_property(TARGET time - APPEND - PROPERTY INTERFACE_LINK_LIBRARIES ${CoreFoundation}) - endif() - list(POP_BACK CMAKE_MESSAGE_INDENT) -endfunction() - function(build_grpc) resolve_dependency(c-ares ARROW_CMAKE_PACKAGE_NAME @@ -3141,7 +3151,9 @@ function(build_grpc) set(gRPC_SSL_PROVIDER "none") set(_gRPC_SSL_LIBRARIES "OpenSSL::SSL;OpenSSL::Crypto") - set(gRPC_ZLIB_PROVIDER "package") + set(gRPC_ZLIB_PROVIDER + "package" + CACHE STRING "" FORCE) set(gRPC_INSTALL OFF) set(gRPC_BUILD_TESTS OFF) @@ -3169,8 +3181,7 @@ function(build_grpc) gpr grpc grpc++ - 
grpc++_reflection - upb) + grpc++_reflection) foreach(target ${GRPC_LIBRARY_TARGETS}) if(TARGET ${target} AND NOT TARGET gRPC::${target}) @@ -3218,8 +3229,7 @@ function(build_grpc) gRPC::address_sorting gRPC::gpr gRPC::grpc - gRPC::grpcpp_for_bundling - gRPC::upb) + gRPC::grpcpp_for_bundling) set(ARROW_BUNDLED_STATIC_LIBS "${ARROW_BUNDLED_STATIC_LIBS}" PARENT_SCOPE) @@ -3227,7 +3237,8 @@ function(build_grpc) endfunction() if(ARROW_WITH_GOOGLE_CLOUD_CPP OR ARROW_WITH_GRPC) - set(ARROW_ABSL_REQUIRED_VERSION 20211102) + # Abseil 20230125 released CRC32C which is necessary for GCS builds + set(ARROW_ABSL_REQUIRED_VERSION 20230125) # Google Cloud C++ SDK and gRPC require Google Abseil if(ARROW_WITH_GOOGLE_CLOUD_CPP) set(ARROW_ABSL_CMAKE_PACKAGE_NAME Arrow) @@ -3307,45 +3318,7 @@ if(ARROW_WITH_GRPC) endif() # ---------------------------------------------------------------------- -# GCS and dependencies - -function(build_crc32c_once) - list(APPEND CMAKE_MESSAGE_INDENT "CRC32C: ") - message(STATUS "Building CRC32C from source using FetchContent") - set(CRC32C_VENDORED - TRUE - PARENT_SCOPE) - set(CRC32C_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/crc32c_fc-install") - set(CRC32C_PREFIX - "${CRC32C_PREFIX}" - PARENT_SCOPE) - - fetchcontent_declare(crc32c - ${FC_DECLARE_COMMON_OPTIONS} OVERRIDE_FIND_PACKAGE - URL ${CRC32C_SOURCE_URL} - URL_HASH "SHA256=${ARROW_CRC32C_BUILD_SHA256_CHECKSUM}") - - prepare_fetchcontent() - - set(CRC32C_BUILD_TESTS OFF) - set(CRC32C_BUILD_BENCHMARKS OFF) - set(CRC32C_USE_GLOG OFF) - fetchcontent_makeavailable(crc32c) - - if(CMAKE_VERSION VERSION_LESS 3.28) - set_property(DIRECTORY ${crc32c_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) - endif() - - # Create alias target for consistency (crc32c exports as Crc32c::crc32c when installed) - if(NOT TARGET Crc32c::crc32c) - add_library(Crc32c::crc32c ALIAS crc32c) - endif() - - set(ARROW_BUNDLED_STATIC_LIBS - ${ARROW_BUNDLED_STATIC_LIBS} Crc32c::crc32c - PARENT_SCOPE) - list(POP_BACK CMAKE_MESSAGE_INDENT) 
-endfunction() +# OpenTelemetry C++ and dependencies function(build_nlohmann_json) list(APPEND CMAKE_MESSAGE_INDENT "nlohmann-json: ") @@ -3380,6 +3353,114 @@ if(ARROW_WITH_NLOHMANN_JSON) message(STATUS "Found nlohmann_json headers: ${nlohmann_json_INCLUDE_DIR}") endif() +function(build_opentelemetry) + list(APPEND CMAKE_MESSAGE_INDENT "OpenTelemetry: ") + message(STATUS "Building OpenTelemetry from source using FetchContent") + + if(Protobuf_VERSION VERSION_GREATER_EQUAL 3.22) + message(FATAL_ERROR "GH-36013: Can't use bundled OpenTelemetry with Protobuf 3.22 or later. " + "Protobuf is version ${Protobuf_VERSION}") + endif() + + set(OPENTELEMETRY_VENDORED + TRUE + PARENT_SCOPE) + + fetchcontent_declare(opentelemetry_proto + ${FC_DECLARE_COMMON_OPTIONS} + URL ${OPENTELEMETRY_PROTO_SOURCE_URL} + URL_HASH "SHA256=${ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM}" + ) + + # Use FetchContent_Populate instead of MakeAvailable because opentelemetry-proto + # has no CMakeLists.txt. + cmake_policy(PUSH) + if(POLICY CMP0169) + cmake_policy(SET CMP0169 OLD) + endif() + fetchcontent_populate(opentelemetry_proto) + cmake_policy(POP) + + fetchcontent_declare(opentelemetry_cpp + ${FC_DECLARE_COMMON_OPTIONS} + URL ${OPENTELEMETRY_SOURCE_URL} + URL_HASH "SHA256=${ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM}") + + prepare_fetchcontent() + + # Unity build causes symbol redefinition errors in protobuf-generated code + set(CMAKE_UNITY_BUILD FALSE) + set(OTELCPP_PROTO_PATH "${opentelemetry_proto_SOURCE_DIR}") + set(WITH_EXAMPLES OFF) + set(WITH_OTLP_HTTP ON) + set(WITH_OTLP_GRPC OFF) + set(WITH_FUNC_TESTS OFF) + # These options are slated for removal in v1.14 and their features are deemed stable + # as of v1.13. However, setting their corresponding ENABLE_* macros in headers seems + # finicky - resulting in build failures or ABI-related runtime errors during HTTP + # client initialization. There may still be a solution, but we disable them for now. 
+ set(WITH_OTLP_HTTP_SSL_PREVIEW OFF) + set(WITH_OTLP_HTTP_SSL_TLS_PREVIEW OFF) + + fetchcontent_makeavailable(opentelemetry_cpp) + + if(CMAKE_VERSION VERSION_LESS 3.28) + set_property(DIRECTORY ${opentelemetry_cpp_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) + endif() + + # Remove unused directories to save build directory storage + file(REMOVE_RECURSE "${opentelemetry_cpp_SOURCE_DIR}/ci") + + # OpenTelemetry creates its own targets. We need to add them to bundled static libs. + # The targets created by OpenTelemetry's CMakeLists.txt use the opentelemetry-cpp:: namespace. + # List of libraries that we actually need and want to bundle. + set(_OPENTELEMETRY_BUNDLED_LIBS + opentelemetry-cpp::common + opentelemetry-cpp::http_client_curl + opentelemetry-cpp::logs + opentelemetry-cpp::ostream_log_record_exporter + opentelemetry-cpp::ostream_span_exporter + opentelemetry-cpp::otlp_http_client + opentelemetry-cpp::otlp_http_log_record_exporter + opentelemetry-cpp::otlp_http_exporter + opentelemetry-cpp::otlp_recordable + opentelemetry-cpp::proto + opentelemetry-cpp::resources + opentelemetry-cpp::trace + opentelemetry-cpp::version) + + list(APPEND ARROW_BUNDLED_STATIC_LIBS ${_OPENTELEMETRY_BUNDLED_LIBS}) + set(ARROW_BUNDLED_STATIC_LIBS + "${ARROW_BUNDLED_STATIC_LIBS}" + PARENT_SCOPE) + + list(POP_BACK CMAKE_MESSAGE_INDENT) +endfunction() + +if(ARROW_WITH_OPENTELEMETRY) + if(NOT ARROW_ENABLE_THREADING) + message(FATAL_ERROR "Can't use OpenTelemetry with ARROW_ENABLE_THREADING=OFF") + endif() + + # cURL is required whether we build from source or use an existing installation + # (OTel's cmake files do not call find_curl for you) + find_curl() + resolve_dependency(opentelemetry-cpp) + set(ARROW_OPENTELEMETRY_LIBS + opentelemetry-cpp::trace + opentelemetry-cpp::logs + opentelemetry-cpp::otlp_http_log_record_exporter + opentelemetry-cpp::ostream_log_record_exporter + opentelemetry-cpp::ostream_span_exporter + opentelemetry-cpp::otlp_http_exporter) + 
get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::api + INTERFACE_INCLUDE_DIRECTORIES) + message(STATUS "Found OpenTelemetry headers: ${OPENTELEMETRY_INCLUDE_DIR}") +endif() + +# ---------------------------------------------------------------------- +# GCS and dependencies + function(build_google_cloud_cpp_storage) list(APPEND CMAKE_MESSAGE_INDENT "google-cloud-cpp: ") message(STATUS "Building google-cloud-cpp from source using FetchContent") @@ -3387,18 +3468,42 @@ function(build_google_cloud_cpp_storage) TRUE PARENT_SCOPE) - # List of dependencies taken from https://github.com/googleapis/google-cloud-cpp/blob/main/doc/packaging.md - build_crc32c_once() + # Workaround missing BCRYPT_RSA_ALG_HANDLE macro in older MinGW-w64 headers. + # google-cloud-cpp v3+ uses it without guards in sign_using_sha256.cc. + set(GOOGLE_CLOUD_CPP_PATCH_COMMAND) + if(MINGW) + find_program(PATCH patch) + if(PATCH) + set(GOOGLE_CLOUD_CPP_PATCH_COMMAND + ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/google-cloud-cpp-bcrypt-mingw.patch) + else() + find_program(GIT git) + if(GIT) + set(GOOGLE_CLOUD_CPP_PATCH_COMMAND + ${GIT} apply ${CMAKE_CURRENT_LIST_DIR}/google-cloud-cpp-bcrypt-mingw.patch) + endif() + endif() + endif() fetchcontent_declare(google_cloud_cpp ${FC_DECLARE_COMMON_OPTIONS} + PATCH_COMMAND ${GOOGLE_CLOUD_CPP_PATCH_COMMAND} URL ${google_cloud_cpp_storage_SOURCE_URL} URL_HASH "SHA256=${ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM}") prepare_fetchcontent() + # google-cloud-cpp v3+ uses BCryptHash() which requires Windows 10+ + if(MINGW) + string(APPEND CMAKE_C_FLAGS " -D_WIN32_WINNT=0x0A01") + string(APPEND CMAKE_CXX_FLAGS " -D_WIN32_WINNT=0x0A01") + endif() + message(STATUS "Only building the google-cloud-cpp::storage component") - set(GOOGLE_CLOUD_CPP_ENABLE storage) + # Disable auto-added features (monitoring, trace, opentelemetry, universe_domain) + # that require gRPC - storage only needs REST/curl. 
+ set(GOOGLE_CLOUD_CPP_ENABLE + "storage;-monitoring;-trace;-opentelemetry;-universe_domain") # We need this to build with OpenSSL 3.0. # See also: https://github.com/googleapis/google-cloud-cpp/issues/8544 set(GOOGLE_CLOUD_CPP_ENABLE_WERROR OFF) @@ -3411,6 +3516,36 @@ function(build_google_cloud_cpp_storage) fetchcontent_makeavailable(google_cloud_cpp) + # In google-cloud-cpp v3+, google_cloud_cpp_common unconditionally compiles + # internal/opentelemetry.cc which #includes opentelemetry-cpp API headers. + # We disable the opentelemetry feature (to avoid pulling in gRPC), but we + # still need to provide the OTel API include path. We add it directly + # (instead of linking opentelemetry-cpp::api) to avoid export set issues. + if(TARGET google_cloud_cpp_common) + if(TARGET opentelemetry-cpp::api) + get_target_property(_otel_api_includes opentelemetry-cpp::api + INTERFACE_INCLUDE_DIRECTORIES) + else() + # OpenTelemetry is not available (ARROW_WITH_OPENTELEMETRY=OFF), but we + # still need the header-only API for google-cloud-cpp v3+. Fetch just + # the source to get the headers without building anything. 
+ fetchcontent_declare(opentelemetry_cpp + ${FC_DECLARE_COMMON_OPTIONS} + URL ${OPENTELEMETRY_SOURCE_URL} + URL_HASH "SHA256=${ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM}") + # Populate-only fetch: pin CMP0169 to OLD around it, as build_opentelemetry() does. + cmake_policy(PUSH) + if(POLICY CMP0169) + cmake_policy(SET CMP0169 OLD) + endif() + fetchcontent_populate(opentelemetry_cpp) + cmake_policy(POP) + set(_otel_api_includes "${opentelemetry_cpp_SOURCE_DIR}/api/include") + endif() + target_include_directories(google_cloud_cpp_common SYSTEM + PUBLIC $<BUILD_INTERFACE:${_otel_api_includes}>) + endif() + if(CMAKE_VERSION VERSION_LESS 3.28) set_property(DIRECTORY ${google_cloud_cpp_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) endif() @@ -3441,6 +3576,10 @@ function(build_google_cloud_cpp_storage) absl::cordz_functions absl::cordz_info absl::cordz_handle + absl::crc32c + absl::crc_internal + absl::crc_cord_state + absl::crc_cpu_detect absl::debugging_internal absl::demangle_internal absl::exponential_biased @@ -3582,8 +3721,18 @@ function(build_orc) fetchcontent_makeavailable(orc) + # ORC 2.2.1 unconditionally adds /std:c++17 on MSVC via + # add_compile_options, which overrides CMAKE_CXX_STANDARD and causes + # ABI mismatches with protobuf (GlobalEmptyStringConstexpr vs + # GlobalEmptyStringDynamicInit). Override the standard on the orc target. + # Fixed in ORC 2.3.0: https://github.com/apache/orc/commit/7674f43 + if(MSVC) + target_compile_options(orc PRIVATE "/std:c++${CMAKE_CXX_STANDARD}") + endif() + add_library(orc::orc INTERFACE IMPORTED) target_link_libraries(orc::orc INTERFACE orc) + target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) # ar -M rejects paths with "c++/" because "+" is a line continuation # character in MRI scripts, so we have to create a copy of the static lib @@ -3615,6 +3764,12 @@ function(build_orc) INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY) + # Protobuf >= v22 headers transitively include Abseil headers. + # ORC build needs the Abseil include directory if we're using a bundled Abseil. 
+ if(absl_SOURCE_DIR) + set(ORC_ABSL_INCLUDE_DIR "${absl_SOURCE_DIR}") + endif() + get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY) @@ -3625,8 +3780,14 @@ function(build_orc) get_target_property(ORC_ZLIB_ROOT ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_ZLIB_ROOT "${ORC_ZLIB_ROOT}" DIRECTORY) + if(ORC_ABSL_INCLUDE_DIR) + set(ORC_CXX_FLAGS "${EP_CXX_FLAGS} -isystem ${ORC_ABSL_INCLUDE_DIR}") + else() + set(ORC_CXX_FLAGS "${EP_CXX_FLAGS}") + endif() set(ORC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_CXX_FLAGS=${ORC_CXX_FLAGS}" "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" -DSTOP_BUILD_ON_WARNING=OFF -DBUILD_LIBHDFSPP=OFF @@ -3711,114 +3872,6 @@ if(ARROW_ORC) endif() endif() -# ---------------------------------------------------------------------- -# OpenTelemetry C++ - -function(build_opentelemetry) - list(APPEND CMAKE_MESSAGE_INDENT "OpenTelemetry: ") - message(STATUS "Building OpenTelemetry from source using FetchContent") - - if(Protobuf_VERSION VERSION_GREATER_EQUAL 3.22) - message(FATAL_ERROR "GH-36013: Can't use bundled OpenTelemetry with Protobuf 3.22 or later. " - "Protobuf is version ${Protobuf_VERSION}") - endif() - - set(OPENTELEMETRY_VENDORED - TRUE - PARENT_SCOPE) - - fetchcontent_declare(opentelemetry_proto - ${FC_DECLARE_COMMON_OPTIONS} - URL ${OPENTELEMETRY_PROTO_SOURCE_URL} - URL_HASH "SHA256=${ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM}" - ) - - # Use FetchContent_Populate instead of MakeAvailable because opentelemetry-proto - # has no CMakeLists.txt. 
- cmake_policy(PUSH) - if(POLICY CMP0169) - cmake_policy(SET CMP0169 OLD) - endif() - fetchcontent_populate(opentelemetry_proto) - cmake_policy(POP) - - fetchcontent_declare(opentelemetry_cpp - ${FC_DECLARE_COMMON_OPTIONS} - URL ${OPENTELEMETRY_SOURCE_URL} - URL_HASH "SHA256=${ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM}") - - prepare_fetchcontent() - - # Unity build causes symbol redefinition errors in protobuf-generated code - set(CMAKE_UNITY_BUILD FALSE) - set(OTELCPP_PROTO_PATH "${opentelemetry_proto_SOURCE_DIR}") - set(WITH_EXAMPLES OFF) - set(WITH_OTLP_HTTP ON) - set(WITH_OTLP_GRPC OFF) - set(WITH_FUNC_TESTS OFF) - # These options are slated for removal in v1.14 and their features are deemed stable - # as of v1.13. However, setting their corresponding ENABLE_* macros in headers seems - # finicky - resulting in build failures or ABI-related runtime errors during HTTP - # client initialization. There may still be a solution, but we disable them for now. - set(WITH_OTLP_HTTP_SSL_PREVIEW OFF) - set(WITH_OTLP_HTTP_SSL_TLS_PREVIEW OFF) - - fetchcontent_makeavailable(opentelemetry_cpp) - - if(CMAKE_VERSION VERSION_LESS 3.28) - set_property(DIRECTORY ${opentelemetry_cpp_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) - endif() - - # Remove unused directories to save build directory storage - file(REMOVE_RECURSE "${opentelemetry_cpp_SOURCE_DIR}/ci") - - # OpenTelemetry creates its own targets. We need to add them to bundled static libs. - # The targets created by OpenTelemetry's CMakeLists.txt use the opentelemetry:: namespace. - # List of libraries that we actually need and want to bundle. 
- set(_OPENTELEMETRY_BUNDLED_LIBS - opentelemetry-cpp::common - opentelemetry-cpp::http_client_curl - opentelemetry-cpp::logs - opentelemetry-cpp::ostream_log_record_exporter - opentelemetry-cpp::ostream_span_exporter - opentelemetry-cpp::otlp_http_client - opentelemetry-cpp::otlp_http_log_record_exporter - opentelemetry-cpp::otlp_http_exporter - opentelemetry-cpp::otlp_recordable - opentelemetry-cpp::proto - opentelemetry-cpp::resources - opentelemetry-cpp::trace - opentelemetry-cpp::version) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS ${_OPENTELEMETRY_BUNDLED_LIBS}) - set(ARROW_BUNDLED_STATIC_LIBS - "${ARROW_BUNDLED_STATIC_LIBS}" - PARENT_SCOPE) - - list(POP_BACK CMAKE_MESSAGE_INDENT) -endfunction() - -if(ARROW_WITH_OPENTELEMETRY) - if(NOT ARROW_ENABLE_THREADING) - message(FATAL_ERROR "Can't use OpenTelemetry with ARROW_ENABLE_THREADING=OFF") - endif() - - # cURL is required whether we build from source or use an existing installation - # (OTel's cmake files do not call find_curl for you) - find_curl() - resolve_dependency(opentelemetry-cpp) - set(ARROW_OPENTELEMETRY_LIBS - opentelemetry-cpp::trace - opentelemetry-cpp::logs - opentelemetry-cpp::otlp_http_log_record_exporter - opentelemetry-cpp::ostream_log_record_exporter - opentelemetry-cpp::ostream_span_exporter - opentelemetry-cpp::otlp_http_exporter) - get_target_property(OPENTELEMETRY_INCLUDE_DIR opentelemetry-cpp::api - INTERFACE_INCLUDE_DIRECTORIES) - message(STATUS "Found OpenTelemetry headers: ${OPENTELEMETRY_INCLUDE_DIR}") -endif() - # ---------------------------------------------------------------------- # AWS SDK for C++ diff --git a/cpp/cmake_modules/google-cloud-cpp-bcrypt-mingw.patch b/cpp/cmake_modules/google-cloud-cpp-bcrypt-mingw.patch new file mode 100644 index 000000000000..af8afe8157c2 --- /dev/null +++ b/cpp/cmake_modules/google-cloud-cpp-bcrypt-mingw.patch @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +diff --git a/google/cloud/internal/win32/sign_using_sha256.cc b/google/cloud/internal/win32/sign_using_sha256.cc +index 15ecc6e6f9..9595ed9eb3 100644 +--- a/google/cloud/internal/win32/sign_using_sha256.cc ++++ b/google/cloud/internal/win32/sign_using_sha256.cc +@@ -99,6 +99,11 @@ StatusOr, + decltype(&BCryptDestroyKey)>> + CreateRsaBCryptKey(std::vector buffer) { + BCRYPT_KEY_HANDLE key_handle; ++ // Workaround missing macros in MinGW-w64: ++ // https://github.com/mingw-w64/mingw-w64/issues/49 ++ #ifndef BCRYPT_RSA_ALG_HANDLE ++ #define BCRYPT_RSA_ALG_HANDLE ((BCRYPT_ALG_HANDLE)0x000000e1) ++ #endif + if (BCryptImportKeyPair(BCRYPT_RSA_ALG_HANDLE, nullptr, + BCRYPT_RSAPRIVATE_BLOB, &key_handle, buffer.data(), + static_cast(buffer.size()), +diff --git a/google/cloud/storage/internal/md5hash.cc b/google/cloud/storage/internal/md5hash.cc +index 00c72e0e5d..7fbd04507f 100644 +--- a/google/cloud/storage/internal/md5hash.cc ++++ b/google/cloud/storage/internal/md5hash.cc +@@ -31,6 +31,11 @@ GOOGLE_CLOUD_CPP_INLINE_NAMESPACE_BEGIN + std::vector MD5Hash(absl::string_view payload) { + #ifdef _WIN32 + std::vector digest(16); ++ // Workaround missing macros in MinGW-w64: ++ // https://github.com/mingw-w64/mingw-w64/issues/49 ++ #ifndef BCRYPT_MD5_ALG_HANDLE ++ #define BCRYPT_MD5_ALG_HANDLE 
((BCRYPT_ALG_HANDLE)0x00000021) ++ #endif + BCryptHash(BCRYPT_MD5_ALG_HANDLE, nullptr, 0, + reinterpret_cast(const_cast(payload.data())), + static_cast(payload.size()), digest.data(), +diff --git a/google/cloud/storage/internal/win32/hash_function_impl.cc b/google/cloud/storage/internal/win32/hash_function_impl.cc +index 655c9f0991..07b2937c94 100644 +--- a/google/cloud/storage/internal/win32/hash_function_impl.cc ++++ b/google/cloud/storage/internal/win32/hash_function_impl.cc +@@ -33,6 +33,11 @@ using ContextPtr = + + ContextPtr CreateMD5HashCtx() { + BCRYPT_HASH_HANDLE hHash = nullptr; ++ // Workaround missing macros in MinGW-w64: ++ // https://github.com/mingw-w64/mingw-w64/issues/49 ++ #ifndef BCRYPT_MD5_ALG_HANDLE ++ #define BCRYPT_MD5_ALG_HANDLE ((BCRYPT_ALG_HANDLE)0x00000021) ++ #endif + BCryptCreateHash(BCRYPT_MD5_ALG_HANDLE, &hHash, nullptr, 0, nullptr, 0, 0); + return ContextPtr(hHash); + } diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index d94bf652ee86..d96258cd70c3 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -23,8 +23,8 @@ # `DEPENDENCIES` array (see the comment on top of the declaration for the # format). 
-ARROW_ABSL_BUILD_VERSION=20211102.0 -ARROW_ABSL_BUILD_SHA256_CHECKSUM=dcf71b9cba8dc0ca9940c4b316a0c796be8fab42b070bb6b7cab62b48f0e66c4 +ARROW_ABSL_BUILD_VERSION=20250127.0 +ARROW_ABSL_BUILD_SHA256_CHECKSUM=16242f394245627e508ec6bb296b433c90f8d914f73b9c026fddb905e27276e8 ARROW_AWS_C_AUTH_BUILD_VERSION=v0.9.0 ARROW_AWS_C_AUTH_BUILD_SHA256_CHECKSUM=aa6e98864fefb95c249c100da4ae7aed36ba13a8a91415791ec6fad20bec0427 ARROW_AWS_C_CAL_BUILD_VERSION=v0.9.2 @@ -64,18 +64,16 @@ ARROW_BZIP2_BUILD_VERSION=1.0.8 ARROW_BZIP2_BUILD_SHA256_CHECKSUM=ab5a03176ee106d3f0fa90e381da478ddae405918153cca248e682cd0c4a2269 ARROW_CARES_BUILD_VERSION=1.17.2 ARROW_CARES_BUILD_SHA256_CHECKSUM=4803c844ce20ce510ef0eb83f8ea41fa24ecaae9d280c468c582d2bb25b3913d -ARROW_CRC32C_BUILD_VERSION=1.1.2 -ARROW_CRC32C_BUILD_SHA256_CHECKSUM=ac07840513072b7fcebda6e821068aa04889018f24e10e46181068fb214d7e56 ARROW_GBENCHMARK_BUILD_VERSION=v1.8.3 ARROW_GBENCHMARK_BUILD_SHA256_CHECKSUM=6bc180a57d23d4d9515519f92b0c83d61b05b5bab188961f36ac7b06b0d9e9ce ARROW_GFLAGS_BUILD_VERSION=v2.2.2 ARROW_GFLAGS_BUILD_SHA256_CHECKSUM=34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf ARROW_GLOG_BUILD_VERSION=v0.5.0 ARROW_GLOG_BUILD_SHA256_CHECKSUM=eede71f28371bf39aa69b45de23b329d37214016e2055269b3b5e7cfd40b59f5 -ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v2.22.0 -ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=0c68782e57959c82e0c81def805c01460a042c1aae0c2feee905acaa2a2dc9bf -ARROW_GRPC_BUILD_VERSION=v1.46.3 -ARROW_GRPC_BUILD_SHA256_CHECKSUM=d6cbf22cb5007af71b61c6be316a79397469c58c82a942552a62e708bce60964 +ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v3.2.0 +ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=79ed6303d5b0c46f627bf8b38b5bd6b007c85e76c88722eb570a495bc2b579a0 +ARROW_GRPC_BUILD_VERSION=v1.76.0 +ARROW_GRPC_BUILD_SHA256_CHECKSUM=0af37b800953130b47c075b56683ee60bdc3eda3c37fc6004193f5b569758204 ARROW_GTEST_BUILD_VERSION=1.17.0 ARROW_GTEST_BUILD_SHA256_CHECKSUM=65fab701d9829d38cb77c14acdc431d2108bfdbf8979e40eb8ae567edf10b27c 
ARROW_JEMALLOC_BUILD_VERSION=5.3.0 @@ -92,8 +90,8 @@ ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v1.7.0 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=11330d850f5e24d34c4246bc8cb21fcd311e7565d219195713455a576bb11bed ARROW_ORC_BUILD_VERSION=2.2.1 ARROW_ORC_BUILD_SHA256_CHECKSUM=52fc762332442e8b05d7182f8c035f9e04d945b9a52be22ab69f28b3f37d4500 -ARROW_PROTOBUF_BUILD_VERSION=v21.3 -ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f +ARROW_PROTOBUF_BUILD_VERSION=v31.1 +ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=12bfd76d27b9ac3d65c00966901609e020481b9474ef75c7ff4601ac06fa0b82 # Because of https://github.com/Tencent/rapidjson/pull/1323, we require # a pre-release version of RapidJSON to build with GCC 8 without # warnings. @@ -150,7 +148,6 @@ DEPENDENCIES=( "ARROW_BROTLI_URL brotli-${ARROW_BROTLI_BUILD_VERSION}.tar.gz https://github.com/google/brotli/archive/${ARROW_BROTLI_BUILD_VERSION}.tar.gz" "ARROW_BZIP2_URL bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz https://sourceware.org/pub/bzip2/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz" "ARROW_CARES_URL cares-${ARROW_CARES_BUILD_VERSION}.tar.gz https://github.com/c-ares/c-ares/releases/download/cares-${ARROW_CARES_BUILD_VERSION//./_}/c-ares-${ARROW_CARES_BUILD_VERSION}.tar.gz" - "ARROW_CRC32C_URL crc32c-${ARROW_CRC32C_BUILD_VERSION}.tar.gz https://github.com/google/crc32c/archive/refs/tags/${ARROW_CRC32C_BUILD_VERSION}.tar.gz" "ARROW_GBENCHMARK_URL gbenchmark-${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz https://github.com/google/benchmark/archive/${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz" "ARROW_GFLAGS_URL gflags-${ARROW_GFLAGS_BUILD_VERSION}.tar.gz https://github.com/gflags/gflags/archive/${ARROW_GFLAGS_BUILD_VERSION}.tar.gz" "ARROW_GLOG_URL glog-${ARROW_GLOG_BUILD_VERSION}.tar.gz https://github.com/google/glog/archive/${ARROW_GLOG_BUILD_VERSION}.tar.gz" @@ -164,7 +161,7 @@ DEPENDENCIES=( "ARROW_OPENTELEMETRY_URL opentelemetry-cpp-${ARROW_OPENTELEMETRY_BUILD_VERSION}.tar.gz 
https://github.com/open-telemetry/opentelemetry-cpp/archive/refs/tags/${ARROW_OPENTELEMETRY_BUILD_VERSION}.tar.gz" "ARROW_OPENTELEMETRY_PROTO_URL opentelemetry-proto-${ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION}.tar.gz https://github.com/open-telemetry/opentelemetry-proto/archive/refs/tags/${ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION}.tar.gz" "ARROW_ORC_URL orc-${ARROW_ORC_BUILD_VERSION}.tar.gz https://www.apache.org/dyn/closer.lua/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz?action=download" - "ARROW_PROTOBUF_URL protobuf-${ARROW_PROTOBUF_BUILD_VERSION}.tar.gz https://github.com/google/protobuf/releases/download/${ARROW_PROTOBUF_BUILD_VERSION}/protobuf-all-${ARROW_PROTOBUF_BUILD_VERSION:1}.tar.gz" + "ARROW_PROTOBUF_URL protobuf-${ARROW_PROTOBUF_BUILD_VERSION}.tar.gz https://github.com/google/protobuf/releases/download/${ARROW_PROTOBUF_BUILD_VERSION}/protobuf-${ARROW_PROTOBUF_BUILD_VERSION:1}.tar.gz" "ARROW_RAPIDJSON_URL rapidjson-${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz https://github.com/miloyip/rapidjson/archive/${ARROW_RAPIDJSON_BUILD_VERSION}.tar.gz" "ARROW_RE2_URL re2-${ARROW_RE2_BUILD_VERSION}.tar.gz https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz" "ARROW_S2N_TLS_URL s2n-${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz https://github.com/aws/s2n-tls/archive/${ARROW_S2N_TLS_BUILD_VERSION}.tar.gz"