From 01e3a73b9619140af0cac1333c76ad43600952c3 Mon Sep 17 00:00:00 2001 From: Ada Zhang Date: Mon, 13 Apr 2026 21:35:12 -0700 Subject: [PATCH] Internal PiperOrigin-RevId: 899344435 --- pkg/BUILD.bazel | 1 + upb/base/error_handler.h | 1 + upb/message/BUILD | 92 ++++ upb/message/convert.c | 508 ++++++++++++++++++++++ upb/message/convert.h | 56 +++ upb/message/convert_fuzz_test.cc | 340 +++++++++++++++ upb/message/convert_test.cc | 718 +++++++++++++++++++++++++++++++ upb/message/convert_test.proto | 118 +++++ upb/message/test.cc | 94 ++-- upb/wire/BUILD | 1 + upb/wire/internal/decoder.h | 10 + upb/wire/internal/encoder.h | 4 + 12 files changed, 1910 insertions(+), 33 deletions(-) create mode 100644 upb/message/convert.c create mode 100644 upb/message/convert.h create mode 100644 upb/message/convert_fuzz_test.cc create mode 100644 upb/message/convert_test.cc create mode 100644 upb/message/convert_test.proto diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 00a5da334e490..b04c3b44c4ad2 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -241,6 +241,7 @@ cc_dist_library( "//upb:generated_code_support", "//upb/json", "//upb/message:compare", + "//upb/message:convert", "//upb/message:copy", "//upb/mini_table:compat", "//upb/mini_table:debug_string", diff --git a/upb/base/error_handler.h b/upb/base/error_handler.h index 24c95d0c2a12e..42dd6aefd6261 100644 --- a/upb/base/error_handler.h +++ b/upb/base/error_handler.h @@ -56,6 +56,7 @@ typedef enum { kUpb_ErrorCode_Ok = 0, kUpb_ErrorCode_OutOfMemory = 1, kUpb_ErrorCode_Malformed = 2, + kUpb_ErrorCode_MaxDepthExceeded = 4, } upb_ErrorCode; typedef struct { diff --git a/upb/message/BUILD b/upb/message/BUILD index 0cca4318a7602..5d5a1949a7449 100644 --- a/upb/message/BUILD +++ b/upb/message/BUILD @@ -198,6 +198,61 @@ cc_library( ], ) +# Experimental API, subject to change. 
+cc_library( + name = "convert", + srcs = [ + "convert.c", + ], + hdrs = [ + "convert.h", + ], + copts = UPB_DEFAULT_COPTS, + visibility = [ + "//upb:__pkg__", + "//upb:friends", + ], + deps = [ + ":compare", + ":internal", + ":message", + "//upb/base", + "//upb/mem", + "//upb/mini_table", + "//upb/mini_table:internal", + "//upb/port", + "//upb/wire", + "//upb/wire:decoder", + "//upb/wire:eps_copy_input_stream", + ], +) + +cc_test( + name = "convert_test", + srcs = ["convert_test.cc"], + deps = [ + ":convert", + ":convert_test_upb_minitable_proto", + ":convert_test_upb_proto", + ":internal", + ":message", + ":message_test_upb_minitable_proto", + ":message_test_upb_proto", + ":promote", + "//upb/base", + "//upb/mem", + "//upb/mini_table", + "//upb/port", + "//upb/test:test_messages_proto2_upb_proto", + "//upb/test:test_messages_proto3_upb_minitable", + "//upb/test:test_messages_proto3_upb_proto", + "//upb/test:test_upb_proto", + "//upb/wire", + "@googletest//:gtest", + "@googletest//:gtest_main", + ], +) + cc_test( name = "merge_test", srcs = ["merge_test.cc"], @@ -416,6 +471,7 @@ cc_test( features = UPB_DEFAULT_FEATURES, deps = [ ":compare", + ":convert", ":internal", ":message", ":message_test_upb_minitable_proto", @@ -497,6 +553,24 @@ cc_test( ], ) +cc_test( + name = "convert_fuzz_test", + srcs = ["convert_fuzz_test.cc"], + deps = [ + ":compare", + ":convert", + ":message", + "//upb/mem", + "//upb/mini_table", + "//upb/mini_table:internal", + "//upb/port", + "//upb/test:fuzz_util", + "//upb/wire", + "@googletest//:gtest", + "@googletest//:gtest_main", + ], +) + filegroup( name = "source_files", srcs = glob( @@ -531,3 +605,21 @@ filegroup( ), visibility = ["//upb:__pkg__"], ) + +proto_library( + name = "convert_test_proto", + testonly = 1, + srcs = ["convert_test.proto"], +) + +upb_minitable_proto_library( + name = "convert_test_upb_minitable_proto", + testonly = 1, + deps = [":convert_test_proto"], +) + +upb_c_proto_library( + name = "convert_test_upb_proto", + 
testonly = 1, + deps = [":convert_test_proto"], +) diff --git a/upb/message/convert.c b/upb/message/convert.c new file mode 100644 index 0000000000000..b5d815c72601a --- /dev/null +++ b/upb/message/convert.c @@ -0,0 +1,508 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2026 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "upb/message/convert.h" + +#include +#include + +#include "upb/base/descriptor_constants.h" +#include "upb/base/error_handler.h" +#include "upb/base/string_view.h" +#include "upb/mem/arena.h" +#include "upb/message/accessors.h" +#include "upb/message/array.h" +#include "upb/message/compare.h" +#include "upb/message/internal/accessors.h" +#include "upb/message/internal/map_sorter.h" +#include "upb/message/internal/message.h" +#include "upb/message/map.h" +#include "upb/message/message.h" +#include "upb/mini_table/extension.h" +#include "upb/mini_table/extension_registry.h" +#include "upb/mini_table/field.h" +#include "upb/mini_table/internal/message.h" +#include "upb/mini_table/message.h" +#include "upb/wire/decode.h" +#include "upb/wire/encode.h" +#include "upb/wire/eps_copy_input_stream.h" // Added +#include "upb/wire/internal/decoder.h" +#include "upb/wire/internal/encoder.h" + +// Must be last. 
+#include "upb/port/def.inc" + +typedef struct { + upb_Decoder decoder; + upb_encstate encoder; + upb_Arena* arena; + upb_ErrorHandler err; +} upb_Converter; + +static void upb_Message_SetFieldOrExtension(upb_Message* msg, + const upb_MiniTableField* f, + const upb_MiniTableExtension* ext, + const upb_MessageValue* val, + upb_Arena* arena) { + if (ext != NULL) { + upb_Message_SetExtension(msg, ext, val, arena); + } else { + upb_Message_SetBaseField(msg, f, val); + } +} + +static void upb_Message_EncodeExtensionAsUnknown( + upb_encstate* e, upb_Message* dst, const upb_MiniTable* dst_mt, + const upb_MiniTableExtension* ext, upb_MessageValue val, int depth, + upb_ErrorHandler* err) { + char* buf; + size_t size; + int encode_options = upb_Encode_LimitDepth(0, depth); + bool is_message_set = upb_MiniTable_IsMessageSet(dst_mt); + UPB_PRIVATE(_upb_Encode_Extension)(e, ext, val, is_message_set, &buf, &size, + encode_options); + if (size > 0) { + if (!UPB_PRIVATE(_upb_Message_AddUnknown)(dst, buf, size, e->arena, + kUpb_AddUnknown_Alias)) { + if (e->err) { + upb_ErrorHandler_ThrowError(err, kUpb_ErrorCode_OutOfMemory); + } + } + } +} + +static void upb_Message_ConvertInternal(upb_Converter* c, upb_Message* dst, + const upb_Message* src, + const upb_MiniTable* dst_mt, + const upb_MiniTable* src_mt, + const upb_ExtensionRegistry* extreg, + int depth); + +static void upb_Array_DeepConvert(upb_Converter* c, upb_Array* dst, + const upb_Array* src, + const upb_MiniTable* dst_sub_mt, + const upb_MiniTable* src_sub_mt, + const upb_ExtensionRegistry* extreg, + int depth) { + size_t size = upb_Array_Size(src); + if (!upb_Array_Resize(dst, size, c->arena)) { + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + } + for (size_t i = 0; i < size; ++i) { + upb_MessageValue src_val = upb_Array_Get(src, i); + const upb_Message* src_msg = src_val.msg_val; + upb_Message* dst_msg = upb_Message_New(dst_sub_mt, c->arena); + if (!dst_msg) { + upb_ErrorHandler_ThrowError(&c->err, 
kUpb_ErrorCode_OutOfMemory); + } + upb_Message_ConvertInternal(c, dst_msg, src_msg, dst_sub_mt, src_sub_mt, + extreg, depth); + upb_MessageValue dst_val; + dst_val.msg_val = dst_msg; + upb_Array_Set(dst, i, dst_val); + } +} + +static void upb_Map_DeepConvert(upb_Converter* c, upb_Map* dst, + const upb_Map* src, + const upb_MiniTable* dst_entry_mt, + const upb_MiniTable* src_entry_mt, + const upb_ExtensionRegistry* extreg, + int depth) { + const upb_MiniTableField* dst_val_f = upb_MiniTable_MapValue(dst_entry_mt); + const upb_MiniTable* dst_val_mt = upb_MiniTable_SubMessage(dst_val_f); + const upb_MiniTableField* src_val_f = upb_MiniTable_MapValue(src_entry_mt); + const upb_MiniTable* src_val_mt = upb_MiniTable_SubMessage(src_val_f); + + size_t iter = kUpb_Map_Begin; + upb_MessageValue key, src_val; + while (upb_Map_Next(src, &key, &src_val, &iter)) { + const upb_Message* src_msg = src_val.msg_val; + upb_Message* dst_msg = upb_Message_New(dst_val_mt, c->arena); + if (!dst_msg) { + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + } + upb_Message_ConvertInternal(c, dst_msg, src_msg, dst_val_mt, src_val_mt, + extreg, depth); + upb_MessageValue dst_val; + dst_val.msg_val = dst_msg; + if (!upb_Map_Set(dst, key, dst_val, c->arena)) { + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + } + } +} + +static bool upb_Message_ConvertMapField(upb_Converter* c, upb_Message* dst, + const upb_Message* src, + const upb_MiniTableField* dst_f, + const upb_MiniTableField* src_f, + const upb_ExtensionRegistry* extreg, + int depth) { + const upb_Map* src_map = upb_Message_GetMap(src, src_f); + if (!src_map) return true; + + const upb_MiniTable* dst_entry_mt = upb_MiniTable_MapEntrySubMessage(dst_f); + const upb_MiniTable* src_entry_mt = upb_MiniTable_MapEntrySubMessage(src_f); + + if (dst_entry_mt != src_entry_mt) { + const upb_MiniTableField* dst_val_f = upb_MiniTable_MapValue(dst_entry_mt); + const upb_MiniTableField* src_val_f = 
upb_MiniTable_MapValue(src_entry_mt); + if (upb_MiniTableField_CType(dst_val_f) == kUpb_CType_Message && + upb_MiniTableField_CType(src_val_f) == kUpb_CType_Message) { + upb_Map* dst_map = upb_Map_New( + c->arena, + upb_MiniTableField_CType(upb_MiniTable_MapKey(dst_entry_mt)), + kUpb_CType_Message); + if (!dst_map) { + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + } + upb_Map_DeepConvert(c, dst_map, src_map, dst_entry_mt, src_entry_mt, + extreg, depth); + upb_Message_SetBaseField(dst, dst_f, &dst_map); + return true; + } + } + return false; +} + +static bool upb_Message_ConvertField(upb_Converter* c, upb_Message* dst, + const upb_Message* src, + const upb_MiniTableField* dst_f, + const upb_MiniTableField* src_f, + const upb_ExtensionRegistry* extreg, + int depth) { + if (upb_MiniTableField_HasPresence(src_f)) { + if (!upb_Message_HasBaseField(src, src_f)) return true; + } + + if (upb_MiniTableField_CType(dst_f) == kUpb_CType_Message) { + if (upb_MiniTableField_IsScalar(dst_f)) { + UPB_ASSERT(upb_MiniTableField_Type(dst_f) == + upb_MiniTableField_Type(src_f)); + const upb_Message* src_sub = upb_Message_GetMessage(src, src_f); + if (!src_sub) return true; + + const upb_MiniTable* dst_sub_mt = upb_MiniTable_SubMessage(dst_f); + const upb_MiniTable* src_sub_mt = upb_MiniTable_SubMessage(src_f); + + if (dst_sub_mt == src_sub_mt) { + upb_Message_SetMessage(dst, dst_f, (upb_Message*)src_sub); + return true; + } + + upb_Message* dst_sub = + upb_Message_GetOrCreateMutableMessage(dst, dst_f, c->arena); + if (!dst_sub) + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + upb_Message_ConvertInternal(c, dst_sub, src_sub, dst_sub_mt, src_sub_mt, + extreg, depth); + return true; + } else if (upb_MiniTableField_IsArray(dst_f)) { + UPB_ASSERT(upb_MiniTableField_IsArray(dst_f) && + upb_MiniTableField_Type(dst_f) == + upb_MiniTableField_Type(src_f)); + const upb_Array* src_arr = upb_Message_GetArray(src, src_f); + if (!src_arr) return true; + + 
const upb_MiniTable* dst_sub_mt = upb_MiniTable_SubMessage(dst_f); + const upb_MiniTable* src_sub_mt = upb_MiniTable_SubMessage(src_f); + + if (dst_sub_mt != src_sub_mt) { + upb_Array* dst_arr = upb_Array_New(c->arena, kUpb_CType_Message); + if (!dst_arr) + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + upb_Array_DeepConvert(c, dst_arr, src_arr, dst_sub_mt, src_sub_mt, + extreg, depth); + upb_Message_SetBaseField(dst, dst_f, &dst_arr); + return true; + } + } else if (upb_MiniTableField_IsMap(dst_f)) { + UPB_ASSERT(upb_MiniTableField_IsMap(src_f)); + // Check that the key and value types are the same for the source and + // destination map fields. + const upb_MiniTable* dst_entry_mt = + upb_MiniTable_MapEntrySubMessage(dst_f); + const upb_MiniTable* src_entry_mt = + upb_MiniTable_MapEntrySubMessage(src_f); + const upb_MiniTableField* dst_key_f = upb_MiniTable_MapKey(dst_entry_mt); + const upb_MiniTableField* src_key_f = upb_MiniTable_MapKey(src_entry_mt); + const upb_MiniTableField* dst_val_f = + upb_MiniTable_MapValue(dst_entry_mt); + const upb_MiniTableField* src_val_f = + upb_MiniTable_MapValue(src_entry_mt); + UPB_ASSERT(upb_MiniTableField_Type(dst_key_f) == + upb_MiniTableField_Type(src_key_f) && + upb_MiniTableField_Type(dst_val_f) == + upb_MiniTableField_Type(src_val_f)); + + if (upb_Message_ConvertMapField(c, dst, src, dst_f, src_f, extreg, + depth)) { + return true; + } + } + } + + UPB_ASSERT(upb_MiniTableField_Type(dst_f) == upb_MiniTableField_Type(src_f)); + UPB_PRIVATE(_upb_MiniTableField_DataCopy) + (dst_f, UPB_PRIVATE(_upb_Message_MutableDataPtr)(dst, dst_f), + UPB_PRIVATE(_upb_Message_DataPtr)(src, src_f)); + + if (upb_MiniTableField_HasPresence(dst_f)) { + UPB_PRIVATE(_upb_Message_SetPresence)(dst, dst_f); + } + return true; +} + +static void upb_Message_ConvertExtensions(upb_Converter* c, upb_Message* dst, + const upb_Message* src, + const upb_MiniTable* dst_mt, + const upb_ExtensionRegistry* extreg, + int depth) { + const 
upb_MiniTableExtension* ext; + upb_MessageValue val; + uintptr_t iter = kUpb_Message_ExtensionBegin; + while (upb_Message_NextExtension(src, &ext, &val, &iter)) { + const upb_MiniTableField* dst_f = upb_MiniTable_FindFieldByNumber( + dst_mt, upb_MiniTableExtension_Number(ext)); + const upb_MiniTableExtension* dst_ext = NULL; + if (!dst_f) { + // Source extension not found in the destination schema. Check the + // extension registry. + if (extreg != NULL) { + dst_ext = upb_ExtensionRegistry_Lookup( + extreg, dst_mt, upb_MiniTableExtension_Number(ext)); + if (dst_ext) { + dst_f = upb_MiniTableExtension_ToField(dst_ext); + } + } + } + + if (dst_f) { + const upb_MiniTableField* src_f = upb_MiniTableExtension_ToField(ext); + + UPB_ASSERT(!upb_MiniTableField_IsMap(src_f) && + !upb_MiniTableField_IsMap(dst_f)); + // Type mismatch in extension is undefined behavior. + UPB_ASSERT(upb_MiniTableField_Type(dst_f) == + upb_MiniTableField_Type(src_f) && + upb_MiniTableField_IsArray(dst_f) == + upb_MiniTableField_IsArray(src_f)); + + if (upb_MiniTableField_CType(dst_f) == kUpb_CType_Message) { + const upb_MiniTable* dst_sub_mt = upb_MiniTable_SubMessage(dst_f); + const upb_MiniTable* src_sub_mt = upb_MiniTable_SubMessage(src_f); + + if (upb_MiniTableField_IsArray(dst_f)) { + if (dst_sub_mt != src_sub_mt) { + // Array of messages, and the sub message types differ. Perform + // conversion. + upb_Array* dst_arr = upb_Array_New(c->arena, kUpb_CType_Message); + if (!dst_arr) + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + upb_Array_DeepConvert(c, dst_arr, val.array_val, dst_sub_mt, + src_sub_mt, extreg, depth); + upb_MessageValue valid_val; + valid_val.array_val = dst_arr; + upb_Message_SetFieldOrExtension(dst, dst_f, dst_ext, &valid_val, + c->arena); + } else { + // Array of messages, and the sub message types are the same. + // Shallow copy. 
+ upb_Message_SetFieldOrExtension(dst, dst_f, dst_ext, &val, + c->arena); + } + } else if (dst_sub_mt == src_sub_mt) { + // Scalar message, and the message types are the same. + // Shallow copy. + upb_Message_SetFieldOrExtension(dst, dst_f, dst_ext, &val, c->arena); + } else { + // Scalar message, and the message types differ. Perform conversion. + upb_Message* dst_sub = upb_Message_New(dst_sub_mt, c->arena); + if (!dst_sub) + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + + upb_Message_ConvertInternal(c, dst_sub, val.msg_val, dst_sub_mt, + src_sub_mt, extreg, depth); + + upb_MessageValue valid_val; + valid_val.msg_val = dst_sub; + upb_Message_SetFieldOrExtension(dst, dst_f, dst_ext, &valid_val, + c->arena); + } + } else { + // Scalar non-message type. + upb_Message_SetFieldOrExtension(dst, dst_f, dst_ext, &val, c->arena); + } + } else { + if (dst_mt->UPB_PRIVATE(ext) == kUpb_ExtMode_NonExtendable) { + // Destination message does not support extensions. Encode the extension + // as an unknown field in the destination message. 
+ upb_Message_EncodeExtensionAsUnknown(&c->encoder, dst, dst_mt, ext, val, + depth, &c->err); + } else if (!upb_Message_SetExtension(dst, ext, &val, c->arena)) { + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + } + } + } +} + +static void upb_Message_ConvertInternal(upb_Converter* c, upb_Message* dst, + const upb_Message* src, + const upb_MiniTable* dst_mt, + const upb_MiniTable* src_mt, + const upb_ExtensionRegistry* extreg, + int depth) { + UPB_ASSERT(dst != NULL); + if (--depth == 0) { + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_MaxDepthExceeded); + } + + const upb_MiniTableField* dst_f = NULL; + const upb_MiniTableField* dst_first = NULL; + const upb_MiniTableField* src_f = NULL; + const upb_MiniTableField* src_first = NULL; + + if (upb_MiniTable_FieldCount(dst_mt) > 0) { + dst_first = upb_MiniTable_GetFieldByIndex(dst_mt, 0); + dst_f = dst_first + upb_MiniTable_FieldCount(dst_mt); + } + if (upb_MiniTable_FieldCount(src_mt) > 0) { + src_first = upb_MiniTable_GetFieldByIndex(src_mt, 0); + src_f = src_first + upb_MiniTable_FieldCount(src_mt); + } + + while (dst_f != dst_first || src_f != src_first) { + uint32_t dst_nr = + dst_f != dst_first ? upb_MiniTableField_Number(dst_f - 1) : 0; + uint32_t src_nr = + src_f != src_first ? 
upb_MiniTableField_Number(src_f - 1) : 0; + + if (dst_nr == src_nr) { + const upb_MiniTableField* dst_next = dst_f - 1; + const upb_MiniTableField* src_next = src_f - 1; + if (!upb_Message_ConvertField(c, dst, src, dst_next, src_next, extreg, + depth)) { + char* buf; + size_t size; + int encode_options = upb_Encode_LimitDepth(0, depth); + UPB_PRIVATE(_upb_Encode_Field)(&c->encoder, src, src_next, &buf, &size, + encode_options); + if (size > 0) { + if (!UPB_PRIVATE(_upb_Message_AddUnknown)(dst, buf, size, c->arena, + kUpb_AddUnknown_Alias)) { + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + } + } + } + dst_f--; + src_f--; + } else if (dst_nr > src_nr) { + dst_f--; + } else { + const upb_MiniTableField* src_next = src_f - 1; + char* buf; + size_t size; + int encode_options = upb_Encode_LimitDepth(0, depth); + UPB_PRIVATE(_upb_Encode_Field)(&c->encoder, src, src_next, &buf, &size, + encode_options); + if (size > 0) { + if (!UPB_PRIVATE(_upb_Message_AddUnknown)(dst, buf, size, c->arena, + kUpb_AddUnknown_Alias)) { + upb_ErrorHandler_ThrowError(&c->err, kUpb_ErrorCode_OutOfMemory); + } + } + src_f--; + } + } + + if (src_mt->UPB_PRIVATE(ext) != kUpb_ExtMode_NonExtendable) { + upb_Message_ConvertExtensions(c, dst, src, dst_mt, extreg, depth); + } + + upb_StringView data; + size_t iter = kUpb_Message_UnknownBegin; + while (upb_Message_NextUnknown(src, &data, &iter)) { + int decode_options = + upb_Decode_LimitDepth(kUpb_DecodeOption_AliasString, depth); + + // Reuse d. Reset input stream. 
+ const char* ptr = data.data; + upb_Decoder* d = &c->decoder; + upb_EpsCopyInputStream_InitWithErrorHandler(&d->input, &ptr, data.size, + d->err); + upb_Decoder_Reset(d, decode_options, dst); + _upb_Decoder_DecodeMessage(d, ptr, dst, dst_mt); + UPB_ASSERT(d->end_group == DECODE_NOGROUP); + } +} + +static bool upb_Message_DoConvert(upb_Converter* c, upb_Message* dst, + const upb_Message* src, + const upb_MiniTable* dst_mt, + const upb_MiniTable* src_mt, + const upb_ExtensionRegistry* extreg) { + if (UPB_SETJMP(c->err.buf) == 0) { + upb_Message_ConvertInternal(c, dst, src, dst_mt, src_mt, extreg, 100); + return true; + } + return false; +} + +const upb_Message* upb_Message_Convert(const upb_Message* src, + const upb_MiniTable* src_mt, + const upb_MiniTable* dst_mt, + const upb_ExtensionRegistry* extreg, + upb_Arena* arena) { + if (dst_mt == src_mt && extreg == NULL) return src; + + upb_Message* dst = upb_Message_New(dst_mt, arena); + if (!dst) return NULL; + + upb_Converter c; + upb_ErrorHandler_Init(&c.err); + + // Initialize the decoder. + // Initialize decoder once, performing SwapIn. + // We use a NULL buffer initially, effectively a dummy init to set up the + // arena and error handler. Note: we pass &c.err. + upb_Decoder_Init(&c.decoder, NULL, 0, extreg, 0, arena, &c.err, NULL, 0); + + // Initialize the encoder. + c.encoder.status = kUpb_EncodeStatus_Ok; + c.encoder.err = &c.err.buf; + c.encoder.arena = &c.decoder.arena; + _upb_mapsorter_init(&c.encoder.sorter); + + c.arena = &c.decoder.arena; + + if (!upb_Message_DoConvert(&c, dst, src, dst_mt, src_mt, extreg)) { + dst = NULL; + } + +#ifndef NDEBUG + if (dst) { + // Verifies that round-tripping through wire format yields an identical + // message. 
+ char* wire_buf; + size_t wire_size; + upb_Arena* tmp_arena = upb_Arena_New(); + upb_EncodeStatus encode_status = + upb_Encode(src, src_mt, 0, tmp_arena, &wire_buf, &wire_size); + UPB_ASSERT(encode_status == kUpb_EncodeStatus_Ok); + upb_Message* decoded_msg = upb_Message_New(dst_mt, tmp_arena); + upb_DecodeStatus decode_status = upb_Decode( + wire_buf, wire_size, decoded_msg, dst_mt, extreg, 0, tmp_arena); + UPB_ASSERT(decode_status == kUpb_DecodeStatus_Ok); + UPB_ASSERT(upb_Message_IsEqual(dst, decoded_msg, dst_mt, 0)); + upb_Arena_Free(tmp_arena); + } +#endif + + upb_Decoder_Destroy(&c.decoder, arena); + UPB_PRIVATE(_upb_encstate_destroy)(&c.encoder); + return dst; +} diff --git a/upb/message/convert.h b/upb/message/convert.h new file mode 100644 index 0000000000000..f39a23f6cf1e9 --- /dev/null +++ b/upb/message/convert.h @@ -0,0 +1,56 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2023 Google LLC. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#ifndef UPB_MESSAGE_CONVERT_H_ +#define UPB_MESSAGE_CONVERT_H_ + +#include "upb/mem/arena.h" +#include "upb/message/message.h" +#include "upb/mini_table/extension_registry.h" +#include "upb/mini_table/message.h" + +// Must be last. +#include "upb/port/def.inc" + +#ifdef __cplusplus +extern "C" { +#endif + +// Converts a message between two different `upb_MiniTable` types. +// +// It is equivalent to encoding the source message and then decoding it +// using the destination `upb_MiniTable`, but is more efficient when +// both `upb_MiniTable`s are derived from the same proto definition (e.g., via +// tree-shaking). 
+// +// If the destination message, or any of its submessages, uses the same +// minitable as the matching part of the source (determined by pointer +// identity), the destination will alias the source's memory (shallow copy) +// instead of performing a deep copy. Strings and unknown fields are aliased +// from the input message. +// +// Fields present in the source but not the destination will be encoded and +// added to the destination's unknown fields (or extensions, if the extension +// registry allows it). +// +// NOTE: The source and destination `upb_MiniTable`s must be compatible; the +// function has undefined behavior otherwise. +// +// Returns a new message on success, or NULL on failure. +const upb_Message* upb_Message_Convert(const upb_Message* src, + const upb_MiniTable* src_mt, + const upb_MiniTable* dst_mt, + const upb_ExtensionRegistry* extreg, + upb_Arena* arena); + +#ifdef __cplusplus +} // extern "C" +#endif + +#include "upb/port/undef.inc" + +#endif // UPB_MESSAGE_CONVERT_H_ diff --git a/upb/message/convert_fuzz_test.cc b/upb/message/convert_fuzz_test.cc new file mode 100644 index 0000000000000..b41d61c3241b3 --- /dev/null +++ b/upb/message/convert_fuzz_test.cc @@ -0,0 +1,340 @@ +#include +#include +#include +#include + +#include +#include "upb/mem/arena.h" +#include "upb/message/compare.h" // Global include +#include "upb/message/convert.h" +#include "upb/message/message.h" +#include "upb/mini_table/extension_registry.h" +#include "upb/mini_table/field.h" +#include "upb/mini_table/internal/field.h" +#include "upb/mini_table/internal/message.h" +#include "upb/mini_table/internal/sub.h" +#include "upb/mini_table/message.h" +#include "upb/test/fuzz_util.h" +#include "upb/wire/decode.h" +#include "upb/wire/encode.h" + +// Must be last. 
+#include "upb/port/def.inc" + +namespace upb { +namespace { + +const upb_MiniTable* SubsetMiniTable(const upb_MiniTable* src, uint64_t mask, + upb_Arena* arena); + +const upb_MiniTable* SubsetMiniTable(const upb_MiniTable* src, uint64_t mask, + upb_Arena* arena) { + std::vector new_fields; + std::vector new_subs; + + int field_count = upb_MiniTable_FieldCount(src); + for (int i = 0; i < field_count; ++i) { + const upb_MiniTableField* f = upb_MiniTable_GetFieldByIndex(src, i); + if (mask & (1ULL << (i % 64))) { + upb_MiniTableField new_f = *f; + if (f->UPB_PRIVATE(submsg_ofs) != kUpb_NoSub) { + new_subs.push_back( + *UPB_PTR_AT(f, f->UPB_PRIVATE(submsg_ofs) * kUpb_SubmsgOffsetBytes, + const upb_MiniTableSub)); + } + new_fields.push_back(new_f); + } + } + + size_t mt_size = sizeof(upb_MiniTable); + size_t fields_bytes = new_fields.size() * sizeof(upb_MiniTableField); + size_t subs_bytes = new_subs.size() * sizeof(upb_MiniTableSub); + + size_t mt_padded_size = UPB_ALIGN_UP(mt_size, 8); + size_t fields_padded_size = UPB_ALIGN_UP(fields_bytes, 8); + size_t total_size = mt_padded_size + fields_padded_size + subs_bytes; + + upb_MiniTable* new_mt = (upb_MiniTable*)upb_Arena_Malloc(arena, total_size); + upb_MiniTableField* fields_ptr = + UPB_PTR_AT(new_mt, mt_padded_size, upb_MiniTableField); + upb_MiniTableSub* subs_ptr = + UPB_PTR_AT(fields_ptr, fields_padded_size, upb_MiniTableSub); + + std::memcpy(new_mt, src, mt_size); + if (!new_fields.empty()) { + std::memcpy(fields_ptr, new_fields.data(), fields_bytes); + } + if (!new_subs.empty()) { + std::memcpy(subs_ptr, new_subs.data(), subs_bytes); + } + + int sub_idx = 0; + for (size_t i = 0; i < new_fields.size(); ++i) { + upb_MiniTableField* f = &fields_ptr[i]; + if (f->UPB_PRIVATE(submsg_ofs) != kUpb_NoSub) { + uintptr_t f_addr = (uintptr_t)f; + uintptr_t subs_addr = (uintptr_t)&subs_ptr[sub_idx++]; + + size_t diff = subs_addr - f_addr; + f->UPB_PRIVATE(submsg_ofs) = (uint16_t)(diff / kUpb_SubmsgOffsetBytes); + } + } + + 
new_mt->UPB_ONLYBITS(fields) = fields_ptr; + new_mt->UPB_ONLYBITS(field_count) = (uint16_t)new_fields.size(); + new_mt->UPB_PRIVATE(dense_below) = 0; + new_mt->UPB_PRIVATE(table_mask) = -1; + + return new_mt; +} + +void ConvertFuzz(const upb::fuzz::MiniTableFuzzInput& input, uint64_t mask1, + uint64_t mask2, std::string proto_payload, + uint32_t decode_options, uint32_t encode_options) { + upb_Arena* arena = upb_Arena_New(); + + upb_ExtensionRegistry* exts; + const upb_MiniTable* original_mt = + upb::fuzz::BuildMiniTable(input, &exts, arena); + if (!original_mt) { + upb_Arena_Free(arena); + return; + } + + const upb_MiniTable* src_mt = SubsetMiniTable(original_mt, mask1, arena); + const upb_MiniTable* dst_mt = SubsetMiniTable(original_mt, mask2, arena); + + decode_options = upb_Decode_LimitDepth(decode_options, 80); + encode_options = upb_Encode_LimitDepth(encode_options, 80); + + // We don't want to skip unknown fields or check required fields, as these + // will cause the fuzz test to fail or exit early in ways that aren't + // interesting. 
+ encode_options &= + ~(kUpb_EncodeOption_SkipUnknown | kUpb_EncodeOption_CheckRequired); + + upb_Message* msg_orig = upb_Message_New(original_mt, arena); + upb_DecodeStatus status = + upb_Decode(proto_payload.data(), proto_payload.size(), msg_orig, + original_mt, exts, decode_options, arena); + + if (status != kUpb_DecodeStatus_Ok) { + upb_Arena_Free(arena); + return; + } + + upb_Message* msg_src = upb_Message_New(src_mt, arena); + status = upb_Decode(proto_payload.data(), proto_payload.size(), msg_src, + src_mt, exts, decode_options, arena); + + if (status != kUpb_DecodeStatus_Ok) { + upb_Arena_Free(arena); + return; + } + + const upb_Message* msg_dst = + upb_Message_Convert(msg_src, src_mt, dst_mt, nullptr, arena); + if (!msg_dst) { + upb_Arena_Free(arena); + return; + } + + size_t size; + char* bytes; + upb_EncodeStatus enc_status = + upb_Encode(msg_dst, dst_mt, encode_options, arena, &bytes, &size); + + if (enc_status != kUpb_EncodeStatus_Ok) { + upb_Arena_Free(arena); + return; + } + + upb_Message* msg_final = upb_Message_New(original_mt, arena); + status = upb_Decode(bytes, size, msg_final, original_mt, exts, decode_options, + arena); + + if (status != kUpb_DecodeStatus_Ok) { + upb_Arena_Free(arena); + return; + } + + bool equal = upb_Message_IsEqual(msg_final, msg_orig, original_mt, + kUpb_CompareOption_IncludeUnknownFields); + if (!equal) { + abort(); + } + + upb_Arena_Free(arena); +} + +TEST(ConvertFuzz, Convert_IdenticalMinitables_ShallowCopy) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{ + {"\331", ""}, {"\257"}, "\351\210", {2147483645}}, + 4867317803475403639ULL, 4867317803475403639ULL, "@B", 3220282077ULL, + 4189253947ULL); +} + +TEST(ConvertFuzz, Convert_DifferentSubsets_DroppedFields_Complex) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{ + {"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", + "", ""}, + {"\306", "", "\331", ""}, + "\314Q\345\322\024\321\017\021\016\016\016", + {1, 1, 0, 4294967295, 4294967295, + 1, 
4294967295, 4294967295, 4294967295, 4294967295, + 4294967295, 4294967295, 4294967295, 4294967295, 4294967295, + 1, 3135226496, 3135226496, 3135226496, 0, + 3708883408}}, + 14162464183153632842ULL, 2939036597827910531ULL, + "=\032\253\342K\221MQ\nj\n\304\361\364\304", 1620786928, 2077918455); +} + +TEST(ConvertFuzz, Convert_DifferentSubsets_DroppedFields_Simple) { + ConvertFuzz(upb::fuzz::MiniTableFuzzInput{{"", "", "", "", "", "", "", "", "", + "", "", "", "", "", ""}, + {""}, + "", + {4294967295, 4294967295, 0}}, + 4453300303650383170ULL, 13549612121351043620ULL, "]]k\031\320", + 1804506072, 3299701550); +} + +TEST(ConvertFuzz, + Convert_DifferentSubsets_DroppedFields_Regression_89982ef4cfe52331) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{ + {"", "DD", "z", "", "", "\200"}, {"\031", "\031", "\031"}, "]]", {}}, + 7595264386850118164ULL, 1703605996941841092ULL, "\300D\315\316\372D", + 3067512230, 2237854510); +} + +TEST(ConvertFuzz, ManualUnknownFieldRepro) { + ConvertFuzz(upb::fuzz::MiniTableFuzzInput{{"I", "a"}}, ~0ULL, 0ULL, + "\010\000", 0, 0); +} + +TEST(ConvertFuzz, Regression_FuzzerFinding_UnknownFields) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{ + {"", "", ""}, {"", "\211", "", "{", "\232", "2"}, "", {4294967295}}, + 5205171011975038739ULL, 18014293590545137538ULL, "E\261k\333\334", + 590856108, 53173774); +} + +TEST(ConvertFuzz, RepeatedFieldConfusion) { + ConvertFuzz(upb::fuzz::MiniTableFuzzInput{{"\t", "\352", "R", "$\214\214\214", + "\004", "$F", "\244", "$\374P"}, + {""}, + "{GGG", + {4294967295}}, + 3117542645911838959, 9223372036854775807, "", 2147221503, 16); +} + +TEST(ConvertFuzz, ConvertFuzzRegression) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{{""}, {"\325\242"}, "", {2147483647}}, + 18446744073709551615u, 10490713252739160816u, " u", 2, 4294967231); +} + +TEST(ConvertFuzz, ConvertFuzzRegression_Crash_ConvertInternal_2026_04_07) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{ + {""}, + {"", ">>", "", ""}, + 
"\357\357\357\357\357*" + "\357\357\357\357\357\357\357\357\357\357\357\357\357\357\357\357\357" + "\357\225\225\225\357\357\357\357\357\357\357\357\357\357\357\357\357" + "\357\357\357\357\357\357\357\357\357", + {4294967295, 4294967295, 4294967295, 4294967295, 4294967295, + 4294967295, 4294967295, 4294967295, 1}}, + 12411566311597166710ULL, 8799022017001952335ULL, "%%%%%%%%%%%%%%%%%ZZ%", + 2815879784ULL, 3243839465ULL); +} + +TEST(ConvertFuzz, Convert_Fuzz_Crash_Regression) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{ + {"", "", ""}, {"", ""}, "\207\254", {4033684729, 4294967294}}, + 7437277671727957814ULL, 9800553862402446025ULL, + "\220Y\203\225\225\366\337\337\004", 3062476814ULL, 4041248646ULL); +} + +TEST(ConvertFuzz, ConvertFuzzRegression2) { + ConvertFuzz(upb::fuzz::MiniTableFuzzInput{{"", ""}, {""}, "", {1903095162}}, + 8810965477460052779, 18446744073709551615u, "\030\030", 114210880, + 8190); +} + +TEST(ConvertFuzz, ConvertFuzzRegression3) { + ConvertFuzz(upb::fuzz::MiniTableFuzzInput{{""}, {}, "\373", {}}, 1, + 9223372036854775807, "uz\010\006\006", 4294967290, 2147483647); +} + +TEST(ConvertFuzz, ConvertFuzzRegression4) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{ + {"$", "C", ""}, + {"r"}, + "\270N\270[", + {3621539714, 3621539714, 3621539714, 3621539714, 3621539714, + 3621539714, 3621539714, 3621539714, 3621539714, 3621539714, + 3621539714, 3621539714, 3621539714, 3621539714, 3621539714, + 3621539714, 3621539714, 3621539714, 3621539714, 3621539714, + 3621539714, 3621539714, 3621539714, 3621539714, 3621539714, + 3621539714, 3621539714, 3621539714, 3621539714, 3621539714, + 3621539714, 3621539714, 3621539714, 3621539714, 3621539714, + 3621539714, 3621539714, 3621539714, 3621539714, 3621539714, + 3621539714, 3621539714, 3621539714, 3621539714}}, + 13429813313754688149ULL, 6819704094736752274ULL, "\255\032\262\337))", + 1611082566, 466142871); +} + +TEST(ConvertFuzz, ConvertFuzzRegression5) { + ConvertFuzz(upb::fuzz::MiniTableFuzzInput{{""}, 
{"\362", "y"}, "\252", {}}, + 8728619288361297649ULL, 13997967148621545864ULL, + "%\311\311\311\303", 3383556142, 3604956055); +} + +TEST(ConvertFuzz, ConvertFuzzRegression6) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{ + {"", "", "\341", "\341", "\341", "$", "", "", "\022", "\022", "", "", + ""}, + {"\374", "\204"}, + "n\251PXq", + {1, 1, 1, 1, 1, + 1, 1, 1413050348, 1, 1, + 1, 1, 1, 0, 0, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 4, + 4294967295, 4294967295, 4294967295, 4294967295, 4294967295, + 4294967295, 4294967295, 4294967295, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 3028396088, 1, + 1, 1, 1, 1, 4294967295, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1}}, + 304186569338573724ULL, 6555165345294515829ULL, "8W", 3970284081, + 1611359599); +} + +TEST(ConvertFuzz, ConvertFuzzEncodeRegression) { + ConvertFuzz( + upb::fuzz::MiniTableFuzzInput{{"$$$$$$$$$$$$$$$$$$", "", "", "", "", ""}, + {"", ""}, + "D", + {2462394141, 2462394141, 2462394145}}, + 555217012043469213, 13507447059222749214u, + "pm\t\t\t\t\t\t\t\t\t\t\t\005o\t\t\t\t\t\trr\375\375\375r\251r", 32766, + 16); +} + +} // namespace + +} // namespace upb diff --git a/upb/message/convert_test.cc b/upb/message/convert_test.cc new file mode 100644 index 0000000000000..6c15d3539cdc9 --- /dev/null +++ b/upb/message/convert_test.cc @@ -0,0 +1,718 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2023 Google LLC. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd + +#include "upb/message/convert.h" + +#include +#include +#include + +#include +#include "google/protobuf/test_messages_proto3.upb.h" +#include "google/protobuf/test_messages_proto3.upb_minitable.h" +#include "upb/base/descriptor_constants.h" +#include "upb/base/string_view.h" +#include "upb/base/upcast.h" +#include "upb/mem/arena.hpp" +#include "upb/message/accessors.h" +#include "upb/message/array.h" +#include "upb/message/convert_test.upb.h" +#include "upb/message/convert_test.upb_minitable.h" +#include "upb/message/internal/message.h" +#include "upb/message/message.h" +#include "upb/message/test.upb_minitable.h" +#include "upb/mini_table/extension_registry.h" +#include "upb/mini_table/message.h" +#include "upb/wire/decode.h" + +// Must be last to ensure UPB_PRIVATE is defined. +#include "upb/port/def.inc" + +// We use the generated upb_MiniTables from test_messages_proto3. 
+#define TEST_MT &protobuf_0test_0messages__proto3__TestAllTypesProto3_msg_init + +TEST(ConvertTest, Identity) { + upb::Arena arena; + protobuf_test_messages_proto3_TestAllTypesProto3* msg = + protobuf_test_messages_proto3_TestAllTypesProto3_new(arena.ptr()); + + protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_int32(msg, 123); + protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_string( + msg, upb_StringView_FromString("hello")); + + const upb_MiniTable* mt = TEST_MT; + + const upb_Message* dst_msg = + upb_Message_Convert(UPB_UPCAST(msg), mt, mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + const protobuf_test_messages_proto3_TestAllTypesProto3* dst = + (const protobuf_test_messages_proto3_TestAllTypesProto3*)dst_msg; + + EXPECT_EQ( + 123, + protobuf_test_messages_proto3_TestAllTypesProto3_optional_int32(dst)); + upb_StringView str = + protobuf_test_messages_proto3_TestAllTypesProto3_optional_string(dst); + EXPECT_EQ(std::string("hello"), std::string(str.data, str.size)); +} + +TEST(ConvertTest, AliasSubMessage) { + upb::Arena arena; + protobuf_test_messages_proto3_TestAllTypesProto3* msg = + protobuf_test_messages_proto3_TestAllTypesProto3_new(arena.ptr()); + + protobuf_test_messages_proto3_TestAllTypesProto3_NestedMessage* sub = + protobuf_test_messages_proto3_TestAllTypesProto3_mutable_optional_nested_message( + msg, arena.ptr()); + protobuf_test_messages_proto3_TestAllTypesProto3_NestedMessage_set_a(sub, + 456); + + const upb_MiniTable* mt = TEST_MT; + + const upb_Message* dst_msg = + upb_Message_Convert(UPB_UPCAST(msg), mt, mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + const protobuf_test_messages_proto3_TestAllTypesProto3* dst = + (const protobuf_test_messages_proto3_TestAllTypesProto3*)dst_msg; + + const protobuf_test_messages_proto3_TestAllTypesProto3_NestedMessage* dst_sub = + protobuf_test_messages_proto3_TestAllTypesProto3_optional_nested_message( + dst); + + // Verify value + EXPECT_EQ(456, + 
protobuf_test_messages_proto3_TestAllTypesProto3_NestedMessage_a( + dst_sub)); + + // Verify shallow copy (pointer identity) + EXPECT_EQ(sub, dst_sub); +} + +TEST(ConvertTest, UnknownPromotion) { + upb::Arena arena; + + // 1. Create a serialized buffer with just the unknown field. + // Field `optional_int32` has number 1. + // Encode 789 as field 1. + char buf[32]; + char* ptr = buf; + // Tag: 1 << 3 | 0 (varint) = 8 + *ptr++ = 8; + // Value: 789 = 0x315. Varint: 0x95 0x06 + *ptr++ = (char)0x95; + *ptr++ = (char)0x06; + size_t len = ptr - buf; + + // 2. Parse this buffer into Message with NO fields. + const upb_MiniTable* empty_mt = &upb_0test__EmptyMessage_msg_init; + + upb_Message* msg_empty = upb_Message_New(empty_mt, arena.ptr()); + upb_DecodeStatus status = + upb_Decode(buf, len, msg_empty, empty_mt, nullptr, 0, arena.ptr()); + ASSERT_EQ(kUpb_DecodeStatus_Ok, status); + + // 3. Convert `msg_empty` to `dst` (TestAllTypesProto3). + // `dst` HAS field 1. So it should promote the unknown from `msg_empty`. + + const upb_MiniTable* mt = TEST_MT; + + const upb_Message* dst_msg = + upb_Message_Convert(msg_empty, empty_mt, mt, nullptr, arena.ptr()); + ASSERT_NE(dst_msg, nullptr); + const protobuf_test_messages_proto3_TestAllTypesProto3* dst = + (const protobuf_test_messages_proto3_TestAllTypesProto3*)dst_msg; + + EXPECT_EQ( + 789, + protobuf_test_messages_proto3_TestAllTypesProto3_optional_int32(dst)); +} + +TEST(ConvertTest, Demotion) { + upb::Arena arena; + protobuf_test_messages_proto3_TestAllTypesProto3* msg = + protobuf_test_messages_proto3_TestAllTypesProto3_new(arena.ptr()); + protobuf_test_messages_proto3_TestAllTypesProto3_set_optional_int32(msg, 999); + + // Convert to Empty message. Field 1 should become unknown. 
+ const upb_MiniTable* empty_mt = &upb_0test__EmptyMessage_msg_init; + + const upb_Message* dst = upb_Message_Convert(UPB_UPCAST(msg), TEST_MT, + empty_mt, nullptr, arena.ptr()); + ASSERT_NE(dst, nullptr); + + // Dst should have unknown field 1 with value 999 (tag 8, varint 0xE7 0x07). + size_t iter = kUpb_Message_UnknownBegin; + upb_StringView data; + ASSERT_TRUE(upb_Message_NextUnknown(dst, &data, &iter)); + ASSERT_GE(data.size, 3); + EXPECT_EQ((uint8_t)data.data[0], 8); + EXPECT_EQ((uint8_t)data.data[1], 0xE7); + EXPECT_EQ((uint8_t)data.data[2], 0x07); + EXPECT_FALSE(upb_Message_NextUnknown(dst, &data, &iter)); +} + +TEST(ConvertTest, DeepConvertMap) { + upb::Arena arena; + protobuf_test_messages_proto3_TestAllTypesProto3* msg = + protobuf_test_messages_proto3_TestAllTypesProto3_new(arena.ptr()); + + protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_set( + msg, 10, 20, arena.ptr()); + + const upb_MiniTable* mt = TEST_MT; + + // Self-conversion should work (shallow). + const upb_Message* dst_msg = + upb_Message_Convert(UPB_UPCAST(msg), mt, mt, nullptr, arena.ptr()); + ASSERT_NE(dst_msg, nullptr); + const protobuf_test_messages_proto3_TestAllTypesProto3* dst = + (const protobuf_test_messages_proto3_TestAllTypesProto3*)dst_msg; + + int32_t val; + ASSERT_TRUE( + protobuf_test_messages_proto3_TestAllTypesProto3_map_int32_int32_get( + dst, 10, &val)); + EXPECT_EQ(20, val); +} + +TEST(ConvertTest, DeepConvertMapMessage) { + upb::Arena arena; + upb_test_convert_MessageWithMapMessage* msg = + upb_test_convert_MessageWithMapMessage_new(arena.ptr()); + + upb_test_convert_MessageWithInt32* val = + upb_test_convert_MessageWithInt32_new(arena.ptr()); + upb_test_convert_MessageWithInt32_set_f1(val, 123); + + upb_test_convert_MessageWithMapMessage_map_msg_set(msg, 10, val, arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithMapMessage_msg_init; + const upb_MiniTable* dst_mt = + &upb__test__convert__MessageWithMapMessageClone_msg_init; + + const upb_Message* 
dst_msg = upb_Message_Convert( + UPB_UPCAST(msg), src_mt, dst_mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithMapMessageClone* dst = + (const upb_test_convert_MessageWithMapMessageClone*)dst_msg; + + upb_test_convert_MessageWithInt32Clone* dst_val; + EXPECT_TRUE(upb_test_convert_MessageWithMapMessageClone_map_msg_get( + dst, 10, &dst_val)); + EXPECT_EQ(123, upb_test_convert_MessageWithInt32Clone_f1(dst_val)); + + // It should be a deep copy, not the same pointer. + EXPECT_NE((const void*)dst_val, (const void*)val); +} + +TEST(ConvertTest, ExtensionArrayShallowConversion) { + upb::Arena arena; + upb_test_convert_MessageWithExtension* msg = + upb_test_convert_MessageWithExtension_new(arena.ptr()); + + upb_test_convert_MessageWithInt32* val = + upb_test_convert_MessageWithInt32_new(arena.ptr()); + upb_test_convert_MessageWithInt32_set_f1(val, 456); + + upb_Array* ext_arr = upb_Array_New(arena.ptr(), kUpb_CType_Message); + upb_Array_Resize(ext_arr, 1, arena.ptr()); + upb_MessageValue elem_val; + elem_val.msg_val = (const upb_Message*)val; + upb_Array_Set(ext_arr, 0, elem_val); + upb_MessageValue ext_val; + ext_val.array_val = ext_arr; + upb_Message_SetExtension(UPB_UPCAST(msg), + upb_test_convert_ext_field_repeated_msg_ext, + &ext_val, arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithExtension_msg_init; + const upb_MiniTable* dst_mt = + &upb__test__convert__MessageWithKnownRepeatedMsg_msg_init; + + const upb_Message* dst_msg = upb_Message_Convert( + UPB_UPCAST(msg), src_mt, dst_mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithKnownRepeatedMsg* dst = + (const upb_test_convert_MessageWithKnownRepeatedMsg*)dst_msg; + + size_t size; + const upb_test_convert_MessageWithInt32* const* dst_arr = + upb_test_convert_MessageWithKnownRepeatedMsg_known_repeated_msg(dst, + &size); + EXPECT_EQ(1, size); + EXPECT_EQ(456, 
upb_test_convert_MessageWithInt32_f1(dst_arr[0])); + + // Due to minitable identity match, elements are shallow-copied. + EXPECT_EQ((const void*)dst_arr[0], (const void*)val); +} + +TEST(ConvertTest, ExtensionArrayDeepConversion) { + upb::Arena arena; + upb_test_convert_MessageWithExtension* msg = + upb_test_convert_MessageWithExtension_new(arena.ptr()); + + upb_test_convert_MessageWithInt32* val = + upb_test_convert_MessageWithInt32_new(arena.ptr()); + upb_test_convert_MessageWithInt32_set_f1(val, 789); + + upb_Array* ext_arr = upb_Array_New(arena.ptr(), kUpb_CType_Message); + upb_Array_Resize(ext_arr, 1, arena.ptr()); + upb_MessageValue elem_val; + elem_val.msg_val = (const upb_Message*)val; + upb_Array_Set(ext_arr, 0, elem_val); + upb_MessageValue ext_val; + ext_val.array_val = ext_arr; + upb_Message_SetExtension(UPB_UPCAST(msg), + upb_test_convert_ext_field_repeated_msg_ext, + &ext_val, arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithExtension_msg_init; + const upb_MiniTable* dst_mt = + &upb__test__convert__MessageWithKnownRepeatedMsgClone_msg_init; + + const upb_Message* dst_msg = upb_Message_Convert( + UPB_UPCAST(msg), src_mt, dst_mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithKnownRepeatedMsgClone* dst = + (const upb_test_convert_MessageWithKnownRepeatedMsgClone*)dst_msg; + + size_t size; + const upb_test_convert_MessageWithInt32Clone* const* dst_arr = + upb_test_convert_MessageWithKnownRepeatedMsgClone_known_repeated_msg( + dst, &size); + EXPECT_EQ(1, size); + EXPECT_EQ(789, upb_test_convert_MessageWithInt32Clone_f1(dst_arr[0])); + + // Deep conversion means the pointers are not equal. 
+ EXPECT_NE((const void*)dst_arr[0], (const void*)val); +} + +#if !defined(NDEBUG) +TEST(ConvertTest, MismatchedExtensionFails) { + upb::Arena arena; + upb_test_convert_MessageWithExtension* msg = + upb_test_convert_MessageWithExtension_new(arena.ptr()); + + // Set extension field 1000 to an int32 + upb_MessageValue ext_val; + ext_val.int32_val = 12345; + upb_Message_SetExtension(UPB_UPCAST(msg), + upb_test_convert_ext_field_int32_ext, &ext_val, + arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithExtension_msg_init; + + // Convert to a message where field 1000 is an int64 instead of int32. + // The two field types are incompatible, so the conversion is expected to + // abort; EXPECT_DEATH below asserts that (guarded by !defined(NDEBUG)). + const upb_MiniTable* dst_mt = + &upb__test__convert__MessageWithKnownInt64_msg_init; + + EXPECT_DEATH(upb_Message_Convert(UPB_UPCAST(msg), src_mt, dst_mt, nullptr, + arena.ptr()), + ""); +} + +TEST(ConvertTest, ConvertField_IncompatibleCType) { + upb::Arena arena; + upb_test_convert_MessageWithString* msg = + upb_test_convert_MessageWithString_new(arena.ptr()); + upb_test_convert_MessageWithString_set_f1(msg, + upb_StringView_FromString("hello")); + + const upb_MiniTable* src_mt = &upb__test__convert__MessageWithString_msg_init; + const upb_MiniTable* dst_mt = &upb__test__convert__MessageWithInt32_msg_init; + + EXPECT_DEATH(upb_Message_Convert(UPB_UPCAST(msg), src_mt, dst_mt, nullptr, + arena.ptr()), + ""); +} + +TEST(ConvertTest, ConvertField_ArrayIncompatibleCType) { + upb::Arena arena; + upb_test_convert_MessageWithRepeatedString* msg = + upb_test_convert_MessageWithRepeatedString_new(arena.ptr()); + upb_StringView* arr = + upb_test_convert_MessageWithRepeatedString_resize_r(msg, 1, arena.ptr()); + arr[0] = upb_StringView_FromString("hello"); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithRepeatedString_msg_init; + const upb_MiniTable* dst_mt = + 
&upb__test__convert__MessageWithRepeatedInt32_msg_init; + + EXPECT_DEATH(upb_Message_Convert(UPB_UPCAST(msg), src_mt, dst_mt, nullptr, + arena.ptr()), + ""); +} + +TEST(ConvertTest, ConvertField_TypeMismatch) { + upb::Arena arena; + upb_test_convert_MessageWithInt32* msg = + upb_test_convert_MessageWithInt32_new(arena.ptr()); + upb_test_convert_MessageWithInt32_set_f1(msg, 12345); + + const upb_MiniTable* src_mt = &upb__test__convert__MessageWithInt32_msg_init; + const upb_MiniTable* dst_mt = &upb__test__convert__MessageWithInt64_msg_init; + + EXPECT_DEATH(upb_Message_Convert(UPB_UPCAST(msg), src_mt, dst_mt, nullptr, + arena.ptr()), + ""); +} + +TEST(ConvertTest, ConvertField_MapTypeMismatch) { + upb::Arena arena; + upb_test_convert_MessageWithMapInt32Int32* msg = + upb_test_convert_MessageWithMapInt32Int32_new(arena.ptr()); + upb_test_convert_MessageWithMapInt32Int32_m_set(msg, 123, 456, arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithMapInt32Int32_msg_init; + const upb_MiniTable* dst_mt = + &upb__test__convert__MessageWithMapInt32Int64_msg_init; + + EXPECT_DEATH(upb_Message_Convert(UPB_UPCAST(msg), src_mt, dst_mt, nullptr, + arena.ptr()), + ""); +} +#endif + +TEST(ConvertTest, ConvertField_SingularMessageDeep) { + upb::Arena arena; + upb_test_convert_MessageWithMsg* msg = + upb_test_convert_MessageWithMsg_new(arena.ptr()); + upb_test_convert_MessageWithInt32* sub = + upb_test_convert_MessageWithInt32_new(arena.ptr()); + upb_test_convert_MessageWithInt32_set_f1(sub, 123); + upb_test_convert_MessageWithMsg_set_msg(msg, sub); + + const upb_MiniTable* src_mt = &upb__test__convert__MessageWithMsg_msg_init; + const upb_MiniTable* dst_mt = + &upb__test__convert__MessageWithMsgClone_msg_init; + + const upb_Message* dst_msg = upb_Message_Convert( + UPB_UPCAST(msg), src_mt, dst_mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithMsgClone* dst = + (const 
upb_test_convert_MessageWithMsgClone*)dst_msg; + + const upb_test_convert_MessageWithInt32Clone* dst_sub = + upb_test_convert_MessageWithMsgClone_msg(dst); + EXPECT_NE(dst_sub, nullptr); + EXPECT_EQ(123, upb_test_convert_MessageWithInt32Clone_f1(dst_sub)); + + // Verify deep copy logic occurred correctly + EXPECT_NE((const void*)dst_sub, (const void*)sub); +} + +TEST(ConvertTest, ConvertField_ArrayMessageShallow) { + upb::Arena arena; + upb_test_convert_MessageWithRepeatedMsg* msg = + upb_test_convert_MessageWithRepeatedMsg_new(arena.ptr()); + upb_test_convert_MessageWithInt32* sub = + upb_test_convert_MessageWithInt32_new(arena.ptr()); + upb_test_convert_MessageWithInt32_set_f1(sub, 123); + + upb_test_convert_MessageWithInt32** arr = + upb_test_convert_MessageWithRepeatedMsg_resize_msgs(msg, 1, arena.ptr()); + arr[0] = sub; + + const upb_MiniTable* mt = + &upb__test__convert__MessageWithRepeatedMsg_msg_init; + + const upb_Message* dst_msg = + upb_Message_Convert(UPB_UPCAST(msg), mt, mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithRepeatedMsg* dst = + (const upb_test_convert_MessageWithRepeatedMsg*)dst_msg; + + size_t size; + const upb_test_convert_MessageWithInt32* const* dst_arr = + upb_test_convert_MessageWithRepeatedMsg_msgs(dst, &size); + EXPECT_EQ(1, size); + EXPECT_EQ(123, upb_test_convert_MessageWithInt32_f1(dst_arr[0])); + + // Shallow copy because the destination MiniTable array element is identical + EXPECT_EQ((const void*)dst_arr[0], (const void*)sub); +} + +TEST(ConvertTest, ConvertField_ArrayMessageDeep) { + upb::Arena arena; + upb_test_convert_MessageWithRepeatedMsg* msg = + upb_test_convert_MessageWithRepeatedMsg_new(arena.ptr()); + upb_test_convert_MessageWithInt32* sub = + upb_test_convert_MessageWithInt32_new(arena.ptr()); + upb_test_convert_MessageWithInt32_set_f1(sub, 123); + + upb_test_convert_MessageWithInt32** arr = + upb_test_convert_MessageWithRepeatedMsg_resize_msgs(msg, 1, arena.ptr()); + 
arr[0] = sub; + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithRepeatedMsg_msg_init; + const upb_MiniTable* dst_mt = + &upb__test__convert__MessageWithRepeatedMsgClone_msg_init; + + const upb_Message* dst_msg = upb_Message_Convert( + UPB_UPCAST(msg), src_mt, dst_mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithRepeatedMsgClone* dst = + (const upb_test_convert_MessageWithRepeatedMsgClone*)dst_msg; + + size_t size; + const upb_test_convert_MessageWithInt32Clone* const* dst_arr = + upb_test_convert_MessageWithRepeatedMsgClone_msgs(dst, &size); + EXPECT_EQ(1, size); + EXPECT_EQ(123, upb_test_convert_MessageWithInt32Clone_f1(dst_arr[0])); + + // Deep copy expected + EXPECT_NE((const void*)dst_arr[0], (const void*)sub); +} + +TEST(ConvertTest, ConvertExtensions_ScalarMatch) { + upb::Arena arena; + upb_test_convert_MessageWithExtension* msg = + upb_test_convert_MessageWithExtension_new(arena.ptr()); + + upb_MessageValue ext_val; + ext_val.int32_val = 123; + upb_Message_SetExtension(UPB_UPCAST(msg), + upb_test_convert_ext_field_int32_ext, &ext_val, + arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithExtension_msg_init; + const upb_MiniTable* dst_mt = &upb__test__convert__MessageWithKnown_msg_init; + + const upb_Message* dst_msg = upb_Message_Convert( + UPB_UPCAST(msg), src_mt, dst_mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithKnown* dst = + (const upb_test_convert_MessageWithKnown*)dst_msg; + EXPECT_TRUE(upb_test_convert_MessageWithKnown_has_known_field_int32(dst)); + EXPECT_EQ(123, upb_test_convert_MessageWithKnown_known_field_int32(dst)); +} + +TEST(ConvertTest, ConvertExtensions_SingularMessageShallow) { + upb::Arena arena; + upb_test_convert_MessageWithExtension* msg = + upb_test_convert_MessageWithExtension_new(arena.ptr()); + + upb_test_convert_MessageWithInt32* sub = + upb_test_convert_MessageWithInt32_new(arena.ptr()); 
+ upb_test_convert_MessageWithInt32_set_f1(sub, 123); + + upb_MessageValue ext_val; + ext_val.msg_val = UPB_UPCAST(sub); + upb_Message_SetExtension(UPB_UPCAST(msg), upb_test_convert_ext_field_msg_ext, + &ext_val, arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithExtension_msg_init; + const upb_MiniTable* dst_mt = + &upb__test__convert__MessageWithKnownMsg_msg_init; + + const upb_Message* dst_msg = upb_Message_Convert( + UPB_UPCAST(msg), src_mt, dst_mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithKnownMsg* dst = + (const upb_test_convert_MessageWithKnownMsg*)dst_msg; + const upb_test_convert_MessageWithInt32* dst_sub = + upb_test_convert_MessageWithKnownMsg_known_msg(dst); + EXPECT_NE(dst_sub, nullptr); + EXPECT_EQ(123, upb_test_convert_MessageWithInt32_f1(dst_sub)); + + // Shallow copy expected since we are using the exact same minitable + // underlying message + EXPECT_EQ((const void*)dst_sub, (const void*)sub); +} + +TEST(ConvertTest, ConvertExtensions_SingularMessageDeep) { + upb::Arena arena; + upb_test_convert_MessageWithExtension* msg = + upb_test_convert_MessageWithExtension_new(arena.ptr()); + + upb_test_convert_MessageWithInt32* sub = + upb_test_convert_MessageWithInt32_new(arena.ptr()); + upb_test_convert_MessageWithInt32_set_f1(sub, 123); + + upb_MessageValue ext_val; + ext_val.msg_val = UPB_UPCAST(sub); + upb_Message_SetExtension(UPB_UPCAST(msg), upb_test_convert_ext_field_msg_ext, + &ext_val, arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithExtension_msg_init; + const upb_MiniTable* dst_mt = + &upb__test__convert__MessageWithKnownMsgClone_msg_init; + + const upb_Message* dst_msg = upb_Message_Convert( + UPB_UPCAST(msg), src_mt, dst_mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithKnownMsgClone* dst = + (const upb_test_convert_MessageWithKnownMsgClone*)dst_msg; + const 
upb_test_convert_MessageWithInt32Clone* dst_sub = + upb_test_convert_MessageWithKnownMsgClone_known_msg(dst); + EXPECT_NE(dst_sub, nullptr); + EXPECT_EQ(123, upb_test_convert_MessageWithInt32Clone_f1(dst_sub)); + + // Deep copy expected because of mismatched minitables + EXPECT_NE((const void*)dst_sub, (const void*)sub); +} + +TEST(ConvertTest, ConvertExtensions_RemainsExtension) { + upb::Arena arena; + upb_test_convert_MessageWithExtension* msg = + upb_test_convert_MessageWithExtension_new(arena.ptr()); + + upb_MessageValue ext_val; + ext_val.int32_val = 123; + upb_Message_SetExtension(UPB_UPCAST(msg), + upb_test_convert_ext_field_int32_ext, &ext_val, + arena.ptr()); + + const upb_MiniTable* mt = &upb__test__convert__MessageWithExtension_msg_init; + + const upb_Message* dst_msg = + upb_Message_Convert(UPB_UPCAST(msg), mt, mt, nullptr, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_MessageWithExtension* dst = + (const upb_test_convert_MessageWithExtension*)dst_msg; + + // Extension should still be perfectly preserved on the destination message. 
+ EXPECT_TRUE(upb_Message_HasExtension(UPB_UPCAST(dst), + upb_test_convert_ext_field_int32_ext)); + + int32_t out_val = upb_Message_GetExtensionInt32( + UPB_UPCAST(dst), upb_test_convert_ext_field_int32_ext, 0); + EXPECT_EQ(123, out_val); +} + +TEST(ConvertTest, ConvertExtensions_LookupExtensionInRegistry) { + upb::Arena arena; + upb_test_convert_MessageWithExtension* msg = + upb_test_convert_MessageWithExtension_new(arena.ptr()); + + upb_MessageValue ext_val; + ext_val.int32_val = 123; + upb_Message_SetExtension(UPB_UPCAST(msg), + upb_test_convert_ext_field_int32_ext, &ext_val, + arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithExtension_msg_init; + const upb_MiniTable* dst_mt = + &upb__test__convert__AnotherMessageWithExtension_msg_init; + + upb_ExtensionRegistry* extreg = upb_ExtensionRegistry_New(arena.ptr()); + upb_ExtensionRegistry_Add(extreg, + upb_test_convert_another_ext_field_int32_ext); + + const upb_Message* dst_msg = + upb_Message_Convert(UPB_UPCAST(msg), src_mt, dst_mt, extreg, arena.ptr()); + EXPECT_NE(dst_msg, nullptr); + + const upb_test_convert_AnotherMessageWithExtension* dst = + (const upb_test_convert_AnotherMessageWithExtension*)dst_msg; + + // Extension should be found via registry and converted. 
+ EXPECT_TRUE(upb_Message_HasExtension( + UPB_UPCAST(dst), upb_test_convert_another_ext_field_int32_ext)); + + int32_t out_val = upb_Message_GetExtensionInt32( + UPB_UPCAST(dst), upb_test_convert_another_ext_field_int32_ext, 0); + EXPECT_EQ(123, out_val); +} + +TEST(ConvertTest, ConvertExtensionToNonExtendable) { + upb::Arena arena; + upb_test_convert_MessageWithExtension* msg = + upb_test_convert_MessageWithExtension_new(arena.ptr()); + + upb_MessageValue ext_val; + ext_val.int32_val = 123; + upb_Message_SetExtension(UPB_UPCAST(msg), + upb_test_convert_ext_field_int32_ext, &ext_val, + arena.ptr()); + + const upb_MiniTable* src_mt = + &upb__test__convert__MessageWithExtension_msg_init; + const upb_MiniTable* dst_mt = &upb_0test__EmptyMessage_msg_init; + + const upb_Message* dst_msg = upb_Message_Convert( + UPB_UPCAST(msg), src_mt, dst_mt, nullptr, arena.ptr()); + ASSERT_NE(dst_msg, nullptr); + + // Dst should have unknown field 1000 with value 123. + size_t iter = kUpb_Message_UnknownBegin; + upb_StringView data; + ASSERT_TRUE(upb_Message_NextUnknown(dst_msg, &data, &iter)); + ASSERT_EQ(data.size, 3); + EXPECT_EQ((uint8_t)data.data[0], 0xC0); + EXPECT_EQ((uint8_t)data.data[1], 0x3E); + EXPECT_EQ((uint8_t)data.data[2], 0x7B); + EXPECT_FALSE(upb_Message_NextUnknown(dst_msg, &data, &iter)); +} + +TEST(ConvertTest, OneofDemotion) { + upb::Arena arena; + upb_test_convert_SrcWithOneof* msg = + upb_test_convert_SrcWithOneof_new(arena.ptr()); + // Set both fields of the oneof. The last one should win and the first should + // be dropped. + upb_test_convert_SrcWithOneof_set_oneof_int32(msg, 54321); + upb_test_convert_SrcWithOneof_set_oneof_string( + msg, upb_StringView_FromString("test")); + + // Convert to Empty message. Field 2 should become unknown. 
+ const upb_MiniTable* empty_mt = &upb_0test__EmptyMessage_msg_init; + const upb_MiniTable* src_mt = &upb__test__convert__SrcWithOneof_msg_init; + + const upb_Message* dst = upb_Message_Convert(UPB_UPCAST(msg), src_mt, + empty_mt, nullptr, arena.ptr()); + ASSERT_NE(dst, nullptr); + + // Dst should have unknown field 2 with value "test". + size_t iter = kUpb_Message_UnknownBegin; + upb_StringView data; + ASSERT_TRUE(upb_Message_NextUnknown(dst, &data, &iter)); + // Expected bytes: tag, length 4, then "test"; size is checked before indexing. + // Field number 2, wire type 2 (length-delimited): 2<<3 | 2 = 18 + ASSERT_EQ(data.size, 6); + EXPECT_EQ((uint8_t)data.data[0], 18); + EXPECT_EQ((uint8_t)data.data[1], 4); + EXPECT_EQ((uint8_t)data.data[2], 't'); + EXPECT_EQ((uint8_t)data.data[3], 'e'); + EXPECT_EQ((uint8_t)data.data[4], 's'); + EXPECT_EQ((uint8_t)data.data[5], 't'); + EXPECT_FALSE(upb_Message_NextUnknown(dst, &data, &iter)); + + // Now convert back and verify. + const upb_Message* dst2_msg = + upb_Message_Convert(dst, empty_mt, src_mt, nullptr, arena.ptr()); + ASSERT_NE(dst2_msg, nullptr); + const upb_test_convert_SrcWithOneof* dst2 = + (const upb_test_convert_SrcWithOneof*)dst2_msg; + EXPECT_EQ(upb_test_convert_SrcWithOneof_my_oneof_case(dst2), + upb_test_convert_SrcWithOneof_my_oneof_oneof_string); + upb_StringView str = upb_test_convert_SrcWithOneof_oneof_string(dst2); + EXPECT_EQ(std::string("test"), std::string(str.data, str.size)); +} diff --git a/upb/message/convert_test.proto b/upb/message/convert_test.proto new file mode 100644 index 0000000000000..d808b32c7c94c --- /dev/null +++ b/upb/message/convert_test.proto @@ -0,0 +1,118 @@ +syntax = "proto2"; + +package upb.test.convert; + +option java_multiple_files = true; +option java_outer_classname = "ConvertTest"; + +message MessageWithExtension { + extensions 1000 to max; +} + +extend MessageWithExtension { + optional int32 ext_field_int32 = 1000; + repeated MessageWithInt32 ext_field_repeated_msg = 1001; +} + +message MessageWithMapMessage { + map map_msg = 1; +} + +message 
MessageWithMapMessageClone { + map map_msg = 1; +} + +message MessageWithKnown { + optional int32 known_field_int32 = 1000; +} + +message MessageWithKnownInt64 { + optional int64 known_field_int64 = 1000; +} + +message MessageWithKnownRepeatedMsg { + repeated MessageWithInt32 known_repeated_msg = 1001; +} + +message MessageWithInt32 { + optional int32 f1 = 1; +} + +message MessageWithInt32Clone { + optional int32 f1 = 1; +} + +extend MessageWithExtension { + optional MessageWithInt32 ext_field_msg = 1002; +} + +message MessageWithKnownMsg { + optional MessageWithInt32 known_msg = 1002; +} + +message MessageWithKnownMsgClone { + optional MessageWithInt32Clone known_msg = 1002; +} + +message MessageWithRepeatedMsg { + repeated MessageWithInt32 msgs = 1; +} + +message MessageWithRepeatedMsgClone { + repeated MessageWithInt32Clone msgs = 1; +} + +message MessageWithMsg { + optional MessageWithInt32 msg = 1; +} + +message MessageWithMsgClone { + optional MessageWithInt32Clone msg = 1; +} + +message MessageWithKnownRepeatedMsgClone { + repeated MessageWithInt32Clone known_repeated_msg = 1001; +} + +message MessageWithInt64 { + optional int64 f1 = 1; +} + +message MessageWithString { + optional string f1 = 1; +} + +message MessageWithRepeatedInt32 { + repeated int32 r = 1; +} + +message MessageWithRepeatedInt64 { + repeated int64 r = 1; +} + +message MessageWithRepeatedString { + repeated string r = 1; +} + +message MessageWithMapInt32Int32 { + map m = 1; +} + +message MessageWithMapInt32Int64 { + map m = 1; +} + +message AnotherMessageWithExtension { + extensions 1000 to max; +} + +extend AnotherMessageWithExtension { + optional int32 another_ext_field_int32 = 1000; +} + +message SrcWithOneof { + oneof my_oneof { + int32 oneof_int32 = 1; + string oneof_string = 2; + } +} diff --git a/upb/message/test.cc b/upb/message/test.cc index 6a7214938b6b0..e28d2dd83830c 100644 --- a/upb/message/test.cc +++ b/upb/message/test.cc @@ -34,6 +34,7 @@ #include "upb/message/accessors.h" 
#include "upb/message/array.h" #include "upb/message/compare.h" +#include "upb/message/convert.h" #include "upb/message/map.h" #include "upb/message/message.h" #include "upb/message/test.upb.h" @@ -70,7 +71,7 @@ void VerifyMessage(const upb_test_TestExtensions* ext_msg) { EXPECT_EQ(123, upb_test_TestExtensions_optional_int32_ext(ext_msg)); const protobuf_test_messages_proto3_TestAllTypesProto3* ext_submsg = upb_test_optional_msg_ext(ext_msg); - EXPECT_TRUE(ext_submsg != nullptr); + EXPECT_NE(ext_submsg, nullptr); EXPECT_EQ(456, protobuf_test_messages_proto3_TestAllTypesProto3_optional_int32( ext_submsg)); @@ -86,7 +87,7 @@ TEST(MessageTest, Extensions) { upb::DefPool defpool; upb::MessageDefPtr m(upb_test_TestExtensions_getmsgdef(defpool.ptr())); - EXPECT_TRUE(m.ptr() != nullptr); + EXPECT_NE(m.ptr(), nullptr); std::string json = R"json( { @@ -107,7 +108,7 @@ TEST(MessageTest, Extensions) { size_t size; char* serialized = upb_test_TestExtensions_serialize(ext_msg, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); ASSERT_GE(size, 0); upb_test_TestExtensions* ext_msg2 = upb_test_TestExtensions_parse_ex( @@ -115,6 +116,31 @@ TEST(MessageTest, Extensions) { arena.ptr()); VerifyMessage(ext_msg2); + // Convert using the MiniTable obtained from the DefPool's MessageDef. + upb_MiniTable* mt_ext_conv = + const_cast(upb_MessageDef_MiniTable(m.ptr())); + + // Parse WITHOUT registry so extensions are unknown fields. + upb_test_TestExtensions* ext_msg_unkn = + upb_test_TestExtensions_parse(serialized, size, arena.ptr()); + + // Convert WITH registry to promote unknown fields to extensions. 
+ const upb_Message* converted_msg = + upb_Message_Convert(UPB_UPCAST(ext_msg_unkn), mt_ext_conv, mt_ext_conv, + const_cast( + upb_DefPool_ExtensionRegistry(defpool.ptr())), + arena.ptr()); + ASSERT_NE(converted_msg, nullptr); + + // Verify that the unknown field was promoted to the int32 extension. + const upb_test_TestExtensions* converted_ext_msg = + (const upb_test_TestExtensions*)converted_msg; + EXPECT_TRUE( + upb_test_TestExtensions_has_optional_int32_ext(converted_ext_msg)); + int32_t converted_val_promoted = + upb_test_TestExtensions_optional_int32_ext(converted_ext_msg); + EXPECT_EQ(123, converted_val_promoted); + // Test round-trip through JSON format. size_t json_size = upb_JsonEncode(UPB_UPCAST(ext_msg), m.ptr(), defpool.ptr(), 0, nullptr, 0, status.ptr()); @@ -122,11 +148,13 @@ TEST(MessageTest, Extensions) { static_cast(upb_Arena_Malloc(arena.ptr(), json_size + 1)); upb_JsonEncode(UPB_UPCAST(ext_msg), m.ptr(), defpool.ptr(), 0, json_buf, json_size + 1, status.ptr()); - upb_test_TestExtensions* ext_msg3 = upb_test_TestExtensions_new(arena.ptr()); - EXPECT_TRUE(upb_JsonDecode(json_buf, json_size, UPB_UPCAST(ext_msg3), m.ptr(), - defpool.ptr(), 0, arena.ptr(), status.ptr())) + upb_test_TestExtensions* ext_msg_json = + upb_test_TestExtensions_new(arena.ptr()); + EXPECT_TRUE(upb_JsonDecode(json_buf, json_size, UPB_UPCAST(ext_msg_json), + m.ptr(), defpool.ptr(), 0, arena.ptr(), + status.ptr())) << status.error_message(); - VerifyMessage(ext_msg3); + VerifyMessage(ext_msg_json); // Test setters and mutable accessors upb_test_TestExtensions* ext_msg4 = upb_test_TestExtensions_new(arena.ptr()); @@ -146,7 +174,7 @@ TEST(MessageTest, ExtensionsDeterministic) { upb::DefPool defpool; upb::MessageDefPtr m(upb_test_TestExtensions_getmsgdef(defpool.ptr())); - EXPECT_TRUE(m.ptr() != nullptr); + EXPECT_NE(m.ptr(), nullptr); std::string json = R"json( { @@ -166,14 +194,14 @@ TEST(MessageTest, ExtensionsDeterministic) { size_t size; char* serialized = 
upb_test_TestExtensions_serialize(ext_msg, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); ASSERT_GE(size, 0); size_t deterministic_size; char* deterministic_serialized = upb_test_TestExtensions_serialize_ex( ext_msg, kUpb_EncodeOption_Deterministic, arena.ptr(), &deterministic_size); - ASSERT_TRUE(deterministic_serialized != nullptr); + ASSERT_NE(deterministic_serialized, nullptr); ASSERT_EQ(deterministic_size, size); } @@ -182,7 +210,7 @@ TEST(MessageTest, ExtensionsEmpty) { upb::DefPool defpool; upb::MessageDefPtr m(upb_test_TestExtensions_getmsgdef(defpool.ptr())); - EXPECT_TRUE(m.ptr() != nullptr); + EXPECT_NE(m.ptr(), nullptr); for (int options : {0, int{kUpb_EncodeOption_Deterministic}}) { std::string json_with_empty = R"json( @@ -215,13 +243,13 @@ TEST(MessageTest, ExtensionsEmpty) { size_t size_with_empty; char* serialized = upb_test_TestExtensions_serialize_ex( ext_msg_with_empty, options, arena.ptr(), &size_with_empty); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); ASSERT_GE(size_with_empty, 0); size_t size; serialized = upb_test_TestExtensions_serialize_ex(ext_msg, options, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); // Presence or absence of an empty extension should not affect the // serialized output. 
ASSERT_EQ(size_with_empty, size); @@ -229,13 +257,13 @@ TEST(MessageTest, ExtensionsEmpty) { } void VerifyMessageSet(const upb_test_TestMessageSet* mset_msg) { - ASSERT_TRUE(mset_msg != nullptr); + ASSERT_NE(mset_msg, nullptr); bool has = upb_test_MessageSetMember_has_message_set_extension(mset_msg); EXPECT_TRUE(has); if (!has) return; const upb_test_MessageSetMember* member = upb_test_MessageSetMember_message_set_extension(mset_msg); - EXPECT_TRUE(member != nullptr); + EXPECT_NE(member, nullptr); EXPECT_TRUE(upb_test_MessageSetMember_has_optional_int32(member)); EXPECT_EQ(234, upb_test_MessageSetMember_optional_int32(member)); } @@ -243,7 +271,7 @@ void VerifyMessageSet(const upb_test_TestMessageSet* mset_msg) { TEST(MessageTest, LargeMessageSetExtension) { upb::Arena arena; upb_ExtensionRegistry* reg = upb_ExtensionRegistry_New(arena.ptr()); - ASSERT_TRUE(reg != nullptr); + ASSERT_NE(reg, nullptr); upb_MiniTableExtension ext; memset(&ext, 0, sizeof(ext)); @@ -262,7 +290,7 @@ TEST(MessageTest, MessageSet) { upb::DefPool defpool; upb::MessageDefPtr m(upb_test_TestMessageSet_getmsgdef(defpool.ptr())); - EXPECT_TRUE(m.ptr() != nullptr); + EXPECT_NE(m.ptr(), nullptr); std::string json = R"json( { @@ -281,7 +309,7 @@ TEST(MessageTest, MessageSet) { size_t size; char* serialized = upb_test_TestMessageSet_serialize(ext_msg, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); ASSERT_GE(size, 0); upb_test_TestMessageSet* ext_msg2 = upb_test_TestMessageSet_parse_ex( @@ -327,27 +355,27 @@ TEST(MessageTest, UnknownMessageSet) { size_t size; char* serialized = upb_test_FakeMessageSet_serialize(fake, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); ASSERT_GE(size, 0); upb::DefPool defpool; upb::MessageDefPtr m(upb_test_TestMessageSet_getmsgdef(defpool.ptr())); - EXPECT_TRUE(m.ptr() != nullptr); + EXPECT_NE(m.ptr(), nullptr); upb_test_TestMessageSet* message_set = 
upb_test_TestMessageSet_parse_ex( serialized, size, upb_DefPool_ExtensionRegistry(defpool.ptr()), 0, arena.ptr()); - ASSERT_TRUE(message_set != nullptr); + ASSERT_NE(message_set, nullptr); char* serialized2 = upb_test_TestMessageSet_serialize(message_set, arena.ptr(), &size); - ASSERT_TRUE(serialized2 != nullptr); + ASSERT_NE(serialized2, nullptr); ASSERT_GE(size, 0); // Parse back into a fake MessageSet and verify that the unknown MessageSet // item was preserved in full (both type_id and message). upb_test_FakeMessageSet* fake2 = upb_test_FakeMessageSet_parse(serialized2, size, arena.ptr()); - ASSERT_TRUE(fake2 != nullptr); + ASSERT_NE(fake2, nullptr); const upb_test_FakeMessageSet_Item* const* items = upb_test_FakeMessageSet_item(fake2, &size); @@ -381,17 +409,17 @@ TEST(MessageTest, MessageSetSubmessageEncoding) { size_t size; char* serialized = upb_test_TestExtensions_serialize(ext_msg, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); ASSERT_GE(size, 0); upb::DefPool defpool; upb::MessageDefPtr m(upb_test_TestMessageSet_getmsgdef(defpool.ptr())); - EXPECT_TRUE(m.ptr() != nullptr); + EXPECT_NE(m.ptr(), nullptr); upb_test_TestMessageSet* message_set = upb_test_TestMessageSet_parse_ex( serialized, size, upb_DefPool_ExtensionRegistry(defpool.ptr()), 0, arena.ptr()); - ASSERT_TRUE(message_set != nullptr); + ASSERT_NE(message_set, nullptr); VerifyMessageSet(message_set); } @@ -428,7 +456,7 @@ TEST(MessageTest, Proto2Enum) { // Parsing as enums puts unknown values into unknown fields. 
upb_test_Proto2EnumMessage* enum_msg = upb_test_Proto2EnumMessage_parse(pb, size, arena.ptr()); - ASSERT_TRUE(enum_msg != nullptr); + ASSERT_NE(enum_msg, nullptr); EXPECT_EQ(false, upb_test_Proto2EnumMessage_has_optional_enum(enum_msg)); const int32_t* vals_const = @@ -440,7 +468,7 @@ TEST(MessageTest, Proto2Enum) { pb = upb_test_Proto2EnumMessage_serialize(enum_msg, arena.ptr(), &size); upb_test_Proto2FakeEnumMessage* fake_msg2 = upb_test_Proto2FakeEnumMessage_parse(pb, size, arena.ptr()); - ASSERT_TRUE(fake_msg2 != nullptr); + ASSERT_NE(fake_msg2, nullptr); EXPECT_EQ(true, upb_test_Proto2FakeEnumMessage_has_optional_enum(fake_msg2)); EXPECT_EQ(999, upb_test_Proto2FakeEnumMessage_optional_enum(fake_msg2)); @@ -522,7 +550,7 @@ TEST(MessageTest, DecodeRequiredFieldsTopLevelMessage) { size_t size; char* serialized = upb_test_TestRequiredFields_serialize(test_msg, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); EXPECT_NE(0, size); // Fails, but the code path is slightly different because the serialized @@ -546,7 +574,7 @@ TEST(MessageTest, DecodeRequiredFieldsTopLevelMessage) { // Serialize a complete payload. 
serialized = upb_test_TestRequiredFields_serialize(test_msg, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); EXPECT_NE(0, size); upb_test_TestRequiredFields* test_msg2 = upb_test_TestRequiredFields_parse_ex( @@ -609,7 +637,7 @@ TEST(MessageTest, EncodeRequiredFields) { size_t size; char* serialized = upb_test_TestRequiredFields_serialize_ex(test_msg, 0, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); EXPECT_EQ(size, 0); // Fails, we asked for required field checking but the required field is @@ -630,7 +658,7 @@ TEST(MessageTest, EncodeRequiredFields) { upb_test_TestRequiredFields_set_required_message(test_msg, empty_msg); serialized = upb_test_TestRequiredFields_serialize_ex( test_msg, kUpb_EncodeOption_CheckRequired, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); } TEST(MessageTest, MaxRequiredFields) { @@ -666,7 +694,7 @@ TEST(MessageTest, MaxRequiredFields) { upb_Message_SetFieldByDef(UPB_UPCAST(test_msg), f.ptr(), val, arena.ptr()); serialized = upb_test_TestMaxRequiredFields_serialize_ex( test_msg, kUpb_EncodeOption_CheckRequired, arena.ptr(), &size); - ASSERT_TRUE(serialized != nullptr); + ASSERT_NE(serialized, nullptr); } TEST(MessageTest, MapField) { diff --git a/upb/wire/BUILD b/upb/wire/BUILD index 92a46b3529e0b..a0b2a9490ed4a 100644 --- a/upb/wire/BUILD +++ b/upb/wire/BUILD @@ -27,6 +27,7 @@ cc_library( features = UPB_DEFAULT_FEATURES, visibility = [ "//upb:__pkg__", + "//upb/message:__subpackages__", "//upb/wire/decode_fast:__subpackages__", ], deps = [ diff --git a/upb/wire/internal/decoder.h b/upb/wire/internal/decoder.h index 3f684014bcd63..9efd42c3923bb 100644 --- a/upb/wire/internal/decoder.h +++ b/upb/wire/internal/decoder.h @@ -110,6 +110,16 @@ UPB_INLINE upb_DecodeStatus upb_Decoder_Destroy(upb_Decoder* d, return (upb_DecodeStatus)d->err->code; } +UPB_INLINE void upb_Decoder_Reset(upb_Decoder* d, int options, + 
upb_Message* msg) { + d->depth = upb_DecodeOptions_GetEffectiveMaxDepth(options); + d->options = options; + d->end_group = DECODE_NOGROUP; + d->missing_required = false; + d->message_is_done = false; + d->original_msg = msg; +} + #ifndef NDEBUG UPB_INLINE bool _upb_Decoder_TraceBufferHasBytesAvailable(upb_Decoder* d, int n) { diff --git a/upb/wire/internal/encoder.h b/upb/wire/internal/encoder.h index 13293404197e0..9678d49f2ef6b 100644 --- a/upb/wire/internal/encoder.h +++ b/upb/wire/internal/encoder.h @@ -50,6 +50,10 @@ UPB_INLINE void UPB_PRIVATE(_upb_encstate_init)(upb_encstate* e, jmp_buf* err, _upb_mapsorter_init(&e->sorter); } +UPB_INLINE void UPB_PRIVATE(_upb_encstate_destroy)(upb_encstate* e) { + _upb_mapsorter_destroy(&e->sorter); +} + // Internal version of upb_Encode that encodes a single field. // // The caller must clean up the `upb_encstate` by calling