From 0b6008334ca3b718b785752f501c91b8b8929c82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bastian=20M=C3=BCller?= Date: Wed, 8 Apr 2026 13:44:49 -0700 Subject: [PATCH] add support for debug comparing mismatching blocks when verification fails --- cmd/util/cmd/compare-debug-tx/cmd.go | 152 ++++++++++++------ cmd/util/cmd/verify_execution_result/cmd.go | 62 ++++++- engine/verification/verifier/verifiers.go | 27 ++-- .../verification/verifier/verifiers_test.go | 1 + 4 files changed, 176 insertions(+), 66 deletions(-) diff --git a/cmd/util/cmd/compare-debug-tx/cmd.go b/cmd/util/cmd/compare-debug-tx/cmd.go index e6174e80c11..96e7853af5b 100644 --- a/cmd/util/cmd/compare-debug-tx/cmd.go +++ b/cmd/util/cmd/compare-debug-tx/cmd.go @@ -33,6 +33,7 @@ var ( flagOnlyTraceCadence bool flagEntropyProvider string flagShowTraceDiff bool + flagBlockIDs string ) var Cmd = &cobra.Command{ @@ -73,6 +74,26 @@ func init() { Cmd.Flags().StringVar(&flagEntropyProvider, "entropy-provider", "none", "entropy provider to use (default: none; options: none, block-hash)") Cmd.Flags().BoolVar(&flagShowTraceDiff, "show-trace-diff", false, "show trace diff output (default: false)") + + Cmd.Flags().StringVar(&flagBlockIDs, "block-ids", "", "comma-separated hex block IDs (alternative to --block-id + --block-count)") +} + +// Config holds the parameters for a compare-debug-tx invocation. +type Config struct { + Branch1 string + Branch2 string + Chain string + AccessAddress string + ExecutionAddress string + ComputeLimit uint64 + UseExecutionDataAPI bool + BlockIDs []string // pre-resolved block IDs (hex strings) + TxIDs []string // positional transaction IDs + Parallel int + LogCadenceTraces bool + OnlyTraceCadence bool + EntropyProvider string + ShowTraceDiff bool } // closeFile closes the file and fatals on failure. @@ -90,21 +111,55 @@ func removeFile(path string) { } func run(_ *cobra.Command, args []string) { - repoRoot := findRepoRoot() - checkCleanWorkingTree(repoRoot) - - // Resolve block IDs before git checkouts when --block-count > 0. - var blockIDs []string - if flagBlockCount > 0 { + cfg := Config{ + Branch1: flagBranch1, + Branch2: flagBranch2, + Chain: flagChain, + AccessAddress: flagAccessAddress, + ExecutionAddress: flagExecutionAddress, + ComputeLimit: flagComputeLimit, + UseExecutionDataAPI: flagUseExecutionDataAPI, + TxIDs: args, + Parallel: flagParallel, + LogCadenceTraces: flagLogCadenceTraces, + OnlyTraceCadence: flagOnlyTraceCadence, + EntropyProvider: flagEntropyProvider, + ShowTraceDiff: flagShowTraceDiff, + } + + // Resolve block IDs before git checkouts. + if flagBlockIDs != "" { + if flagBlockID != "" || flagBlockCount > 1 { + log.Fatal().Msg("--block-ids cannot be used with --block-id or --block-count > 1") + } + if len(args) > 0 { + log.Fatal().Msg("--block-ids cannot be used with positional transaction IDs") + } + for _, raw := range strings.Split(flagBlockIDs, ",") { + raw = strings.TrimSpace(raw) + if _, err := flow.HexStringToIdentifier(raw); err != nil { + log.Fatal().Err(err).Str("id", raw).Msg("invalid block ID in --block-ids") + } + cfg.BlockIDs = append(cfg.BlockIDs, raw) + } + } else if flagBlockCount > 0 { if flagBlockID == "" { log.Fatal().Msg("--block-count requires --block-id to be set") } if len(args) > 0 { log.Fatal().Msg("--block-count cannot be used with positional transaction IDs") } - blockIDs = resolveBlockChain(flagBlockID, flagBlockCount) + cfg.BlockIDs = resolveBlockChain(flagAccessAddress, flagBlockID, flagBlockCount) } + Run(cfg) +} + +// Run executes the compare-debug-tx workflow with the given configuration. +func Run(cfg Config) { + repoRoot := findRepoRoot() + checkCleanWorkingTree(repoRoot) + result1, err := os.CreateTemp("", "compare-debug-tx-result1-*") if err != nil { log.Fatal().Err(err).Msg("failed to create temp file for result1") @@ -133,29 +188,29 @@ func run(_ *cobra.Command, args []string) { defer removeFile(trace2.Name()) defer closeFile(trace2) - checkoutBranch(repoRoot, flagBranch1) + checkoutBranch(repoRoot, cfg.Branch1) binary1 := buildUtil(repoRoot) defer removeFile(binary1) - perBlock1 := runAllBlocks(binary1, args, blockIDs, result1, trace1) + perBlock1 := runAllBlocks(cfg, binary1, result1, trace1) defer cleanupPerBlockFiles(perBlock1) - checkoutBranch(repoRoot, flagBranch2) + checkoutBranch(repoRoot, cfg.Branch2) binary2 := buildUtil(repoRoot) defer removeFile(binary2) - perBlock2 := runAllBlocks(binary2, args, blockIDs, result2, trace2) + perBlock2 := runAllBlocks(cfg, binary2, result2, trace2) defer cleanupPerBlockFiles(perBlock2) - if len(blockIDs) == 1 { - fmt.Printf("=== Result diff (%s vs %s) ===\n", flagBranch1, flagBranch2) - diffFiles(result1.Name(), result2.Name(), flagBranch1, flagBranch2) + if len(cfg.BlockIDs) == 1 { + fmt.Printf("=== Result diff (%s vs %s) ===\n", cfg.Branch1, cfg.Branch2) + diffFiles(result1.Name(), result2.Name(), cfg.Branch1, cfg.Branch2) - if flagShowTraceDiff { - fmt.Printf("=== Trace diff (%s vs %s) ===\n", flagBranch1, flagBranch2) - diffFiles(trace1.Name(), trace2.Name(), flagBranch1, flagBranch2) + if cfg.ShowTraceDiff { + fmt.Printf("=== Trace diff (%s vs %s) ===\n", cfg.Branch1, cfg.Branch2) + diffFiles(trace1.Name(), trace2.Name(), cfg.Branch1, cfg.Branch2) } } - printMismatchStats(blockIDs, perBlock1, perBlock2) + printMismatchStats(cfg.BlockIDs, perBlock1, perBlock2) } // perBlockFiles holds per-block result and trace temp files. @@ -174,23 +229,23 @@ func cleanupPerBlockFiles(files []perBlockFiles) { } } -// runAllBlocks runs debug-tx for each block ID in blockIDs (or a single invocation when -// blockIDs is empty), up to flagParallel blocks concurrently. Results and traces from each +// runAllBlocks runs debug-tx for each block ID in cfg.BlockIDs (or a single invocation when +// BlockIDs is empty), up to cfg.Parallel blocks concurrently. Results and traces from each // block are collected into per-block temp files and concatenated into resultDst and traceDst // in deterministic order after all blocks complete. // -// When blockIDs has entries, the per-block files are returned and the caller is responsible -// for cleanup via cleanupPerBlockFiles. When blockIDs is empty, nil is returned. -func runAllBlocks(binaryPath string, txIDs []string, blockIDs []string, resultDst *os.File, traceDst *os.File) []perBlockFiles { - if len(blockIDs) == 0 { - if err := runDebugTx(binaryPath, buildDebugTxArgs(txIDs, traceDst.Name(), ""), resultDst); err != nil { +// When BlockIDs has entries, the per-block files are returned and the caller is responsible +// for cleanup via cleanupPerBlockFiles. When BlockIDs is empty, nil is returned. +func runAllBlocks(cfg Config, binaryPath string, resultDst *os.File, traceDst *os.File) []perBlockFiles { + if len(cfg.BlockIDs) == 0 { + if err := runDebugTx(binaryPath, buildDebugTxArgs(cfg, traceDst.Name(), ""), resultDst); err != nil { log.Fatal().Err(err).Msg("failed to run debug-tx") } return nil } - files := make([]perBlockFiles, len(blockIDs)) - for i := range blockIDs { + files := make([]perBlockFiles, len(cfg.BlockIDs)) + for i := range cfg.BlockIDs { result, err := os.CreateTemp("", "compare-debug-tx-block-result-*") if err != nil { log.Fatal().Err(err).Msg("failed to create per-block result temp file") @@ -203,11 +258,11 @@ func runAllBlocks(binaryPath string, txIDs []string, blockIDs []string, resultDs } g, _ := errgroup.WithContext(context.Background()) - g.SetLimit(flagParallel) + g.SetLimit(cfg.Parallel) - for i, blockID := range blockIDs { + for i, blockID := range cfg.BlockIDs { g.Go(func() error { - return runDebugTx(binaryPath, buildDebugTxArgs(txIDs, files[i].trace.Name(), blockID), files[i].result) + return runDebugTx(binaryPath, buildDebugTxArgs(cfg, files[i].trace.Name(), blockID), files[i].result) }) } if err := g.Wait(); err != nil { @@ -234,13 +289,13 @@ func runAllBlocks(binaryPath string, txIDs []string, blockIDs []string, resultDs // resolveBlockChain fetches count consecutive block IDs starting from startBlockID, // following parent IDs, and returns them as hex strings. -func resolveBlockChain(startBlockID string, count int) []string { +func resolveBlockChain(accessAddress string, startBlockID string, count int) []string { blockID, err := flow.HexStringToIdentifier(startBlockID) if err != nil { log.Fatal().Err(err).Str("ID", startBlockID).Msg("failed to parse block ID") } - config, err := grpcclient.NewFlowClientConfig(flagAccessAddress, "", flow.ZeroID, true) + config, err := grpcclient.NewFlowClientConfig(accessAddress, "", flow.ZeroID, true) if err != nil { log.Fatal().Err(err).Msg("failed to create flow client config") } @@ -334,35 +389,30 @@ func runDebugTx(binaryPath string, fwdArgs []string, resultDst *os.File) error { return cmd.Run() } -// buildDebugTxArgs assembles the flag arguments for the debug-tx command, appending -// --show-result=true, --trace=, and the given tx IDs. -// blockIDOverride, when non-empty, overrides --block-id instead of using flagBlockID. -func buildDebugTxArgs(txIDs []string, tracePath string, blockIDOverride string) []string { +// buildDebugTxArgs assembles the flag arguments for the debug-tx command. +// blockIDOverride, when non-empty, is passed as --block-id. +func buildDebugTxArgs(cfg Config, tracePath string, blockIDOverride string) []string { args := []string{ - "--chain=" + flagChain, - "--access-address=" + flagAccessAddress, - fmt.Sprintf("--compute-limit=%d", flagComputeLimit), - fmt.Sprintf("--use-execution-data-api=%t", flagUseExecutionDataAPI), - fmt.Sprintf("--log-cadence-traces=%t", flagLogCadenceTraces), - fmt.Sprintf("--only-trace-cadence=%t", flagOnlyTraceCadence), - "--entropy-provider=" + flagEntropyProvider, + "--chain=" + cfg.Chain, + "--access-address=" + cfg.AccessAddress, + fmt.Sprintf("--compute-limit=%d", cfg.ComputeLimit), + fmt.Sprintf("--use-execution-data-api=%t", cfg.UseExecutionDataAPI), + fmt.Sprintf("--log-cadence-traces=%t", cfg.LogCadenceTraces), + fmt.Sprintf("--only-trace-cadence=%t", cfg.OnlyTraceCadence), + "--entropy-provider=" + cfg.EntropyProvider, "--show-result=true", "--trace=" + tracePath, } - if flagExecutionAddress != "" { - args = append(args, "--execution-address="+flagExecutionAddress) + if cfg.ExecutionAddress != "" { + args = append(args, "--execution-address="+cfg.ExecutionAddress) } - blockID := flagBlockID if blockIDOverride != "" { - blockID = blockIDOverride - } - if blockID != "" { - args = append(args, "--block-id="+blockID) + args = append(args, "--block-id="+blockIDOverride) } - args = append(args, txIDs...) + args = append(args, cfg.TxIDs...) return args } diff --git a/cmd/util/cmd/verify_execution_result/cmd.go b/cmd/util/cmd/verify_execution_result/cmd.go index 546be9c071a..79070203e8d 100644 --- a/cmd/util/cmd/verify_execution_result/cmd.go +++ b/cmd/util/cmd/verify_execution_result/cmd.go @@ -5,9 +5,11 @@ import ( "strconv" "strings" + "github.com/rs/zerolog" "github.com/rs/zerolog/log" "github.com/spf13/cobra" + compare_debug_tx "github.com/onflow/flow-go/cmd/util/cmd/compare-debug-tx" "github.com/onflow/flow-go/cmd/util/cmd/common" "github.com/onflow/flow-go/engine/verification/verifier" "github.com/onflow/flow-go/fvm" @@ -25,6 +27,11 @@ var ( flagStopOnMismatch bool flagtransactionFeesDisabled bool flagScheduledTransactionsEnabled bool + + flagCompareBranch1 string + flagCompareBranch2 string + flagCompareAccessAddress string + flagCompareParallel int ) // # verify the last 100 sealed blocks @@ -61,6 +68,11 @@ func init() { Cmd.Flags().BoolVar(&flagtransactionFeesDisabled, "fees_disabled", false, "disable transaction fees") Cmd.Flags().BoolVar(&flagScheduledTransactionsEnabled, "scheduled_callbacks_enabled", fvm.DefaultScheduledTransactionsEnabled, "[deprecated] enable scheduled transactions") + + Cmd.Flags().StringVar(&flagCompareBranch1, "compare-branch1", "", "first git branch for cross-branch comparison on mismatch") + Cmd.Flags().StringVar(&flagCompareBranch2, "compare-branch2", "", "second git branch for cross-branch comparison on mismatch") + Cmd.Flags().StringVar(&flagCompareAccessAddress, "compare-access-address", "", "access node address for cross-branch comparison") + Cmd.Flags().IntVar(&flagCompareParallel, "compare-parallel", 1, "number of blocks to process in parallel during comparison") } func run(*cobra.Command, []string) { @@ -88,6 +100,13 @@ func run(*cobra.Command, []string) { lg.Info().Msgf("look for 'could not verify' in the log for any mismatch, or try again with --stop_on_mismatch true to stop on first mismatch") } + compareEnabled := flagCompareBranch1 != "" + if compareEnabled { + if flagCompareBranch2 == "" || flagCompareAccessAddress == "" { + lg.Fatal().Msg("--compare-branch2 and --compare-access-address are required when --compare-branch1 is set") + } + } + var totalStats verifier.BlockVerificationStats if flagFromTo != "" { @@ -110,9 +129,14 @@ func run(*cobra.Command, []string) { flagScheduledTransactionsEnabled, ) if err != nil { - lg.Fatal().Err(err).Msgf("could not verify range from %d to %d", from, to) + if compareEnabled && len(totalStats.MismatchedBlockIDs) > 0 { + lg.Error().Err(err).Msgf("verification stopped due to mismatch, proceeding to comparison") + } else { + lg.Fatal().Err(err).Msgf("could not verify range from %d to %d", from, to) + } + } else { + lg.Info().Msgf("finished verifying range from %d to %d", from, to) } - lg.Info().Msgf("finished verifying range from %d to %d", from, to) } else { lg.Info().Msgf("verifying last %d sealed blocks", flagLastK) var err error @@ -128,10 +152,14 @@ func run(*cobra.Command, []string) { flagScheduledTransactionsEnabled, ) if err != nil { - lg.Fatal().Err(err).Msg("could not verify last k height") + if compareEnabled && len(totalStats.MismatchedBlockIDs) > 0 { + lg.Error().Err(err).Msgf("verification stopped due to mismatch, proceeding to comparison") + } else { + lg.Fatal().Err(err).Msg("could not verify last k height") + } + } else { + lg.Info().Msgf("finished verifying last %d sealed blocks", flagLastK) } - - lg.Info().Msgf("finished verifying last %d sealed blocks", flagLastK) } lg.Info().Msgf("matching chunks: %d/%d. matching transactions: %d/%d", @@ -140,6 +168,30 @@ func run(*cobra.Command, []string) { totalStats.MatchedTransactionCount, totalStats.MatchedTransactionCount+totalStats.MismatchedTransactionCount, ) + + if compareEnabled && len(totalStats.MismatchedBlockIDs) > 0 { + runCompareDebugTx(lg, totalStats.MismatchedBlockIDs) + } +} + +func runCompareDebugTx(lg zerolog.Logger, mismatchedBlockIDs []flow.Identifier) { + blockIDs := make([]string, len(mismatchedBlockIDs)) + for i, id := range mismatchedBlockIDs { + blockIDs[i] = id.String() + } + + lg.Info(). + Int("mismatch_count", len(mismatchedBlockIDs)). + Msg("invoking compare-debug-tx for mismatched blocks") + + compare_debug_tx.Run(compare_debug_tx.Config{ + Branch1: flagCompareBranch1, + Branch2: flagCompareBranch2, + Chain: flagChain, + AccessAddress: flagCompareAccessAddress, + BlockIDs: blockIDs, + Parallel: flagCompareParallel, + }) } func parseFromTo(fromTo string) (from, to uint64, err error) { diff --git a/engine/verification/verifier/verifiers.go b/engine/verification/verifier/verifiers.go index 9463a796f3f..fe213f1f19a 100644 --- a/engine/verification/verifier/verifiers.go +++ b/engine/verification/verifier/verifiers.go @@ -229,6 +229,7 @@ func verifyConcurrently( totalStats.MismatchedChunkCount += blockStats.MismatchedChunkCount totalStats.MatchedTransactionCount += blockStats.MatchedTransactionCount totalStats.MismatchedTransactionCount += blockStats.MismatchedTransactionCount + totalStats.MismatchedBlockIDs = append(totalStats.MismatchedBlockIDs, blockStats.MismatchedBlockIDs...) if err != nil { log.Error().Uint64("height", height).Err(err).Msg("error encountered while verifying height") @@ -344,6 +345,7 @@ type BlockVerificationStats struct { MismatchedChunkCount uint64 MatchedTransactionCount uint64 MismatchedTransactionCount uint64 + MismatchedBlockIDs []flow.Identifier } // verifyHeight verifies all chunks in the results of the block at the given height. @@ -400,16 +402,17 @@ func verifyHeight( if stopOnMismatch { return BlockVerificationStats{ - MismatchedChunkCount: 1, - MismatchedTransactionCount: chunkTransactionCount, - }, fmt.Errorf( - "could not verify chunk (index: %v, ID: %v) at block %v (%v): %w", - i, - collectionID, - height, - blockID, - err, - ) + MismatchedChunkCount: 1, + MismatchedTransactionCount: chunkTransactionCount, + MismatchedBlockIDs: []flow.Identifier{blockID}, + }, fmt.Errorf( + "could not verify chunk (index: %v, ID: %v) at block %v (%v): %w", + i, + collectionID, + height, + blockID, + err, + ) } if vcd.IsSystemChunk { @@ -435,6 +438,10 @@ func verifyHeight( } } + if stats.MismatchedChunkCount > 0 { + stats.MismatchedBlockIDs = []flow.Identifier{blockID} + } + return stats, nil } diff --git a/engine/verification/verifier/verifiers_test.go b/engine/verification/verifier/verifiers_test.go index addc07b844f..688fca84885 100644 --- a/engine/verification/verifier/verifiers_test.go +++ b/engine/verification/verifier/verifiers_test.go @@ -255,6 +255,7 @@ func TestVerifyHeight_MismatchWithStop(t *testing.T) { assert.Equal(t, uint64(1), stats.MismatchedChunkCount) assert.Equal(t, uint64(0), stats.MatchedTransactionCount) assert.Equal(t, txPerChunk, stats.MismatchedTransactionCount) + assert.Len(t, stats.MismatchedBlockIDs, 1) } func TestVerifyHeight_BlockNotExecuted(t *testing.T) {