diff --git a/ace.yaml b/ace.yaml index 3875e39..9f3635b 100644 --- a/ace.yaml +++ b/ace.yaml @@ -36,7 +36,7 @@ mtree: publication_name: "ace_mtree_pub" cdc_processing_timeout: 30 cdc_metadata_flush_seconds: 10 - schema: "spock" + schema: "pgedge_ace" diff: min_block_size: 1000 diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index fb46ffe..4d53244 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -18,6 +18,14 @@ All notable changes to ACE will be captured in this document. This project follo (unidirectional, bidirectional, fix-nulls, dry-run), Merkle tree operations, and origin-tracked replication with repair. +### ⚠️ Breaking Change +- **Default `mtree.schema` changed from `"spock"` to `"pgedge_ace"`.** If you have + been using Merkle tree operations (`mtree build`, `mtree diff`, continuous CDC) + in prior versions, your existing metadata tables live in the `spock` schema. To + preserve compatibility, set `schema: "spock"` in the following locations: + - `ace.yaml` → `mtree.schema` + - `visualise.sh` → `-S spock` flag (or edit the default in the script) + ### Changed - ACE schema name in SQL templates is now quoted with `pgx.Identifier.Sanitize()` to prevent SQL breakage with non-simple schema names (e.g., mixed case, diff --git a/docs/configuration.md b/docs/configuration.md index cbf023a..9f404af 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -37,7 +37,7 @@ The [`ace.yaml` file](https://github.com/pgEdge/ace/blob/main/ace.yaml) defines | mtree → cdc --> publication_name | Publication used for mtree CDC. **Default: "ace_mtree_pub"** | | mtree → cdc --> cdc_processing_timeout | CDC processing timeout (s). **Default: 30** | | mtree → cdc --> cdc_metadata_flush_seconds | How often (s) CDC metadata is flushed to disk. **Default: 10** | -| mtree --> schema | Schema used for mtree metadata/objects. **Default: "spock"** | +| mtree --> schema | Schema used for mtree metadata/objects. 
**Default: "pgedge_ace"** | | mtree → diff --> min_block_size | Minimum Merkle diff block size. **Default: 1000** | | mtree → diff --> block_size | Target Merkle diff block size. **Default: 100000** | | mtree → diff --> max_block_size | Maximum Merkle diff block size. **Default: 1000000** | diff --git a/internal/cli/default_config.yaml b/internal/cli/default_config.yaml index 279839f..e8e3d2a 100644 --- a/internal/cli/default_config.yaml +++ b/internal/cli/default_config.yaml @@ -35,7 +35,7 @@ mtree: publication_name: "ace_mtree_pub" cdc_processing_timeout: 30 cdc_metadata_flush_seconds: 10 - schema: "spock" + schema: "pgedge_ace" diff: min_block_size: 1000 diff --git a/tests/integration/test_env_test.go b/tests/integration/test_env_test.go index a0ca738..d7c9279 100644 --- a/tests/integration/test_env_test.go +++ b/tests/integration/test_env_test.go @@ -392,22 +392,32 @@ func (e *testEnv) pairKey() string { return e.ServiceN1 + "/" + e.ServiceN2 } -// awaitDataSync waits until n1 and n2 have the same row count for the given -// table. This prevents inter-test bleed when a previous subtest's cleanup -// repair is still replicating. +// awaitDataSync waits until n1 and n2 have the same row count AND the same +// content digest for the given table. Counts alone miss UPDATE-only drift +// (repair replays after UPDATE-based subtests change field values without +// changing row counts); the hashtext-sum digest is order-independent and +// PK-agnostic, so it catches that case. 
func (e *testEnv) awaitDataSync(t *testing.T, qualifiedTableName string) { t.Helper() ctx := context.Background() + query := fmt.Sprintf( + "SELECT count(*), COALESCE(sum(hashtext(t::text)::bigint), 0) FROM %s t", + qualifiedTableName, + ) assertEventually(t, 30*time.Second, func() error { var n1Count, n2Count int - if err := e.N1Pool.QueryRow(ctx, fmt.Sprintf("SELECT count(*) FROM %s", qualifiedTableName)).Scan(&n1Count); err != nil { - return fmt.Errorf("counting rows on n1: %w", err) + var n1Digest, n2Digest int64 + if err := e.N1Pool.QueryRow(ctx, query).Scan(&n1Count, &n1Digest); err != nil { + return fmt.Errorf("digesting rows on n1: %w", err) } - if err := e.N2Pool.QueryRow(ctx, fmt.Sprintf("SELECT count(*) FROM %s", qualifiedTableName)).Scan(&n2Count); err != nil { - return fmt.Errorf("counting rows on n2: %w", err) + if err := e.N2Pool.QueryRow(ctx, query).Scan(&n2Count, &n2Digest); err != nil { + return fmt.Errorf("digesting rows on n2: %w", err) } if n1Count != n2Count { - return fmt.Errorf("nodes not in sync for %s: n1=%d n2=%d", qualifiedTableName, n1Count, n2Count) + return fmt.Errorf("nodes not in sync for %s: n1 count=%d n2 count=%d", qualifiedTableName, n1Count, n2Count) + } + if n1Digest != n2Digest { + return fmt.Errorf("nodes not in sync for %s: count=%d but digest n1=%d n2=%d", qualifiedTableName, n1Count, n1Digest, n2Digest) } return nil }) diff --git a/visualise.sh b/visualise.sh index f485d16..cc4c759 100755 --- a/visualise.sh +++ b/visualise.sh @@ -12,6 +12,7 @@ #!/usr/bin/env bash # visualise.sh # Flags: +# -S (default: pgedge_ace) -> schema where ACE mtree objects live # -s (default: public) # -t (required) -> full table: ace_mtree__
# -H (optional) -> psql -h @@ -21,11 +22,11 @@ # -v vertical # # Uses PG* env for user and db Example: -# PGUSER=admin PGDATABASE=demo ./visualise.sh -s public -t customers_small -H localhost -v +# PGUSER=admin PGDATABASE=demo ./visualise.sh -s public -t customers_small -H localhost -v [-S pgedge_ace] set -euo pipefail -ace_schema="spock" +ace_schema="pgedge_ace" table_schema="public" table="" host="" @@ -33,8 +34,9 @@ user="${PGUSER:-admin}" dbname="${PGDATABASE:-demo}" orientation="vertical" # default -while getopts ":s:t:H:U:d:hv" opt; do +while getopts ":S:s:t:H:U:d:hv" opt; do case "$opt" in + S) ace_schema="$OPTARG" ;; s) table_schema="$OPTARG" ;; t) table="$OPTARG" ;; H) host="$OPTARG" ;; @@ -48,7 +50,7 @@ while getopts ":s:t:H:U:d:hv" opt; do done if [[ -z "$table" ]]; then - echo "Usage: $0 -t
<table> [-s <table_schema>] [-H <host>] [-U <user>] [-d <dbname>] [-h|-v]" >&2 + echo "Usage: $0 -t <table>
[-s <table_schema>] [-S <ace_schema>] [-H <host>] [-U <user>] [-d <dbname>] [-h|-v]" >&2 exit 2 fi