From 1a29dbbe9afac7bab2296ffc932704b6413cb6c9 Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Mon, 9 Mar 2026 15:40:03 -0700 Subject: [PATCH 01/12] Improve tests for various error scenarios - Regex meta characters in index names should not break warning detection (required code fix) - Improve tests that only checked number of rows (need to validate data as well) - Test positive case allowing ignored duplicates on migration key - Test behavior with PanicOnWarnings disabled --- go/logic/applier.go | 32 +++-- go/logic/applier_test.go | 304 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 322 insertions(+), 14 deletions(-) diff --git a/go/logic/applier.go b/go/logic/applier.go index 58761d844..a344919c9 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -917,6 +917,17 @@ func (this *Applier) ApplyIterationInsertQuery() (chunkSize int64, rowsAffected return nil, err } + // Compile regex once before loop to avoid performance penalty and handle errors properly + // Escape metacharacters to avoid regex syntax errors with table/index names like "my_table[test]" or "idx.name" + // Duplicate warnings are formatted differently across mysql versions, hence the optional table name prefix + escapedTable := regexp.QuoteMeta(this.migrationContext.GetGhostTableName()) + escapedKey := regexp.QuoteMeta(this.migrationContext.UniqueKey.NameInGhostTable) + migrationUniqueKeyPattern := fmt.Sprintf(`for key '(%s\.)?%s'`, escapedTable, escapedKey) + migrationKeyRegex, err := regexp.Compile(migrationUniqueKeyPattern) + if err != nil { + return nil, fmt.Errorf("failed to compile migration key pattern: %w", err) + } + var sqlWarnings []string for rows.Next() { var level, message string @@ -925,10 +936,7 @@ func (this *Applier) ApplyIterationInsertQuery() (chunkSize int64, rowsAffected this.migrationContext.Log.Warningf("Failed to read SHOW WARNINGS row") continue } - // Duplicate warnings are formatted differently across mysql versions, hence the optional table name prefix - migrationUniqueKeyExpression := fmt.Sprintf("for key '(%s\\.)?%s'", this.migrationContext.GetGhostTableName(), this.migrationContext.UniqueKey.NameInGhostTable) - matched, _ := regexp.MatchString(migrationUniqueKeyExpression, message) - if strings.Contains(message, "Duplicate entry") && matched { + if strings.Contains(message, "Duplicate entry") && migrationKeyRegex.MatchString(message) { continue } sqlWarnings = append(sqlWarnings, fmt.Sprintf("%s: %s (%d)", level, message, code)) @@ -1559,6 +1567,17 @@ func (this *Applier) ApplyDMLEventQueries(dmlEvents [](*binlog.BinlogDMLEvent)) return rollback(err) } + // Compile regex once before loop to avoid performance penalty and handle errors properly + // Escape metacharacters to avoid regex syntax errors with table/index names like "my_table[test]" or "idx.name" + // Duplicate warnings are formatted differently across mysql versions, hence the optional table name prefix + escapedTable := regexp.QuoteMeta(this.migrationContext.GetGhostTableName()) + escapedKey := regexp.QuoteMeta(this.migrationContext.UniqueKey.NameInGhostTable) + migrationUniqueKeyPattern := fmt.Sprintf(`for key '(%s\.)?%s'`, escapedTable, escapedKey) + migrationKeyRegex, err := regexp.Compile(migrationUniqueKeyPattern) + if err != nil { + return rollback(fmt.Errorf("failed to compile migration key pattern: %w", err)) + } + var sqlWarnings []string for rows.Next() { var level, message string @@ -1567,10 +1586,7 @@ func (this *Applier) ApplyDMLEventQueries(dmlEvents [](*binlog.BinlogDMLEvent)) this.migrationContext.Log.Warningf("Failed to read SHOW WARNINGS row") continue } - // Duplicate warnings are formatted differently across mysql versions, hence the optional table name prefix - migrationUniqueKeyExpression := fmt.Sprintf("for key '(%s\\.)?%s'", this.migrationContext.GetGhostTableName(), this.migrationContext.UniqueKey.NameInGhostTable) - matched, _ := regexp.MatchString(migrationUniqueKeyExpression, message) - if strings.Contains(message, "Duplicate entry") && matched { + if strings.Contains(message, "Duplicate entry") && migrationKeyRegex.MatchString(message) { // Duplicate entry on migration unique key is expected during binlog replay // (row was already copied during bulk copy phase) continue diff --git a/go/logic/applier_test.go b/go/logic/applier_test.go index 9c761373a..e87b04b47 100644 --- a/go/logic/applier_test.go +++ b/go/logic/applier_test.go @@ -782,23 +782,35 @@ func (suite *ApplierTestSuite) TestPanicOnWarningsWithDuplicateKeyOnNonMigration suite.Require().Error(err) suite.Require().Contains(err.Error(), "Duplicate entry") - // Verify that the ghost table still has only 3 rows (no data loss) - rows, err := suite.db.Query("SELECT * FROM " + getTestGhostTableName() + " ORDER BY id") + // Verify that the ghost table still has only the original 3 rows with correct data (no data loss) + rows, err := suite.db.Query("SELECT id, email FROM " + getTestGhostTableName() + " ORDER BY id") suite.Require().NoError(err) defer rows.Close() - var count int + var results []struct { + id int + email string + } for rows.Next() { var id int var email string err = rows.Scan(&id, &email) suite.Require().NoError(err) - count += 1 + results = append(results, struct { + id int + email string + }{id, email}) } suite.Require().NoError(rows.Err()) - // All 3 original rows should still be present - suite.Require().Equal(3, count) + // All 3 original rows should still be present with correct data + suite.Require().Len(results, 3) + suite.Require().Equal(1, results[0].id) + suite.Require().Equal("user1@example.com", results[0].email) + suite.Require().Equal(2, results[1].id) + suite.Require().Equal("user2@example.com", results[1].email) + suite.Require().Equal(3, results[2].id) + suite.Require().Equal("user3@example.com", results[2].email) } // TestUpdateModifyingUniqueKeyWithDuplicateOnOtherIndex tests the scenario where: @@ -980,6 +992,286 @@ func (suite *ApplierTestSuite) TestNormalUpdateWithPanicOnWarnings() { suite.Require().NoError(rows.Err()) } +// TestDuplicateOnMigrationKeyAllowedInBinlogReplay tests the positive case where +// a duplicate on the migration unique key during binlog replay is expected and should be allowed +func (suite *ApplierTestSuite) TestDuplicateOnMigrationKeyAllowedInBinlogReplay() { + ctx := context.Background() + + var err error + + // Create table with id and email columns, where id is the primary key + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100));", getTestTableName())) + suite.Require().NoError(err) + + // Create ghost table with same schema plus a new unique index on email + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100), UNIQUE KEY email_unique (email));", getTestGhostTableName())) + suite.Require().NoError(err) + + connectionConfig, err := getTestConnectionConfig(ctx, suite.mysqlContainer) + suite.Require().NoError(err) + + migrationContext := newTestMigrationContext() + migrationContext.ApplierConnectionConfig = connectionConfig + migrationContext.SetConnectionConfig("innodb") + + migrationContext.PanicOnWarnings = true + + migrationContext.OriginalTableColumns = sql.NewColumnList([]string{"id", "email"}) + migrationContext.SharedColumns = sql.NewColumnList([]string{"id", "email"}) + migrationContext.MappedSharedColumns = sql.NewColumnList([]string{"id", "email"}) + migrationContext.UniqueKey = &sql.UniqueKey{ + Name: "PRIMARY", + NameInGhostTable: "PRIMARY", + Columns: *sql.NewColumnList([]string{"id"}), + } + + applier := NewApplier(migrationContext) + suite.Require().NoError(applier.prepareQueries()) + defer applier.Teardown() + + err = applier.InitDBConnections() + suite.Require().NoError(err) + + // Insert initial rows into ghost table (simulating bulk copy phase) + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s (id, email) VALUES (1, 'alice@example.com'), (2, 'bob@example.com');", getTestGhostTableName())) + suite.Require().NoError(err) + + // Simulate binlog event: try to insert the same row again (duplicate on PRIMARY KEY - the migration key) + // This is expected during binlog replay when a row was already copied during bulk copy + dmlEvents := []*binlog.BinlogDMLEvent{ + { + DatabaseName: testMysqlDatabase, + TableName: testMysqlTableName, + DML: binlog.InsertDML, + NewColumnValues: sql.ToColumnValues([]interface{}{1, "alice@example.com"}), // duplicate PRIMARY KEY + }, + } + + // This should succeed - duplicate on migration unique key is expected and should be filtered out + err = applier.ApplyDMLEventQueries(dmlEvents) + suite.Require().NoError(err) + + // Verify that the ghost table still has only the original 2 rows with correct data + rows, err := suite.db.Query("SELECT id, email FROM " + getTestGhostTableName() + " ORDER BY id") + suite.Require().NoError(err) + defer rows.Close() + + var results []struct { + id int + email string + } + for rows.Next() { + var id int + var email string + err = rows.Scan(&id, &email) + suite.Require().NoError(err) + results = append(results, struct { + id int + email string + }{id, email}) + } + suite.Require().NoError(rows.Err()) + + // Should still have exactly 2 rows with correct data + suite.Require().Len(results, 2) + suite.Require().Equal(1, results[0].id) + suite.Require().Equal("alice@example.com", results[0].email) + suite.Require().Equal(2, results[1].id) + suite.Require().Equal("bob@example.com", results[1].email) +} + +// TestRegexMetacharactersInIndexName tests that index names with regex metacharacters +// are properly escaped. We test with a plus sign in the index name, which without +// QuoteMeta would be treated as a regex quantifier (one or more of preceding character). +// This test verifies the pattern matches ONLY the exact index name, not a regex pattern. +func (suite *ApplierTestSuite) TestRegexMetacharactersInIndexName() { + ctx := context.Background() + + var err error + + // Create tables with an index name containing a plus sign + // Without QuoteMeta, "idx+" would mean "one or more 'd'" in regex + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100));", getTestTableName())) + suite.Require().NoError(err) + + // MySQL allows + in index names when quoted + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100), UNIQUE KEY `idx+email` (email));", getTestGhostTableName())) + suite.Require().NoError(err) + + connectionConfig, err := getTestConnectionConfig(ctx, suite.mysqlContainer) + suite.Require().NoError(err) + + migrationContext := newTestMigrationContext() + migrationContext.ApplierConnectionConfig = connectionConfig + migrationContext.SetConnectionConfig("innodb") + + migrationContext.PanicOnWarnings = true + + migrationContext.OriginalTableColumns = sql.NewColumnList([]string{"id", "email"}) + migrationContext.SharedColumns = sql.NewColumnList([]string{"id", "email"}) + migrationContext.MappedSharedColumns = sql.NewColumnList([]string{"id", "email"}) + migrationContext.UniqueKey = &sql.UniqueKey{ + Name: "PRIMARY", + NameInGhostTable: "PRIMARY", + Columns: *sql.NewColumnList([]string{"id"}), + } + + applier := NewApplier(migrationContext) + suite.Require().NoError(applier.prepareQueries()) + defer applier.Teardown() + + err = applier.InitDBConnections() + suite.Require().NoError(err) + + // Insert initial rows + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s (id, email) VALUES (1, 'alice@example.com'), (2, 'bob@example.com');", getTestGhostTableName())) + suite.Require().NoError(err) + + // Test: duplicate on PRIMARY KEY (migration key) should be allowed + dmlEvents := []*binlog.BinlogDMLEvent{ + { + DatabaseName: testMysqlDatabase, + TableName: testMysqlTableName, + DML: binlog.InsertDML, + NewColumnValues: sql.ToColumnValues([]interface{}{1, "alice@example.com"}), + }, + } + + err = applier.ApplyDMLEventQueries(dmlEvents) + suite.Require().NoError(err, "Duplicate on PRIMARY should be allowed even with + in index name") + + // Test: duplicate on idx+email should fail + // This verifies our regex correctly identifies "idx+email" (not as a regex pattern) + dmlEvents = []*binlog.BinlogDMLEvent{ + { + DatabaseName: testMysqlDatabase, + TableName: testMysqlTableName, + DML: binlog.InsertDML, + NewColumnValues: sql.ToColumnValues([]interface{}{3, "alice@example.com"}), + }, + } + + err = applier.ApplyDMLEventQueries(dmlEvents) + suite.Require().Error(err, "Duplicate on idx+email should fail") + suite.Require().Contains(err.Error(), "Duplicate entry") + + // Verify final state - should still have only the original 2 rows + rows, err := suite.db.Query("SELECT id, email FROM " + getTestGhostTableName() + " ORDER BY id") + suite.Require().NoError(err) + defer rows.Close() + + var results []struct { + id int + email string + } + for rows.Next() { + var id int + var email string + err = rows.Scan(&id, &email) + suite.Require().NoError(err) + results = append(results, struct { + id int + email string + }{id, email}) + } + suite.Require().NoError(rows.Err()) + + suite.Require().Len(results, 2) + suite.Require().Equal(1, results[0].id) + suite.Require().Equal("alice@example.com", results[0].email) + suite.Require().Equal(2, results[1].id) + suite.Require().Equal("bob@example.com", results[1].email) +} + +// TestPanicOnWarningsDisabled tests that when PanicOnWarnings is false, +// warnings are not checked and duplicates are silently ignored +func (suite *ApplierTestSuite) TestPanicOnWarningsDisabled() { + ctx := context.Background() + + var err error + + // Create table with id and email columns + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100));", getTestTableName())) + suite.Require().NoError(err) + + // Create ghost table with unique index on email + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100), UNIQUE KEY email_unique (email));", getTestGhostTableName())) + suite.Require().NoError(err) + + connectionConfig, err := getTestConnectionConfig(ctx, suite.mysqlContainer) + suite.Require().NoError(err) + + migrationContext := newTestMigrationContext() + migrationContext.ApplierConnectionConfig = connectionConfig + migrationContext.SetConnectionConfig("innodb") + + // PanicOnWarnings is false (default) + migrationContext.PanicOnWarnings = false + + migrationContext.OriginalTableColumns = sql.NewColumnList([]string{"id", "email"}) + migrationContext.SharedColumns = sql.NewColumnList([]string{"id", "email"}) + migrationContext.MappedSharedColumns = sql.NewColumnList([]string{"id", "email"}) + migrationContext.UniqueKey = &sql.UniqueKey{ + Name: "PRIMARY", + NameInGhostTable: "PRIMARY", + Columns: *sql.NewColumnList([]string{"id"}), + } + + applier := NewApplier(migrationContext) + suite.Require().NoError(applier.prepareQueries()) + defer applier.Teardown() + + err = applier.InitDBConnections() + suite.Require().NoError(err) + + // Insert initial rows into ghost table + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s (id, email) VALUES (1, 'alice@example.com'), (2, 'bob@example.com');", getTestGhostTableName())) + suite.Require().NoError(err) + + // Simulate binlog event: insert duplicate email on non-migration index + // With PanicOnWarnings disabled, this should succeed (INSERT IGNORE skips it) + dmlEvents := []*binlog.BinlogDMLEvent{ + { + DatabaseName: testMysqlDatabase, + TableName: testMysqlTableName, + DML: binlog.InsertDML, + NewColumnValues: sql.ToColumnValues([]interface{}{3, "alice@example.com"}), // duplicate email + }, + } + + // Should succeed because PanicOnWarnings is disabled + err = applier.ApplyDMLEventQueries(dmlEvents) + suite.Require().NoError(err) + + // Verify that only 2 original rows exist with correct data (the duplicate was silently ignored) + rows, err := suite.db.Query("SELECT id, email FROM " + getTestGhostTableName() + " ORDER BY id") + suite.Require().NoError(err) + defer rows.Close() + + var results []struct { + id int + email string + } + for rows.Next() { + var id int + var email string + err = rows.Scan(&id, &email) + suite.Require().NoError(err) + results = append(results, struct { + id int + email string + }{id, email}) + } + suite.Require().NoError(rows.Err()) + + // Should still have exactly 2 original rows (id=3 was silently ignored) + suite.Require().Len(results, 2) + suite.Require().Equal(1, results[0].id) + suite.Require().Equal("alice@example.com", results[0].email) + suite.Require().Equal(2, results[1].id) + suite.Require().Equal("bob@example.com", results[1].email) +} + func TestApplier(t *testing.T) { suite.Run(t, new(ApplierTestSuite)) } From c08e368d7317327d307c03221735ee21c4bc38f9 Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Mon, 9 Mar 2026 19:31:48 -0700 Subject: [PATCH 02/12] Address Copilot feedback --- go/logic/applier.go | 33 +++++++++++++++++++-------------- go/logic/applier_test.go | 27 ++++++++++++++------------- 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/go/logic/applier.go b/go/logic/applier.go index a344919c9..5fbc44282 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -94,6 +94,21 @@ func NewApplier(migrationContext *base.MigrationContext) *Applier { } } +// compileMigrationKeyWarningRegex compiles a regex pattern that matches duplicate key warnings +// for the migration's unique key. Duplicate warnings are formatted differently across MySQL versions, +// hence the optional table name prefix. Metacharacters in table/index names are escaped to avoid +// regex syntax errors. +func (this *Applier) compileMigrationKeyWarningRegex() (*regexp.Regexp, error) { + escapedTable := regexp.QuoteMeta(this.migrationContext.GetGhostTableName()) + escapedKey := regexp.QuoteMeta(this.migrationContext.UniqueKey.NameInGhostTable) + migrationUniqueKeyPattern := fmt.Sprintf(`for key '(%s\.)?%s'`, escapedTable, escapedKey) + migrationKeyRegex, err := regexp.Compile(migrationUniqueKeyPattern) + if err != nil { + return nil, fmt.Errorf("failed to compile migration key pattern: %w", err) + } + return migrationKeyRegex, nil +} + func (this *Applier) InitDBConnections() (err error) { applierUri := this.connectionConfig.GetDBUri(this.migrationContext.DatabaseName) uriWithMulti := fmt.Sprintf("%s&multiStatements=true", applierUri) @@ -918,14 +933,9 @@ func (this *Applier) ApplyIterationInsertQuery() (chunkSize int64, rowsAffected } // Compile regex once before loop to avoid performance penalty and handle errors properly - // Escape metacharacters to avoid regex syntax errors with table/index names like "my_table[test]" or "idx.name" - // Duplicate warnings are formatted differently across mysql versions, hence the optional table name prefix - escapedTable := regexp.QuoteMeta(this.migrationContext.GetGhostTableName()) - escapedKey := regexp.QuoteMeta(this.migrationContext.UniqueKey.NameInGhostTable) - migrationUniqueKeyPattern := fmt.Sprintf(`for key '(%s\.)?%s'`, escapedTable, escapedKey) - migrationKeyRegex, err := regexp.Compile(migrationUniqueKeyPattern) + migrationKeyRegex, err := this.compileMigrationKeyWarningRegex() if err != nil { - return nil, fmt.Errorf("failed to compile migration key pattern: %w", err) + return nil, err } var sqlWarnings []string @@ -1568,14 +1578,9 @@ func (this *Applier) ApplyDMLEventQueries(dmlEvents [](*binlog.BinlogDMLEvent)) } // Compile regex once before loop to avoid performance penalty and handle errors properly - // Escape metacharacters to avoid regex syntax errors with table/index names like "my_table[test]" or "idx.name" - // Duplicate warnings are formatted differently across mysql versions, hence the optional table name prefix - escapedTable := regexp.QuoteMeta(this.migrationContext.GetGhostTableName()) - escapedKey := regexp.QuoteMeta(this.migrationContext.UniqueKey.NameInGhostTable) - migrationUniqueKeyPattern := fmt.Sprintf(`for key '(%s\.)?%s'`, escapedTable, escapedKey) - migrationKeyRegex, err := regexp.Compile(migrationUniqueKeyPattern) + migrationKeyRegex, err := this.compileMigrationKeyWarningRegex() if err != nil { - return rollback(fmt.Errorf("failed to compile migration key pattern: %w", err)) + return rollback(err) } var sqlWarnings []string diff --git a/go/logic/applier_test.go b/go/logic/applier_test.go index e87b04b47..3120550e7 100644 --- a/go/logic/applier_test.go +++ b/go/logic/applier_test.go @@ -1082,7 +1082,7 @@ func (suite *ApplierTestSuite) TestDuplicateOnMigrationKeyAllowedInBinlogReplay( // TestRegexMetacharactersInIndexName tests that index names with regex metacharacters // are properly escaped. We test with a plus sign in the index name, which without -// QuoteMeta would be treated as a regex quantifier (one or more of preceding character). +// QuoteMeta would be treated as a regex quantifier (one or more of 'x' in this case). // This test verifies the pattern matches ONLY the exact index name, not a regex pattern. func (suite *ApplierTestSuite) TestRegexMetacharactersInIndexName() { ctx := context.Background() @@ -1090,8 +1090,8 @@ func (suite *ApplierTestSuite) TestRegexMetacharactersInIndexName() { var err error // Create tables with an index name containing a plus sign - // Without QuoteMeta, "idx+" would mean "one or more 'd'" in regex - _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100));", getTestTableName())) + // Without QuoteMeta, "idx+email" would be treated as a regex pattern where + is a quantifier + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100), UNIQUE KEY `idx+email` (email));", getTestTableName())) suite.Require().NoError(err) // MySQL allows + in index names when quoted @@ -1111,9 +1111,9 @@ func (suite *ApplierTestSuite) TestRegexMetacharactersInIndexName() { migrationContext.SharedColumns = sql.NewColumnList([]string{"id", "email"}) migrationContext.MappedSharedColumns = sql.NewColumnList([]string{"id", "email"}) migrationContext.UniqueKey = &sql.UniqueKey{ - Name: "PRIMARY", - NameInGhostTable: "PRIMARY", - Columns: *sql.NewColumnList([]string{"id"}), + Name: "idx+email", + NameInGhostTable: "idx+email", + Columns: *sql.NewColumnList([]string{"email"}), } applier := NewApplier(migrationContext) @@ -1127,32 +1127,33 @@ func (suite *ApplierTestSuite) TestRegexMetacharactersInIndexName() { _, err = suite.db.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s (id, email) VALUES (1, 'alice@example.com'), (2, 'bob@example.com');", getTestGhostTableName())) suite.Require().NoError(err) - // Test: duplicate on PRIMARY KEY (migration key) should be allowed + // Test: duplicate on idx+email (the migration key) should be allowed + // This verifies our regex correctly identifies "idx+email" as the migration key + // Without regexp.QuoteMeta, the + would be treated as a regex quantifier and might not match correctly dmlEvents := []*binlog.BinlogDMLEvent{ { DatabaseName: testMysqlDatabase, TableName: testMysqlTableName, DML: binlog.InsertDML, - NewColumnValues: sql.ToColumnValues([]interface{}{1, "alice@example.com"}), + NewColumnValues: sql.ToColumnValues([]interface{}{3, "alice@example.com"}), }, } err = applier.ApplyDMLEventQueries(dmlEvents) - suite.Require().NoError(err, "Duplicate on PRIMARY should be allowed even with + in index name") + suite.Require().NoError(err, "Duplicate on idx+email (migration key) should be allowed with PanicOnWarnings enabled") - // Test: duplicate on idx+email should fail - // This verifies our regex correctly identifies "idx+email" (not as a regex pattern) + // Test: duplicate on PRIMARY (not the migration key) should fail dmlEvents = []*binlog.BinlogDMLEvent{ { DatabaseName: testMysqlDatabase, TableName: testMysqlTableName, DML: binlog.InsertDML, - NewColumnValues: sql.ToColumnValues([]interface{}{3, "alice@example.com"}), + NewColumnValues: sql.ToColumnValues([]interface{}{1, "charlie@example.com"}), }, } err = applier.ApplyDMLEventQueries(dmlEvents) - suite.Require().Error(err, "Duplicate on idx+email should fail") + suite.Require().Error(err, "Duplicate on PRIMARY (not migration key) should fail with PanicOnWarnings enabled") suite.Require().Contains(err.Error(), "Duplicate entry") // Verify final state - should still have only the original 2 rows From a095177f010107f59aed47863d3680bc7ebd476b Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Tue, 10 Mar 2026 11:40:49 -0700 Subject: [PATCH 03/12] Add test for warnings on composite unique keys --- go/logic/applier_test.go | 96 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/go/logic/applier_test.go b/go/logic/applier_test.go index 3120550e7..aa6714f85 100644 --- a/go/logic/applier_test.go +++ b/go/logic/applier_test.go @@ -813,6 +813,102 @@ func (suite *ApplierTestSuite) TestPanicOnWarningsWithDuplicateKeyOnNonMigration suite.Require().Equal("user3@example.com", results[2].email) } +func (suite *ApplierTestSuite) TestPanicOnWarningsWithDuplicateCompositeUniqueKey() { + ctx := context.Background() + + var err error + + // Create table with id, email, and username columns + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100), username VARCHAR(100));", getTestTableName())) + suite.Require().NoError(err) + + // Create ghost table with same schema plus a composite unique index on (email, username) + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("CREATE TABLE %s (id INT PRIMARY KEY, email VARCHAR(100), username VARCHAR(100), UNIQUE KEY email_username_unique (email, username));", getTestGhostTableName())) + suite.Require().NoError(err) + + connectionConfig, err := getTestConnectionConfig(ctx, suite.mysqlContainer) + suite.Require().NoError(err) + + migrationContext := newTestMigrationContext() + migrationContext.ApplierConnectionConfig = connectionConfig + migrationContext.SetConnectionConfig("innodb") + + migrationContext.PanicOnWarnings = true + + migrationContext.OriginalTableColumns = sql.NewColumnList([]string{"id", "email", "username"}) + migrationContext.SharedColumns = sql.NewColumnList([]string{"id", "email", "username"}) + migrationContext.MappedSharedColumns = sql.NewColumnList([]string{"id", "email", "username"}) + migrationContext.UniqueKey = &sql.UniqueKey{ + Name: "PRIMARY", + NameInGhostTable: "PRIMARY", + Columns: *sql.NewColumnList([]string{"id"}), + } + + applier := NewApplier(migrationContext) + suite.Require().NoError(applier.prepareQueries()) + defer applier.Teardown() + + err = applier.InitDBConnections() + suite.Require().NoError(err) + + // Insert initial rows into ghost table (simulating bulk copy phase) + // alice@example.com + bob is ok due to composite unique index + _, err = suite.db.ExecContext(ctx, fmt.Sprintf("INSERT INTO %s (id, email, username) VALUES (1, 'alice@example.com', 'alice'), (2, 'alice@example.com', 'bob'), (3, 'charlie@example.com', 'charlie');", getTestGhostTableName())) + suite.Require().NoError(err) + + // Simulate binlog event: try to insert a row with duplicate composite key (email + username) + // This should fail with a warning because the ghost table has a composite unique index + dmlEvents := []*binlog.BinlogDMLEvent{ + { + DatabaseName: testMysqlDatabase, + TableName: testMysqlTableName, + DML: binlog.InsertDML, + NewColumnValues: sql.ToColumnValues([]interface{}{4, "alice@example.com", "alice"}), // duplicate (email, username) + }, + } + + // This should return an error when PanicOnWarnings is enabled + err = applier.ApplyDMLEventQueries(dmlEvents) + suite.Require().Error(err) + suite.Require().Contains(err.Error(), "Duplicate entry") + + // Verify that the ghost table still has only the original 3 rows with correct data (no data loss) + rows, err := suite.db.Query("SELECT id, email, username FROM " + getTestGhostTableName() + " ORDER BY id") + suite.Require().NoError(err) + defer rows.Close() + + var results []struct { + id int + email string + username string + } + for rows.Next() { + var id int + var email string + var username string + err = rows.Scan(&id, &email, &username) + suite.Require().NoError(err) + results = append(results, struct { + id int + email string + username string + }{id, email, username}) + } + suite.Require().NoError(rows.Err()) + + // All 3 original rows should still be present with correct data + suite.Require().Len(results, 3) + suite.Require().Equal(1, results[0].id) + suite.Require().Equal("alice@example.com", results[0].email) + suite.Require().Equal("alice", results[0].username) + suite.Require().Equal(2, results[1].id) + suite.Require().Equal("alice@example.com", results[1].email) + suite.Require().Equal("bob", results[1].username) + suite.Require().Equal(3, results[2].id) + suite.Require().Equal("charlie@example.com", results[2].email) + suite.Require().Equal("charlie", results[2].username) +} + // TestUpdateModifyingUniqueKeyWithDuplicateOnOtherIndex tests the scenario where: // 1. An UPDATE modifies the unique key (converted to DELETE+INSERT) // 2. The INSERT would create a duplicate on a NON-migration unique index From 9fee640d712cfa03bbb22ead10fa88ecb5564dcc Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Tue, 10 Mar 2026 12:19:47 -0700 Subject: [PATCH 04/12] Add test for updating pk with duplicate --- .../create.sql | 27 +++++++++++++++++++ .../expect_failure | 1 + .../extra_args | 1 + 3 files changed, 29 insertions(+) create mode 100644 localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql create mode 100644 localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/expect_failure create mode 100644 localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/extra_args diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql new file mode 100644 index 000000000..e3c4a8d9b --- /dev/null +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql @@ -0,0 +1,27 @@ +drop table if exists gh_ost_test; +create table gh_ost_test ( + id int auto_increment, + email varchar(100) not null, + primary key (id) +) auto_increment=1; + +insert into gh_ost_test (email) values ('alice@example.com'); +insert into gh_ost_test (email) values ('bob@example.com'); +insert into gh_ost_test (email) values ('charlie@example.com'); + +drop event if exists gh_ost_test; +delimiter ;; +create event gh_ost_test + on schedule every 1 second + starts current_timestamp + interval 3 second + ends current_timestamp + interval 60 second + on completion not preserve + enable + do +begin + -- This UPDATE modifies the primary key, so it will be converted to DELETE + INSERT + -- The INSERT will attempt to insert email='alice@example.com' (duplicate) + -- which violates the new unique index being added by the migration + -- Delay ensures this fires during binlog apply phase, not bulk copy + update gh_ost_test set id=10, email='alice@example.com' where id=2; +end ;; diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/expect_failure b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/expect_failure new file mode 100644 index 000000000..a54788d01 --- /dev/null +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/expect_failure @@ -0,0 +1 @@ +Warnings detected during DML event application diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/extra_args b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/extra_args new file mode 100644 index 000000000..abe611b4b --- /dev/null +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/extra_args @@ -0,0 +1 @@ +--panic-on-warnings --alter "ADD UNIQUE KEY email_unique (email)" From 06df80db74f1bece2a6f5fbd8cffe923f2d9731c Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Wed, 11 Mar 2026 10:41:10 -0700 Subject: [PATCH 05/12] Improve replica test debugging - Print log excerpt on failure - Upload full log artifacts on failure --- .github/workflows/replica-tests.yml | 14 ++++++++++++++ localtests/test.sh | 20 ++++++++++---------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/.github/workflows/replica-tests.yml b/.github/workflows/replica-tests.yml index 957d7a176..5a9bfb7f7 100644 --- a/.github/workflows/replica-tests.yml +++ b/.github/workflows/replica-tests.yml @@ -28,6 +28,20 @@ jobs: - name: Run tests run: script/docker-gh-ost-replica-tests run + - name: Set artifact name + if: failure() + run: | + ARTIFACT_NAME=$(echo "${{ matrix.image }}" | tr '/:' '-') + echo "ARTIFACT_NAME=test-logs-${ARTIFACT_NAME}" >> $GITHUB_ENV + + - name: Upload test logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: ${{ env.ARTIFACT_NAME }} + path: /tmp/gh-ost-test.* + retention-days: 7 + - name: Teardown environment if: always() run: script/docker-gh-ost-replica-tests down diff --git a/localtests/test.sh b/localtests/test.sh index 404eeece3..dc7917b3f 100755 --- a/localtests/test.sh +++ b/localtests/test.sh @@ -107,12 +107,6 @@ verify_master_and_replica() { fi } -exec_cmd() { - echo "$@" - command "$@" 1>$test_logfile 2>&1 - return $? -} - echo_dot() { echo -n "." } @@ -225,7 +219,7 @@ test_single() { cat $tests_path/$test_name/create.sql return 1 fi - + if [ -f $tests_path/$test_name/before.sql ]; then gh-ost-test-mysql-master --default-character-set=utf8mb4 test < $tests_path/$test_name/before.sql gh-ost-test-mysql-replica --default-character-set=utf8mb4 test < $tests_path/$test_name/before.sql @@ -310,7 +304,7 @@ test_single() { echo_dot echo $cmd >$exec_command_file echo_dot - bash $exec_command_file 1>$test_logfile 2>&1 + bash $exec_command_file >$test_logfile 2>&1 execution_result=$? cleanup @@ -332,7 +326,10 @@ test_single() { if [ -f $tests_path/$test_name/expect_failure ]; then if [ $execution_result -eq 0 ]; then echo - echo "ERROR $test_name execution was expected to exit on error but did not. cat $test_logfile" + echo "ERROR $test_name execution was expected to exit on error but did not." + echo "=== Last 50 lines of $test_logfile ===" + tail -n 50 $test_logfile + echo "=== End log excerpt ===" return 1 fi if [ -s $tests_path/$test_name/expect_failure ]; then @@ -342,7 +339,10 @@ test_single() { return 0 fi echo - echo "ERROR $test_name execution was expected to exit with error message '${expected_error_message}' but did not. cat $test_logfile" + echo "ERROR $test_name execution was expected to exit with error message '${expected_error_message}' but did not." + echo "=== Last 50 lines of $test_logfile ===" + tail -n 50 $test_logfile + echo "=== End log excerpt ===" return 1 fi # 'expect_failure' file has no content. We generally agree that the failure is correct From 6c124f40ed95d3b5faa761dc50556eb394e191a3 Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Wed, 11 Mar 2026 11:13:01 -0700 Subject: [PATCH 06/12] Reduce flakiness in update-pk test --- .../create.sql | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql index e3c4a8d9b..09fec1c05 100644 --- a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql @@ -9,19 +9,33 @@ insert into gh_ost_test (email) values ('alice@example.com'); insert into gh_ost_test (email) values ('bob@example.com'); insert into gh_ost_test (email) values ('charlie@example.com'); +-- Add many rows to extend row copy duration significantly +-- Need enough rows that copying takes multiple seconds to give event time to detect +-- With chunk-size=10, 1000 rows = 100 chunks which should take several seconds +insert into gh_ost_test (email) +select concat('user', @row := @row + 1, '@example.com') +from (select @row := 3) init, + (select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) t1, + (select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) t2, + (select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) t3 +limit 1000; + drop event if exists gh_ost_test; delimiter ;; create event gh_ost_test on schedule every 1 second - starts current_timestamp + interval 3 second + starts current_timestamp ends current_timestamp + interval 60 second on completion not preserve enable do begin - -- This UPDATE modifies the primary key, so it will be converted to DELETE + INSERT - -- The INSERT will attempt to insert email='alice@example.com' (duplicate) - -- which violates the new unique index being added by the migration - -- Delay ensures this fires during binlog apply phase, not bulk copy - update gh_ost_test set id=10, email='alice@example.com' where id=2; + -- Poll for row copy to start by checking if alice has been copied to ghost table + -- Once row copy has started, fire the UPDATE that creates a duplicate + if exists (select 1 from test._gh_ost_test_gho where email = 'alice@example.com') then + -- This UPDATE modifies the primary key, so it will be converted to DELETE + INSERT + -- The INSERT will attempt to insert email='alice@example.com' (duplicate) + -- which violates the new unique index being added by the migration + update gh_ost_test set id=10, email='alice@example.com' where id=2; + end if; end ;; From 679115a62916b021dc5ebfeaf093b481d53ca554 Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Wed, 11 Mar 2026 14:00:25 -0700 Subject: [PATCH 07/12] Revise test --- .../create.sql | 21 ++++++++++--------- .../destroy.sql | 1 + 2 files changed, 12 insertions(+), 10 deletions(-) create mode 100644 localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/destroy.sql diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql index 09fec1c05..85c1a4ede 100644 --- a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql @@ -9,16 +9,14 @@ insert into gh_ost_test (email) values ('alice@example.com'); insert into gh_ost_test (email) values ('bob@example.com'); insert into gh_ost_test (email) values ('charlie@example.com'); --- Add many rows to extend row copy duration significantly --- Need enough rows that copying takes multiple seconds to give event time to detect --- With chunk-size=10, 1000 rows = 100 chunks which should take several seconds +-- Add enough rows to give a window for the event to fire +-- With chunk-size=10, 100 rows = 10 chunks insert into gh_ost_test (email) select concat('user', @row := @row + 1, '@example.com') from (select @row := 3) init, (select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) t1, - (select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) t2, - (select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) t3 -limit 1000; + (select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) t2 +limit 100; drop event if exists gh_ost_test; delimiter ;; @@ -30,12 +28,15 @@ create event gh_ost_test enable do begin - -- Poll for row copy to start by checking if alice has been copied to ghost table - -- Once row copy has started, fire the UPDATE that creates a duplicate - if exists (select 1 from test._gh_ost_test_gho where email = 'alice@example.com') then + -- Poll for row copy to complete by checking if last row has been copied + -- Once row copy is done but before cutover, fire the UPDATE that creates a duplicate + -- Check a flag table to ensure we only fire once + if exists (select 1 from test._gh_ost_test_gho where id >= 100) + and not exists (select 1 from information_schema.tables where table_schema='test' and table_name='_gh_ost_fired') then + create table test._gh_ost_fired (id int); -- This UPDATE modifies the primary key, so it will be converted to DELETE + INSERT -- The INSERT will attempt to insert email='alice@example.com' (duplicate) -- which violates the new unique index being added by the migration - update gh_ost_test set id=10, email='alice@example.com' where id=2; + update gh_ost_test set id=200, email='alice@example.com' where id=2; end if; end ;; diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/destroy.sql b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/destroy.sql new file mode 100644 index 000000000..8d1e3a001 --- /dev/null +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/destroy.sql @@ -0,0 +1 @@ +drop table if exists _gh_ost_fired; From 560b69dc36dbb8f5ef89a2b93b0f895b17bcc500 Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Wed, 11 Mar 2026 14:31:40 -0700 Subject: [PATCH 08/12] More robust test fix --- .../create.sql | 25 ++++--------------- .../destroy.sql | 1 - 2 files changed, 5 insertions(+), 21 deletions(-) delete mode 100644 localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/destroy.sql diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql index 85c1a4ede..3442ce90c 100644 --- a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql @@ -9,15 +9,6 @@ insert into gh_ost_test (email) values ('alice@example.com'); insert into gh_ost_test (email) values ('bob@example.com'); insert into gh_ost_test (email) values ('charlie@example.com'); --- Add enough rows to give a window for the event to fire --- With chunk-size=10, 100 rows = 10 chunks -insert into gh_ost_test (email) -select concat('user', @row := @row + 1, '@example.com') -from (select @row := 3) init, - (select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) t1, - (select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) t2 -limit 100; - drop event if exists gh_ost_test; delimiter ;; create event gh_ost_test @@ -28,15 +19,9 @@ create event gh_ost_test enable do begin - -- Poll for row copy to complete by checking if last row has been copied - -- Once row copy is done but before cutover, fire the UPDATE that creates a duplicate - -- Check a flag table to ensure we only fire once - if exists (select 1 from test._gh_ost_test_gho where id >= 100) - and not exists (select 1 from information_schema.tables where table_schema='test' and table_name='_gh_ost_fired') then - create table test._gh_ost_fired (id int); - -- This UPDATE modifies the primary key, so it will be converted to DELETE + INSERT - -- The INSERT will attempt to insert email='alice@example.com' (duplicate) - -- which violates the new unique index being added by the migration - update gh_ost_test set id=200, email='alice@example.com' where id=2; - end if; + -- Keep inserting rows, then updating their PK to create DELETE+INSERT binlog events + -- Use alice's email so it conflicts with id=1 when applied to ghost table + insert ignore into gh_ost_test (email) values ('temp@example.com'); + update gh_ost_test set id = 1000 + id, email = 'alice@example.com' + where email = 'temp@example.com' order by id desc limit 1; end ;; diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/destroy.sql b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/destroy.sql deleted file mode 100644 index 8d1e3a001..000000000 --- a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/destroy.sql +++ /dev/null @@ -1 +0,0 @@ -drop table if exists _gh_ost_fired; From b123226df7f13f1b05d93a2a2e5e34ca6d6ba7a9 Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Wed, 11 Mar 2026 14:57:31 -0700 Subject: [PATCH 09/12] Make MySQL wait strategy less flaky Removed the `wait.ForExposedPort()` override from test files. The tests will now use the MySQL module's default wait strategy (`wait.ForLog("port: 3306 MySQL Community Server")`), which properly waits for MySQL to be ready to accept connections. Otherwise the port may be exposed, but MySQL is still initializing and not ready to accept connections. --- go/logic/applier_test.go | 2 -- go/logic/migrator_test.go | 2 -- go/logic/streamer_test.go | 2 -- 3 files changed, 6 deletions(-) diff --git a/go/logic/applier_test.go b/go/logic/applier_test.go index aa6714f85..6b02e0c02 100644 --- a/go/logic/applier_test.go +++ b/go/logic/applier_test.go @@ -23,7 +23,6 @@ import ( "github.com/github/gh-ost/go/binlog" "github.com/github/gh-ost/go/mysql" "github.com/github/gh-ost/go/sql" - "github.com/testcontainers/testcontainers-go/wait" ) func TestApplierGenerateSqlModeQuery(t *testing.T) { @@ -213,7 +212,6 @@ func (suite *ApplierTestSuite) SetupSuite() { testmysql.WithDatabase(testMysqlDatabase), testmysql.WithUsername(testMysqlUser), testmysql.WithPassword(testMysqlPass), - testcontainers.WithWaitStrategy(wait.ForExposedPort()), testmysql.WithConfigFile("my.cnf.test"), ) suite.Require().NoError(err) diff --git a/go/logic/migrator_test.go b/go/logic/migrator_test.go index c4fd49233..fcffa777a 100644 --- a/go/logic/migrator_test.go +++ b/go/logic/migrator_test.go @@ -32,7 +32,6 @@ import ( "github.com/github/gh-ost/go/mysql" "github.com/github/gh-ost/go/sql" "github.com/testcontainers/testcontainers-go" - "github.com/testcontainers/testcontainers-go/wait" ) func TestMigratorOnChangelogEvent(t *testing.T) { @@ -302,7 +301,6 @@ func (suite *MigratorTestSuite) SetupSuite() { testmysql.WithDatabase(testMysqlDatabase), testmysql.WithUsername(testMysqlUser), testmysql.WithPassword(testMysqlPass), - testcontainers.WithWaitStrategy(wait.ForExposedPort()), testmysql.WithConfigFile("my.cnf.test"), ) suite.Require().NoError(err) diff --git a/go/logic/streamer_test.go b/go/logic/streamer_test.go index 2c5d3886b..8e0b57f80 100644 --- a/go/logic/streamer_test.go +++ b/go/logic/streamer_test.go @@ -13,7 +13,6 @@ import ( "github.com/testcontainers/testcontainers-go" "github.com/testcontainers/testcontainers-go/modules/mysql" - "github.com/testcontainers/testcontainers-go/wait" "golang.org/x/sync/errgroup" ) @@ -31,7 +30,6 @@ func (suite *EventsStreamerTestSuite) SetupSuite() { mysql.WithDatabase(testMysqlDatabase), mysql.WithUsername(testMysqlUser), mysql.WithPassword(testMysqlPass), - testcontainers.WithWaitStrategy(wait.ForExposedPort()), ) suite.Require().NoError(err) From ac2c9c563be00b9130dba6fa046fb385faf945a3 Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Wed, 11 Mar 2026 15:34:36 -0700 Subject: [PATCH 10/12] Customize update-pk integration test Add support for test-specific execution so that we can guarantee that we're specifically testing the DML apply phase --- .../create.sql | 17 -- .../extra_args | 2 +- .../test.sh | 44 ++++++ localtests/test.sh | 145 ++++++++++-------- 4 files changed, 130 insertions(+), 78 deletions(-) create mode 100755 localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/test.sh diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql index 3442ce90c..444092cb2 100644 --- a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/create.sql @@ -8,20 +8,3 @@ create table gh_ost_test ( insert into gh_ost_test (email) values ('alice@example.com'); insert into gh_ost_test (email) values ('bob@example.com'); insert into gh_ost_test (email) values ('charlie@example.com'); - -drop event if exists gh_ost_test; -delimiter ;; -create event gh_ost_test - on schedule every 1 second - starts current_timestamp - ends current_timestamp + interval 60 second - on completion not preserve - enable - do -begin - -- Keep inserting rows, then updating their PK to create DELETE+INSERT binlog events - -- Use alice's email so it conflicts with id=1 when applied to ghost table - insert ignore into gh_ost_test (email) values ('temp@example.com'); - update gh_ost_test set id = 1000 + id, email = 'alice@example.com' - where email = 'temp@example.com' order by id desc limit 1; -end ;; diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/extra_args b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/extra_args index abe611b4b..04c41a471 100644 --- a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/extra_args +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/extra_args @@ -1 +1 @@ ---panic-on-warnings --alter "ADD UNIQUE KEY email_unique (email)" +--panic-on-warnings --alter "ADD UNIQUE KEY email_unique (email)" --postpone-cut-over-flag-file=/tmp/gh-ost-test.postpone-cutover diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/test.sh b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/test.sh new file mode 100755 index 000000000..cd34937a0 --- /dev/null +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/test.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Custom test: inject conflicting data AFTER row copy completes +# This tests the DML event application code path, not row copy + +# Create postpone flag file (referenced in extra_args) +postpone_flag_file=/tmp/gh-ost-test.postpone-cutover +touch $postpone_flag_file + +# Build gh-ost command using framework function +build_ghost_command + +# Run in background +echo_dot +# Clear log file before starting gh-ost +echo > $test_logfile +bash -c "$cmd" >>$test_logfile 2>&1 & +ghost_pid=$! + +# Wait for row copy to complete +echo_dot +for i in {1..30}; do + grep -q "Row copy complete" $test_logfile && break + ps -p $ghost_pid > /dev/null || { echo; echo "ERROR gh-ost exited early"; rm -f $postpone_flag_file; return 1; } + sleep 1; echo_dot +done + +# Inject conflicting SQL after row copy (UPDATE with PK change creates DELETE+INSERT in binlog) +echo_dot +gh-ost-test-mysql-master test -e "update gh_ost_test set id = 200, email = 'alice@example.com' where id = 2" + +# Wait for binlog event to replicate and be applied +sleep 10; echo_dot + +# Complete cutover by removing postpone flag +rm -f $postpone_flag_file + +# Wait for gh-ost to complete +wait $ghost_pid +execution_result=$? +rm -f $postpone_flag_file + +# Validate using framework function +validate_expected_failure +return $? diff --git a/localtests/test.sh b/localtests/test.sh index dc7917b3f..68dd4150c 100755 --- a/localtests/test.sh +++ b/localtests/test.sh @@ -135,6 +135,77 @@ start_replication() { done } +build_ghost_command() { + # Build gh-ost command with all standard options + # Expects: ghost_binary, replica_host, replica_port, master_host, master_port, + # table_name, storage_engine, throttle_flag_file, extra_args + cmd="GOTRACEBACK=crash $ghost_binary \ + --user=gh-ost \ + --password=gh-ost \ + --host=$replica_host \ + --port=$replica_port \ + --assume-master-host=${master_host}:${master_port} \ + --database=test \ + --table=${table_name} \ + --storage-engine=${storage_engine} \ + --alter='engine=${storage_engine}' \ + --exact-rowcount \ + --assume-rbr \ + --skip-metadata-lock-check \ + --initially-drop-old-table \ + --initially-drop-ghost-table \ + --throttle-query='select timestampdiff(second, min(last_update), now()) < 5 from _${table_name}_ghc' \ + --throttle-flag-file=$throttle_flag_file \ + --serve-socket-file=/tmp/gh-ost.test.sock \ + --initially-drop-socket-file \ + --test-on-replica \ + --default-retries=3 \ + --chunk-size=10 \ + --verbose \ + --debug \ + --stack \ + --checkpoint \ + --execute ${extra_args[@]}" +} + +validate_expected_failure() { + # Check if test expected to fail and validate error message + # Expects: tests_path, test_name, execution_result, test_logfile + if [ -f $tests_path/$test_name/expect_failure ]; then + if [ $execution_result -eq 0 ]; then + echo + echo "ERROR $test_name execution was expected to exit on error but did not." + echo "=== Last 50 lines of $test_logfile ===" + tail -n 50 $test_logfile + echo "=== End log excerpt ===" + return 1 + fi + if [ -s $tests_path/$test_name/expect_failure ]; then + # 'expect_failure' file has content. We expect to find this content in the log. + expected_error_message="$(cat $tests_path/$test_name/expect_failure)" + if grep -q "$expected_error_message" $test_logfile; then + return 0 + fi + echo + echo "ERROR $test_name execution was expected to exit with error message '${expected_error_message}' but did not." + echo "=== Last 50 lines of $test_logfile ===" + tail -n 50 $test_logfile + echo "=== End log excerpt ===" + return 1 + fi + # 'expect_failure' file has no content. We generally agree that the failure is correct + return 0 + fi + + if [ $execution_result -ne 0 ]; then + echo + echo "ERROR $test_name execution failure. cat $test_logfile:" + cat $test_logfile + return 1 + fi + return 0 +} + sysbench_prepare() { local mysql_host="$1" local mysql_port="$2" @@ -253,6 +324,15 @@ test_single() { table_name="gh_ost_test" ghost_table_name="_gh_ost_test_gho" + + # Check for custom test script + if [ -f $tests_path/$test_name/test.sh ]; then + # Source the custom test script which can override default behavior + # It has access to all variables and functions from this script + source $tests_path/$test_name/test.sh + return $? + fi + # test with sysbench oltp write load if [[ "$test_name" == "sysbench" ]]; then if ! command -v sysbench &>/dev/null; then @@ -273,34 +353,8 @@ test_single() { fi trap cleanup SIGINT - # - cmd="GOTRACEBACK=crash $ghost_binary \ - --user=gh-ost \ - --password=gh-ost \ - --host=$replica_host \ - --port=$replica_port \ - --assume-master-host=${master_host}:${master_port} - --database=test \ - --table=${table_name} \ - --storage-engine=${storage_engine} \ - --alter='engine=${storage_engine}' \ - --exact-rowcount \ - --assume-rbr \ - --skip-metadata-lock-check \ - --initially-drop-old-table \ - --initially-drop-ghost-table \ - --throttle-query='select timestampdiff(second, min(last_update), now()) < 5 from _${table_name}_ghc' \ - --throttle-flag-file=$throttle_flag_file \ - --serve-socket-file=/tmp/gh-ost.test.sock \ - --initially-drop-socket-file \ - --test-on-replica \ - --default-retries=3 \ - --chunk-size=10 \ - --verbose \ - --debug \ - --stack \ - --checkpoint \ - --execute ${extra_args[@]}" + # Build and execute gh-ost command + build_ghost_command echo_dot echo $cmd >$exec_command_file echo_dot @@ -323,38 +377,9 @@ test_single() { gh-ost-test-mysql-master --default-character-set=utf8mb4 test <$tests_path/$test_name/destroy.sql fi - if [ -f $tests_path/$test_name/expect_failure ]; then - if [ $execution_result -eq 0 ]; then - echo - echo "ERROR $test_name execution was expected to exit on error but did not." - echo "=== Last 50 lines of $test_logfile ===" - tail -n 50 $test_logfile - echo "=== End log excerpt ===" - return 1 - fi - if [ -s $tests_path/$test_name/expect_failure ]; then - # 'expect_failure' file has content. We expect to find this content in the log. - expected_error_message="$(cat $tests_path/$test_name/expect_failure)" - if grep -q "$expected_error_message" $test_logfile; then - return 0 - fi - echo - echo "ERROR $test_name execution was expected to exit with error message '${expected_error_message}' but did not." - echo "=== Last 50 lines of $test_logfile ===" - tail -n 50 $test_logfile - echo "=== End log excerpt ===" - return 1 - fi - # 'expect_failure' file has no content. We generally agree that the failure is correct - return 0 - fi - - if [ $execution_result -ne 0 ]; then - echo - echo "ERROR $test_name execution failure. cat $test_logfile:" - cat $test_logfile - return 1 - fi + # Validate expected failure or success + validate_expected_failure + return $? gh-ost-test-mysql-replica --default-character-set=utf8mb4 test -e "show create table ${ghost_table_name}\G" -ss >$ghost_structure_output_file From b2d0f6a89b0f8ab61f4326d1b642cdbb7d2989fc Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Mon, 16 Mar 2026 11:00:02 -0700 Subject: [PATCH 11/12] Fix regression in integration test harness --- localtests/test.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/localtests/test.sh b/localtests/test.sh index 68dd4150c..2b7c8ea85 100755 --- a/localtests/test.sh +++ b/localtests/test.sh @@ -378,9 +378,16 @@ test_single() { fi # Validate expected failure or success - validate_expected_failure - return $? + if ! validate_expected_failure; then + return 1 + fi + + # If this was an expected failure test, we're done (no need to validate structure/checksums) + if [ -f $tests_path/$test_name/expect_failure ]; then + return 0 + fi + # Test succeeded - now validate structure and checksums gh-ost-test-mysql-replica --default-character-set=utf8mb4 test -e "show create table ${ghost_table_name}\G" -ss >$ghost_structure_output_file if [ -f $tests_path/$test_name/expect_table_structure ]; then From 6b83aea0fd7c9edae15cde621d2e7c910cce505d Mon Sep 17 00:00:00 2001 From: Gabriel Gilder Date: Mon, 16 Mar 2026 18:20:03 -0700 Subject: [PATCH 12/12] Add test timeouts and fix error propagation Prevent indefinite test hangs by adding 120-second timeout and duration reporting. Fix silent error drops by propagating errors from background write goroutines to PanicAbort channel. Check for abort in sleepWhileTrue loop and handle its error in cutOver. --- go/logic/migrator.go | 24 +++++-- .../test.sh | 12 +++- localtests/test.sh | 65 +++++++++++++++---- 3 files changed, 83 insertions(+), 18 deletions(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index ca5f5a729..e3e6d429d 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -124,6 +124,10 @@ func NewMigrator(context *base.MigrationContext, appVersion string) *Migrator { // (or fails with error) func (this *Migrator) sleepWhileTrue(operation func() (bool, error)) error { for { + // Check for abort before continuing + if err := this.checkAbort(); err != nil { + return err + } shouldSleep, err := operation() if err != nil { return err @@ -539,7 +543,12 @@ func (this *Migrator) Migrate() (err error) { if err := this.hooksExecutor.onBeforeRowCopy(); err != nil { return err } - go this.executeWriteFuncs() + go func() { + if err := this.executeWriteFuncs(); err != nil { + // Send error to PanicAbort to trigger abort + _ = base.SendWithContext(this.migrationContext.GetContext(), this.migrationContext.PanicAbort, err) + } + }() go this.iterateChunks() this.migrationContext.MarkRowCopyStartTime() go this.initiateStatus() @@ -679,7 +688,12 @@ func (this *Migrator) Revert() error { this.initiateThrottler() go this.initiateStatus() - go this.executeDMLWriteFuncs() + go func() { + if err := this.executeDMLWriteFuncs(); err != nil { + // Send error to PanicAbort to trigger abort + _ = base.SendWithContext(this.migrationContext.GetContext(), this.migrationContext.PanicAbort, err) + } + }() this.printStatus(ForcePrintStatusRule) var retrier func(func() error, ...bool) error @@ -755,7 +769,7 @@ func (this *Migrator) cutOver() (err error) { this.migrationContext.MarkPointOfInterest() this.migrationContext.Log.Debugf("checking for cut-over postpone") - this.sleepWhileTrue( + if err := this.sleepWhileTrue( func() (bool, error) { heartbeatLag := this.migrationContext.TimeSinceLastHeartbeatOnChangelog() maxLagMillisecondsThrottle := time.Duration(atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold)) * time.Millisecond @@ -783,7 +797,9 @@ func (this *Migrator) cutOver() (err error) { } return false, nil }, - ) + ); err != nil { + return err + } atomic.StoreInt64(&this.migrationContext.IsPostponingCutOver, 0) this.migrationContext.MarkPointOfInterest() this.migrationContext.Log.Debugf("checking for cut-over postpone: complete") diff --git a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/test.sh b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/test.sh index cd34937a0..9f33be9a3 100755 --- a/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/test.sh +++ b/localtests/panic-on-warnings-update-pk-with-duplicate-on-new-unique-index/test.sh @@ -18,12 +18,22 @@ ghost_pid=$! # Wait for row copy to complete echo_dot +row_copy_complete=false for i in {1..30}; do - grep -q "Row copy complete" $test_logfile && break + if grep -q "Row copy complete" $test_logfile; then + row_copy_complete=true + break + fi ps -p $ghost_pid > /dev/null || { echo; echo "ERROR gh-ost exited early"; rm -f $postpone_flag_file; return 1; } sleep 1; echo_dot done +if ! $row_copy_complete; then + echo; echo "ERROR row copy did not complete within expected time" + rm -f $postpone_flag_file + return 1 +fi + # Inject conflicting SQL after row copy (UPDATE with PK change creates DELETE+INSERT in binlog) echo_dot gh-ost-test-mysql-master test -e "update gh_ost_test set id = 200, email = 'alice@example.com' where id = 2" diff --git a/localtests/test.sh b/localtests/test.sh index 2b7c8ea85..d918d473b 100755 --- a/localtests/test.sh +++ b/localtests/test.sh @@ -30,6 +30,8 @@ replica_port= original_sql_mode= current_gtid_mode= sysbench_pid= +test_timeout=120 +test_failure_log_tail_lines=50 OPTIND=1 while getopts "b:s:dtg" OPTION; do @@ -168,6 +170,12 @@ build_ghost_command() { --execute ${extra_args[@]}" } +print_log_excerpt() { + echo "=== Last $test_failure_log_tail_lines lines of $test_logfile ===" + tail -n $test_failure_log_tail_lines $test_logfile + echo "=== End log excerpt ===" +} + validate_expected_failure() { # Check if test expected to fail and validate error message # Expects: tests_path, test_name, execution_result, test_logfile @@ -175,9 +183,7 @@ validate_expected_failure() { if [ $execution_result -eq 0 ]; then echo echo "ERROR $test_name execution was expected to exit on error but did not." - echo "=== Last 50 lines of $test_logfile ===" - tail -n 50 $test_logfile - echo "=== End log excerpt ===" + print_log_excerpt return 1 fi if [ -s $tests_path/$test_name/expect_failure ]; then @@ -188,9 +194,7 @@ validate_expected_failure() { fi echo echo "ERROR $test_name execution was expected to exit with error message '${expected_error_message}' but did not." - echo "=== Last 50 lines of $test_logfile ===" - tail -n 50 $test_logfile - echo "=== End log excerpt ===" + print_log_excerpt return 1 fi # 'expect_failure' file has no content. We generally agree that the failure is correct @@ -327,10 +331,32 @@ test_single() { # Check for custom test script if [ -f $tests_path/$test_name/test.sh ]; then - # Source the custom test script which can override default behavior - # It has access to all variables and functions from this script - source $tests_path/$test_name/test.sh - return $? + # Run the custom test script in a subshell with timeout monitoring + # The subshell inherits all functions and variables from the current shell + (source $tests_path/$test_name/test.sh) & + test_pid=$! + + # Monitor the test with timeout + timeout_counter=0 + while kill -0 $test_pid 2>/dev/null; do + if [ $timeout_counter -ge $test_timeout ]; then + kill -TERM $test_pid 2>/dev/null + sleep 1 + kill -KILL $test_pid 2>/dev/null + wait $test_pid 2>/dev/null + echo + echo "ERROR $test_name execution timed out" + print_log_excerpt + return 1 + fi + sleep 1 + ((timeout_counter++)) + done + + # Get the exit code + wait $test_pid 2>/dev/null + execution_result=$? + return $execution_result fi # test with sysbench oltp write load @@ -358,11 +384,19 @@ test_single() { echo_dot echo $cmd >$exec_command_file echo_dot - bash $exec_command_file >$test_logfile 2>&1 + timeout $test_timeout bash $exec_command_file >$test_logfile 2>&1 execution_result=$? cleanup + # Check for timeout (exit code 124) + if [ $execution_result -eq 124 ]; then + echo + echo "ERROR $test_name execution timed out" + print_log_excerpt + return 1 + fi + if [ -f $tests_path/$test_name/sql_mode ]; then gh-ost-test-mysql-master --default-character-set=utf8mb4 test -e "set @@global.sql_mode='${original_sql_mode}'" gh-ost-test-mysql-replica --default-character-set=utf8mb4 test -e "set @@global.sql_mode='${original_sql_mode}'" @@ -441,14 +475,19 @@ test_all() { test_dirs=$(find "$tests_path" -mindepth 1 -maxdepth 1 ! -path . -type d | grep "$test_pattern" | sort) while read -r test_dir; do test_name=$(basename "$test_dir") + local test_start_time=$(date +%s) if ! test_single "$test_name"; then + local test_end_time=$(date +%s) + local test_duration=$((test_end_time - test_start_time)) create_statement=$(gh-ost-test-mysql-replica test -t -e "show create table ${ghost_table_name} \G") echo "$create_statement" >>$test_logfile - echo "+ FAIL" + echo "+ FAIL (${test_duration}s)" return 1 else + local test_end_time=$(date +%s) + local test_duration=$((test_end_time - test_start_time)) echo - echo "+ pass" + echo "+ pass (${test_duration}s)" fi mysql_version="$(gh-ost-test-mysql-replica -e "select @@version")" replica_terminology="slave"