diff --git a/normalizer.go b/normalizer.go index 4bed5c0..7563e3d 100644 --- a/normalizer.go +++ b/normalizer.go @@ -170,6 +170,7 @@ func (n *Normalizer) normalizeToken(lexer *Lexer, normalizedSQLBuilder *strings. var headState headState var colonCtx colonContext var ctes map[string]bool // Lazily initialized when first CTE is encountered + var inTableList bool var lastValueToken *LastValueToken @@ -180,7 +181,7 @@ func (n *Normalizer) normalizeToken(lexer *Lexer, normalizedSQLBuilder *strings. preProcessToken(token, lastValueToken) } if n.shouldCollectMetadata() { - n.collectMetadata(token, lastValueToken, meta, statementMetadata, &ctes) + n.collectMetadata(token, lastValueToken, meta, statementMetadata, &ctes, &inTableList) } n.normalizeSQL(token, lastValueToken, normalizedSQLBuilder, &groupablePlaceholder, &headState, &colonCtx, lexerOpts...) if token.Type == EOF { @@ -232,15 +233,18 @@ func (n *Normalizer) shouldCollectMetadata() bool { return n.config.CollectTables || n.config.CollectCommands || n.config.CollectComments || n.config.CollectProcedure } -func (n *Normalizer) collectMetadata(token *Token, lastValueToken *LastValueToken, meta *metadataSet, statementMetadata *StatementMetadata, ctes *map[string]bool) { +func (n *Normalizer) collectMetadata(token *Token, lastValueToken *LastValueToken, meta *metadataSet, statementMetadata *StatementMetadata, ctes *map[string]bool, inTableList *bool) { if n.config.CollectComments && (token.Type == COMMENT || token.Type == MULTILINE_COMMENT) { comment := token.Value meta.addMetadata(comment, meta.commentsSet, &statementMetadata.Comments) - } else if token.Type == COMMAND { - if n.config.CollectCommands { + } else if token.Type == COMMAND || token.Type == KEYWORD { + *inTableList = false + if n.config.CollectCommands && token.Type == COMMAND { command := strings.ToUpper(token.Value) meta.addMetadata(command, meta.commandsSet, &statementMetadata.Commands) } + } else if token.Type == PUNCTUATION && (token.Value == "(" || token.Value == ")") { + *inTableList = false } else if token.Type == IDENT || token.Type == QUOTED_IDENT || token.Type == FUNCTION { tokenVal := token.Value if token.Type == QUOTED_IDENT { @@ -261,7 +265,12 @@ func (n *Normalizer) collectMetadata(token *Token, lastValueToken *LastValueToke } (*ctes)[tokenVal] = true } else if n.config.CollectTables && lastValueToken.isTableIndicator { - // Collect table names, excluding any CTEs + *inTableList = true + isCTE := *ctes != nil && (*ctes)[tokenVal] + if !isCTE { + meta.addMetadata(tokenVal, meta.tablesSet, &statementMetadata.Tables) + } + } else if n.config.CollectTables && *inTableList && lastValueToken.Type == PUNCTUATION && lastValueToken.Value == "," { isCTE := *ctes != nil && (*ctes)[tokenVal] if !isCTE { meta.addMetadata(tokenVal, meta.tablesSet, &statementMetadata.Tables) diff --git a/normalizer_test.go b/normalizer_test.go index ac5cad2..75d8017 100644 --- a/normalizer_test.go +++ b/normalizer_test.go @@ -306,11 +306,11 @@ multiline comment */ input: "SELECT d.id, d.uuid, d.org_id, d.creator_id, d.updater_id, d.monitor_id, d.parent_id, d.original_parent_id, d.scope, d.start_dt, d.end_dt, d.canceled_dt, d.active, d.disabled, d.created, d.modified, d.message, d.monitor_tags, d.recurrence, d.mute_first_recovery_notification, d.scope_v2_query, d.scope_v2 FROM monitor_downtime d, org o WHERE o.id = d.org_id AND d.modified >= ? AND o.partition_num = ANY (?, ?, ?)", expected: "SELECT d.id, d.uuid, d.org_id, d.creator_id, d.updater_id, d.monitor_id, d.parent_id, d.original_parent_id, d.scope, d.start_dt, d.end_dt, d.canceled_dt, d.active, d.disabled, d.created, d.modified, d.message, d.monitor_tags, d.recurrence, d.mute_first_recovery_notification, d.scope_v2_query, d.scope_v2 FROM monitor_downtime d, org o WHERE o.id = d.org_id AND d.modified >= ? AND o.partition_num = ANY ( ? )", statementMetadata: StatementMetadata{ - Tables: []string{"monitor_downtime"}, + Tables: []string{"monitor_downtime", "org"}, Comments: []string{}, Commands: []string{"SELECT"}, Procedures: []string{}, - Size: 22, + Size: 25, }, }, { diff --git a/obfuscate_and_normalize_test.go b/obfuscate_and_normalize_test.go index 1c2b94a..215629d 100644 --- a/obfuscate_and_normalize_test.go +++ b/obfuscate_and_normalize_test.go @@ -632,6 +632,17 @@ multiline comment */ WithDBMS(DBMSOracle), }, }, + { + input: "SELECT events.target_id, events.created_at, source_tickets.title FROM events, tickets AS source_tickets WHERE events.org_id = 123 AND source_tickets.status = 'open' LIMIT 100", + expected: "SELECT events.target_id, events.created_at, source_tickets.title FROM events, tickets WHERE events.org_id = ? AND source_tickets.status = ? LIMIT ?", + statementMetadata: StatementMetadata{ + Tables: []string{"events", "tickets"}, + Comments: []string{}, + Commands: []string{"SELECT"}, + Procedures: []string{}, + Size: 19, + }, + }, } obfuscator := NewObfuscator( diff --git a/sqllexer_utils.go b/sqllexer_utils.go index 7775e87..0e31cc9 100644 --- a/sqllexer_utils.go +++ b/sqllexer_utils.go @@ -105,6 +105,7 @@ var keywords = []string{ "INTO", "IS", "KEY", + "LATERAL", "LEFT", "LIKE", "LIMIT", diff --git a/testdata/oracle/select/multiple-hints.json b/testdata/oracle/select/multiple-hints.json index deddd94..4cc6b50 100644 --- a/testdata/oracle/select/multiple-hints.json +++ b/testdata/oracle/select/multiple-hints.json @@ -4,8 +4,8 @@ { "expected": "SELECT e.employee_id, e.first_name, d.department_name FROM employees e, departments d WHERE e.department_id = d.department_id;", "statement_metadata": { - "size": 44, - "tables": ["employees"], + "size": 55, + "tables": ["employees", "departments"], "commands": ["SELECT"], "comments": ["/*+ LEADING(e) USE_HASH(d) */"], "procedures": [] diff --git a/testdata/oracle/select/select-with-oracle-specific-joins.json b/testdata/oracle/select/select-with-oracle-specific-joins.json index 06b165a..15961ba 100644 --- a/testdata/oracle/select/select-with-oracle-specific-joins.json +++ b/testdata/oracle/select/select-with-oracle-specific-joins.json @@ -4,8 +4,8 @@ { "expected": "SELECT e.employee_id, e.last_name, d.department_name FROM employees e, departments d WHERE e.department_id = d.department_id ( + )", "statement_metadata": { - "size": 15, - "tables": ["employees"], + "size": 26, + "tables": ["employees", "departments"], "commands": ["SELECT"], "comments": [], "procedures": [] diff --git a/testdata/oracle/select/use-nl-hint.json b/testdata/oracle/select/use-nl-hint.json index a84e0ad..8fb9827 100644 --- a/testdata/oracle/select/use-nl-hint.json +++ b/testdata/oracle/select/use-nl-hint.json @@ -4,8 +4,8 @@ { "expected": "SELECT e.employee_id, e.first_name, d.department_name FROM employees e, departments d WHERE e.department_id = d.department_id;", "statement_metadata": { - "size": 33, - "tables": ["employees"], + "size": 44, + "tables": ["employees", "departments"], "commands": ["SELECT"], "comments": ["/*+ USE_NL(e d) */"], "procedures": []