Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -4407,6 +4407,12 @@
"LATERAL can only be used with subquery and table-valued functions."
]
},
"MISSING_COLUMN_BEFORE_IN" : {
"message" : [
"Invalid IN predicate: a column or expression must appear before IN and its parenthesized value list.",
"For example, use `WHERE id IN (1, 2)` instead of `WHERE IN (1, 2)` or `WHERE in (1)` with no expression before `in`."
]
},
"MULTI_PART_NAME" : {
"message" : [
"<statement> with multiple part name(<name>) is not allowed."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,10 @@ trait CheckAnalysis extends LookupCatalog with QueryErrorsBase with PlanToString
case u: UnresolvedRelation =>
u.tableNotFound(u.multipartIdentifier)

// Rare: a filter whose whole condition is `in IN (...)`, where the left-hand side is an
// unresolved single-part attribute named "in" (case-insensitive). Report a missing column.
case f @ Filter(In(UnresolvedAttribute(Seq(name)), _), _) if name.equalsIgnoreCase("in") =>
throw QueryCompilationErrors.missingColumnBeforeInError(f.condition.origin)

case u: UnresolvedFunctionName =>
val catalogPath = currentCatalog.name +: catalogManager.currentNamespace
val searchPath = SQLConf.get.resolutionSearchPath(catalogPath.toSeq)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,7 @@ object FunctionRegistry {
// predicates
expression[Between]("between"),
expression[And]("and"),
expression[In]("in"),
expressionBuilder("in", InPredicateExpressionBuilder),
expression[Not]("not"),
expression[Or]("or"),

Expand Down Expand Up @@ -1345,6 +1345,29 @@ object TableFunctionRegistry {
*/
trait ExpressionBuilder extends FunctionBuilderBase[Expression]

/**
 * Builder behind the SQL function form `in(col, v1, ...)`.
 *
 * Argument lists that carry no column are rejected:
 *  - zero arguments: reported as a wrong number of arguments;
 *  - exactly one argument, `in(v)`: the shape `WHERE in (v)` takes when the column before the
 *    IN list is omitted;
 *  - two or more arguments that are all [[Literal]]s, `in(v1, v2, ...)`: the shape
 *    `WHERE IN ('a','b','c')` / `DELETE ... WHERE IN (...)` takes when there is no column.
 *
 * Any other argument list is treated as `in(col, v1, v2)`: the first argument becomes the
 * tested value and the remaining arguments become the IN list.
 */
private[analysis] object InPredicateExpressionBuilder extends ExpressionBuilder {
  override def build(funcName: String, expressions: Seq[Expression]): Expression =
    expressions match {
      case Seq() =>
        throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(2), 0)

      case Seq(onlyArg) =>
        throw QueryCompilationErrors.missingColumnBeforeInError(onlyArg.origin)

      case value +: candidates =>
        // A legitimate call has at least one non-literal argument.
        val hasNonLiteral = expressions.exists {
          case _: Literal => false
          case _ => true
        }
        if (!hasNonLiteral) {
          throw QueryCompilationErrors.missingColumnBeforeInError(value.origin)
        }
        In(value, candidates)
    }
}

/**
* This is a trait used for table valued functions that defines how their expression
* representations are constructed in [[TableFunctionRegistry]].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
messageParameters = commonParam ++ proposalParam)
}

/**
 * Creates the analysis error for an IN predicate that has nothing in front of the IN keyword.
 *
 * @param origin origin attached to the exception
 * @return an [[AnalysisException]] with error class
 *         `INVALID_SQL_SYNTAX.MISSING_COLUMN_BEFORE_IN` and no message parameters
 */
def missingColumnBeforeInError(origin: Origin): Throwable = {
  // This error condition takes no message parameters.
  val noParameters = Map.empty[String, String]
  new AnalysisException(
    errorClass = "INVALID_SQL_SYNTAX.MISSING_COLUMN_BEFORE_IN",
    messageParameters = noParameters,
    origin = origin)
}

def unresolvedFieldError(
fieldName: String,
columnPath: Seq[String],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1062,6 +1062,39 @@ class AnalysisErrorSuite extends AnalysisTest with DataTypeErrorsBase {
"expressionList" -> "max(DISTINCT b)"))
}

test("Error when column name is missing before IN in WHERE clause") {
  // The relation only exposes column "a", so the attribute named "in" stays unresolved.
  val relation = LocalRelation(AttributeReference("a", StringType)())
  val predicate = In(UnresolvedAttribute("in"), Seq(Literal("2024-07-05")))
  assertAnalysisErrorCondition(
    Filter(predicate, relation),
    expectedErrorCondition = "INVALID_SQL_SYNTAX.MISSING_COLUMN_BEFORE_IN",
    expectedMessageParameters = Map.empty)
}

// The test name is kept ASCII-only so it passes the scalastyle nonascii check.
test("Error when WHERE uses in(v) as function - same as Spark SQL WHERE in (1)") {
  val a = AttributeReference("id", IntegerType)()
  // The filter condition is the unresolved function call `in(1)`: a single argument, no column.
  val plan = Filter(
    UnresolvedFunction("in", Seq(Literal(1)), isDistinct = false),
    LocalRelation(a))
  assertAnalysisErrorCondition(plan,
    expectedErrorCondition = "INVALID_SQL_SYNTAX.MISSING_COLUMN_BEFORE_IN",
    expectedMessageParameters = Map.empty)
}

// The test name is ASCII-only and split across two literals to stay within the line limit.
test("Error when in(...) has only literals - no column before IN list " +
  "(e.g. WHERE IN (v1,v2,...))") {
  val a = AttributeReference("id", IntegerType)()
  // Every argument of the unresolved `in` call is a literal, so no argument can be the column.
  val plan = Filter(
    UnresolvedFunction(
      "in",
      Seq(Literal(1), Literal(2), Literal(3)),
      isDistinct = false),
    LocalRelation(a))
  assertAnalysisErrorCondition(plan,
    expectedErrorCondition = "INVALID_SQL_SYNTAX.MISSING_COLUMN_BEFORE_IN",
    expectedMessageParameters = Map.empty)
}

test("SPARK-30811: CTE should not cause stack overflow when " +
"it refers to non-existent table with same name") {
val plan = UnresolvedWith(
Expand Down