Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3,637 changes: 1,875 additions & 1,762 deletions src/parser/bison_parser.cpp

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/parser/bison_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,11 @@ union HSQL_STYPE
hsql::RowLockWaitPolicy lock_wait_policy_t;

hsql::ImportExportOptions* import_export_option_t;
hsql::CsvImportExportOptions* csv_import_export_option_t;

// clang-format off

#line 351 "bison_parser.h"
#line 352 "bison_parser.h"

};
typedef union HSQL_STYPE HSQL_STYPE;
Expand Down
93 changes: 92 additions & 1 deletion src/parser/bison_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@
hsql::RowLockWaitPolicy lock_wait_policy_t;

hsql::ImportExportOptions* import_export_option_t;
hsql::CsvImportExportOptions* csv_import_export_option_t;

// clang-format off
}
Expand Down Expand Up @@ -292,6 +293,7 @@
// ImportType is used for compatibility reasons
%type <import_type_t> file_type
%type <import_export_option_t> opt_import_export_options import_export_options
%type <csv_import_export_option_t> csv_import_export_options

%type <str_vec> ident_commalist opt_column_list
%type <expr_vec> expr_list select_list opt_extended_literal_list extended_literal_list hint_list opt_hints opt_partition
Expand Down Expand Up @@ -467,6 +469,10 @@ import_statement : IMPORT FROM file_type FILE file_path INTO table_name {
$$->encoding = $5->encoding;
$5->encoding = nullptr;
}
if ($5->csv_options) {
$$->csv_options = $5->csv_options;
$5->csv_options = nullptr;
}
delete $5;
};

Expand Down Expand Up @@ -497,6 +503,11 @@ import_export_options : import_export_options ',' FORMAT file_type {
yyerror(&yyloc, result, scanner, "File type must only be provided once.");
YYERROR;
}
if ($1->csv_options && $4 != kImportCSV && $4 != kImportAuto) {
delete $1;
yyerror(&yyloc, result, scanner, "Cannot have CSV options (DELIMITER, NULL, QUOTE) without CSV import type.");
YYERROR;
}
$1->format = $4;
$$ = $1;
}
Expand All @@ -517,7 +528,79 @@ import_export_options : import_export_options ',' FORMAT file_type {
| ENCODING STRING {
$$ = new ImportExportOptions{};
$$->encoding = $2;
};
}
| import_export_options ',' csv_import_export_options {
// Appends one CSV option ($3) to the options collected so far ($1).
// Every error path deletes both $1 and $3 before YYERROR so that neither is leaked.

// CSV options are only accepted while the format is still the default (kImportAuto) or is CSV.
if ($1->format != kImportAuto && $1->format != kImportCSV) {
delete $1;
delete $3;
yyerror(&yyloc, result, scanner, "Cannot have CSV options (DELIMITER, NULL, QUOTE) without CSV import type.");
YYERROR;
}

if ($1->csv_options) {
// Some CSV options were already collected: reject a field that is set on both sides.
if ($1->csv_options->delimiter && $3->delimiter) {
delete $1;
delete $3;
yyerror(&yyloc, result, scanner, "Delimiter must only be provided once.");
YYERROR;
}
if ($1->csv_options->null && $3->null) {
delete $1;
delete $3;
yyerror(&yyloc, result, scanner, "Null string must only be provided once.");
YYERROR;
}
if ($1->csv_options->quote && $3->quote) {
delete $1;
delete $3;
yyerror(&yyloc, result, scanner, "Quote must only be provided once.");
YYERROR;
}

// Move each string that $3 carries into $1 and null it in $3, so that deleting $3 below
// does not free a string that $1 now owns.
if ($3->delimiter) {
$1->csv_options->delimiter = $3->delimiter;
$3->delimiter = nullptr;
}
if ($3->null) {
$1->csv_options->null = $3->null;
$3->null = nullptr;

}
if ($3->quote) {
$1->csv_options->quote = $3->quote;
$3->quote = nullptr;
}
delete $3;
} else {
// First CSV option in the list: $1 takes over the $3 object itself.
$1->csv_options = $3;
}

$$ = $1;
}
| csv_import_export_options {
// The option list starts with a CSV option: create the container (its format defaults to
// kImportAuto) and hand the CSV options object over to it.
$$ = new ImportExportOptions{};
$$->csv_options = $1;
}

csv_import_export_options : IDENTIFIER STRING {
    // DELIMITER and QUOTE arrive as plain identifiers, so the keyword is matched here
    // (case-insensitively) instead of by the lexer.
    const bool is_delimiter = strcasecmp($1, "DELIMITER") == 0;
    const bool is_quote = !is_delimiter && strcasecmp($1, "QUOTE") == 0;

    if (!is_delimiter && !is_quote) {
      // Unknown keyword: release both token strings before bailing out.
      free($1);
      free($2);
      yyerror(&yyloc, result, scanner, "Unknown identifier when parsing CSV options.");
      YYERROR;
    }

    // The keyword text is no longer needed; the value string is handed over to the result.
    free($1);
    $$ = new CsvImportExportOptions{};
    if (is_delimiter) {
      $$->delimiter = $2;
    } else {
      $$->quote = $2;
    }
  }
  | NULL STRING {
    // NULL is matched as its own token; the string becomes the null representation.
    $$ = new CsvImportExportOptions{};
    $$->null = $2;
  }

/******************************
* Export Statement
Expand All @@ -533,6 +616,10 @@ export_statement : COPY table_name TO file_path opt_import_export_options {
$$->encoding = $5->encoding;
$5->encoding = nullptr;
}
if ($5->csv_options) {
$$->csv_options = $5->csv_options;
$5->csv_options = nullptr;
}
delete $5;
}
| COPY select_with_paren TO file_path opt_import_export_options {
Expand All @@ -543,6 +630,10 @@ export_statement : COPY table_name TO file_path opt_import_export_options {
$$->encoding = $5->encoding;
$5->encoding = nullptr;
}
if ($5->csv_options) {
$$->csv_options = $5->csv_options;
$5->csv_options = nullptr;
}
delete $5;
};

Expand Down
17 changes: 17 additions & 0 deletions src/sql/CsvImportExportOptions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef SQLPARSER_CSV_IMPORT_EXPORT_OPTIONS_H
#define SQLPARSER_CSV_IMPORT_EXPORT_OPTIONS_H

namespace hsql {

// CSV-specific options of a COPY/IMPORT/EXPORT statement.
//
// Each member is nullptr unless the corresponding option was given. The destructor
// releases every member with free(), so the struct owns the strings assigned to it.
struct CsvImportExportOptions {
  CsvImportExportOptions();
  ~CsvImportExportOptions();

  // Copying would leave two objects freeing the same strings, so it is disabled.
  // Instances are handled through pointers only.
  CsvImportExportOptions(const CsvImportExportOptions&) = delete;
  CsvImportExportOptions& operator=(const CsvImportExportOptions&) = delete;

  char* delimiter;  // Value of the DELIMITER option.
  char* null;       // Value of the NULL option.
  char* quote;      // Value of the QUOTE option.
};

} // namespace hsql

#endif
2 changes: 2 additions & 0 deletions src/sql/ExportStatement.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef SQLPARSER_EXPORT_STATEMENT_H
#define SQLPARSER_EXPORT_STATEMENT_H

#include "CsvImportExportOptions.h"
#include "ImportExportOptions.h"
#include "SQLStatement.h"
#include "SelectStatement.h"
Expand All @@ -18,6 +19,7 @@ struct ExportStatement : SQLStatement {
char* tableName;
SelectStatement* select;
char* encoding;
CsvImportExportOptions* csv_options;
};

} // namespace hsql
Expand Down
3 changes: 3 additions & 0 deletions src/sql/ImportExportOptions.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#ifndef SQLPARSER_IMPORT_EXPORT_OPTIONS_H
#define SQLPARSER_IMPORT_EXPORT_OPTIONS_H

#include "CsvImportExportOptions.h"

namespace hsql {

// Name unchanged for compatibility. Historically, this was only used for import statements before we introduced export
Expand All @@ -19,6 +21,7 @@ struct ImportExportOptions {

ImportType format;
char* encoding;
CsvImportExportOptions* csv_options;
};

} // namespace hsql
Expand Down
2 changes: 2 additions & 0 deletions src/sql/ImportStatement.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef SQLPARSER_IMPORT_STATEMENT_H
#define SQLPARSER_IMPORT_STATEMENT_H

#include "CsvImportExportOptions.h"
#include "ImportExportOptions.h"
#include "SQLStatement.h"

Expand All @@ -17,6 +18,7 @@ struct ImportStatement : SQLStatement {
char* tableName;
Expr* whereClause;
char* encoding;
CsvImportExportOptions* csv_options;
};

} // namespace hsql
Expand Down
22 changes: 18 additions & 4 deletions src/sql/statements.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,19 +140,31 @@ ExportStatement::ExportStatement(ImportType type)
schema(nullptr),
tableName(nullptr),
select(nullptr),
encoding(nullptr) {}
encoding(nullptr),
csv_options(nullptr) {}

// Releases everything the export statement owns.
ExportStatement::~ExportStatement() {
  // Members released with delete.
  delete csv_options;
  delete select;

  // Members released with free().
  free(encoding);
  free(tableName);
  free(schema);
  free(filePath);
}

ImportExportOptions::ImportExportOptions() : format(kImportAuto), encoding(nullptr) {}
// Starts with no CSV option set: every member is nullptr.
CsvImportExportOptions::CsvImportExportOptions()
    : delimiter(nullptr),
      null(nullptr),
      quote(nullptr) {}

// Frees the option strings; members that were never set are nullptr, for which free() does nothing.
CsvImportExportOptions::~CsvImportExportOptions() {
  free(quote);
  free(null);
  free(delimiter);
}

ImportExportOptions::ImportExportOptions() : format(kImportAuto), encoding(nullptr), csv_options(nullptr) {}

ImportExportOptions::~ImportExportOptions() { free(encoding); }
ImportExportOptions::~ImportExportOptions() {
free(encoding);
delete csv_options;
}

// ImportStatement
ImportStatement::ImportStatement(ImportType type)
Expand All @@ -162,14 +174,16 @@ ImportStatement::ImportStatement(ImportType type)
schema(nullptr),
tableName(nullptr),
whereClause(nullptr),
encoding(nullptr) {}
encoding(nullptr),
csv_options(nullptr) {}

// Releases everything the import statement owns.
ImportStatement::~ImportStatement() {
  // Members released with delete.
  delete csv_options;
  delete whereClause;

  // Members released with free().
  free(encoding);
  free(tableName);
  free(schema);
  free(filePath);
}

// InsertStatement
Expand Down
1 change: 1 addition & 0 deletions test/queries/queries-bad.sql
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
!SELECT * FROM t WHERE a = DATE '2000-01-01' + x DAYS;
!SELECT * FROM t WHERE a = DATE '2000-01-01' + INTERVAL 'x' DAY;
!SELECT * FROM t WHERE a = DATE '2000-01-01' + INTERVAL '3.3 DAYS';
!COPY students FROM 'file_path' WITH (FORMAT TBL, DELIMITER '|', NULL '', QUOTE '"'); # Cannot have CSV options with non-CSV format
Copy link
Member

@dey4ss dey4ss Aug 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add further invalid cases (e.g., options passed multiple times, order of format and CSV options, ...)?

Copy link
Member

@dey4ss dey4ss Aug 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really necessary to ensure we clean up everything and do not leak when we call YYERROR. Please have a look at every path where you do that and add a case for that, either in a full cpp test or here, so it will be triggered when we do leak checking.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I think I got every path (that is not identical to another path)

# ON is not supported by postgres. We follow postgres here since the sql-92 standard does not specify index
# implementation details.
!DROP INDEX myindex ON mytable;
Expand Down
1 change: 1 addition & 0 deletions test/queries/queries-good.sql
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ COPY students FROM 'file_path' WITH (FORMAT TBL);
COPY students FROM 'file_path' WITH (FORMAT CSV);
COPY students FROM 'file_path' WITH (FORMAT BIN);
COPY students FROM 'file_path' WITH (FORMAT BINARY);
COPY students FROM 'file_path' WITH (FORMAT CSV, DELIMITER '|', NULL '', QUOTE '"');
COPY students FROM 'file_path' (FORMAT TBL);
COPY good_students FROM 'file_path' WHERE grade > (SELECT AVG(grade) from alumni);
COPY students TO 'student.tbl';
Expand Down
20 changes: 15 additions & 5 deletions test/sql_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,16 +466,23 @@ TEST(ImportStatementTest) {
}

TEST(CopyStatementTest) {
TEST_PARSE_SINGLE_SQL("COPY students FROM 'students_file' WITH (FORMAT BINARY);", kStmtImport, ImportStatement,
import_result, import_stmt);
TEST_PARSE_SINGLE_SQL("COPY students FROM 'students_file' WITH (FORMAT CSV, DELIMITER '|', NULL '', QUOTE '\"');",
kStmtImport, ImportStatement, import_result, import_stmt);

ASSERT_EQ(import_stmt->type, kImportBinary);
ASSERT_EQ(import_stmt->type, kImportCSV);
ASSERT_NOTNULL(import_stmt->tableName);
ASSERT_STREQ(import_stmt->tableName, "students");
ASSERT_NOTNULL(import_stmt->filePath);
ASSERT_STREQ(import_stmt->filePath, "students_file");
ASSERT_NULL(import_stmt->whereClause);
ASSERT_NULL(import_stmt->encoding);
ASSERT_NOTNULL(import_stmt->csv_options);
ASSERT_NOTNULL(import_stmt->csv_options->delimiter);
ASSERT_STREQ(import_stmt->csv_options->delimiter, "|");
ASSERT_NOTNULL(import_stmt->csv_options->null);
ASSERT_STREQ(import_stmt->csv_options->null, "");
ASSERT_NOTNULL(import_stmt->csv_options->quote);
ASSERT_STREQ(import_stmt->csv_options->quote, "\"");

TEST_PARSE_SINGLE_SQL("COPY students FROM 'students_file' WHERE lastname = 'Potter';", kStmtImport, ImportStatement,
import_filter_result, import_filter_stmt);
Expand All @@ -492,17 +499,19 @@ TEST(CopyStatementTest) {
ASSERT_EQ(import_filter_stmt->whereClause->expr2->type, kExprLiteralString);
ASSERT_STREQ(import_filter_stmt->whereClause->expr2->name, "Potter");
ASSERT_NULL(import_filter_stmt->encoding);
ASSERT_NULL(import_filter_stmt->csv_options);

TEST_PARSE_SINGLE_SQL("COPY students TO 'students_file' WITH (ENCODING 'FSST', FORMAT CSV);", kStmtExport,
TEST_PARSE_SINGLE_SQL("COPY students TO 'students_file' WITH (ENCODING 'FSST', FORMAT BINARY);", kStmtExport,
ExportStatement, export_table_result, export_table_stmt);

ASSERT_EQ(export_table_stmt->type, kImportCSV);
ASSERT_EQ(export_table_stmt->type, kImportBinary);
ASSERT_NOTNULL(export_table_stmt->tableName);
ASSERT_STREQ(export_table_stmt->tableName, "students");
ASSERT_NOTNULL(export_table_stmt->filePath);
ASSERT_STREQ(export_table_stmt->filePath, "students_file");
ASSERT_NULL(export_table_stmt->select);
ASSERT_STREQ(export_table_stmt->encoding, "FSST");
ASSERT_NULL(export_table_stmt->csv_options);

TEST_PARSE_SINGLE_SQL(
"COPY (SELECT firstname, lastname FROM students) TO 'students_file' WITH (ENCODING 'Dictionary');", kStmtExport,
Expand All @@ -513,6 +522,7 @@ TEST(CopyStatementTest) {
ASSERT_NOTNULL(export_select_stmt->filePath);
ASSERT_STREQ(export_select_stmt->filePath, "students_file");
ASSERT_STREQ(export_select_stmt->encoding, "Dictionary");
ASSERT_NULL(export_select_stmt->csv_options);

ASSERT_NOTNULL(export_select_stmt->select);
const auto& select_stmt = export_select_stmt->select;
Expand Down
Loading