From 9d54c687ff3e68c2caf579799cee328bdccc76a1 Mon Sep 17 00:00:00 2001 From: meiyi Date: Mon, 16 Mar 2026 11:11:35 +0800 Subject: [PATCH] add doc for group_commit_mode table property --- .../import/group-commit-manual.md | 65 +++++++++++++++++++ .../import/group-commit-manual.md | 64 ++++++++++++++++++ .../import/group-commit-manual.md | 64 ++++++++++++++++++ .../import/group-commit-manual.md | 64 ++++++++++++++++++ 4 files changed, 257 insertions(+) diff --git a/docs/data-operate/import/group-commit-manual.md b/docs/data-operate/import/group-commit-manual.md index 1844c80d51ec9..64e657c2ff3d7 100644 --- a/docs/data-operate/import/group-commit-manual.md +++ b/docs/data-operate/import/group-commit-manual.md @@ -52,6 +52,65 @@ PROPERTIES ( ); ``` +### Table Property Configuration + +You can set the default Group Commit mode at the table level. When Stream Load does not set the `group_commit` HTTP Header, the mode from the table property will be used. + +**Configure during table creation:** + +```sql +CREATE TABLE `dt` ( + `id` int(11) NOT NULL, + `name` varchar(50) NULL, + `score` int(11) NULL +) ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY HASH(`id`) BUCKETS 1 +PROPERTIES ( + "replication_num" = "1", + "group_commit_mode" = "async_mode" +); +``` + +**Modify table property:** + +```sql +# Modify to synchronous mode +ALTER TABLE dt SET ("group_commit_mode" = "sync_mode"); + +# Disable Group Commit +ALTER TABLE dt SET ("group_commit_mode" = "off_mode"); +``` + +**View table property:** + +`SHOW CREATE TABLE` displays the `group_commit_mode` property (unless the value is `off_mode`): + +```sql +mysql> SHOW CREATE TABLE dt; ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | 
++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| dt | CREATE TABLE `dt` ( + `id` int(11) NOT NULL, + `name` varchar(50) NULL, + `score` int(11) NULL +) ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY HASH(`id`) BUCKETS 1 +PROPERTIES ( + "group_commit_mode" = "async_mode" +) | ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +**Priority Description:** + +- For Stream Load: If the `group_commit` HTTP Header is set, the Header value takes priority; otherwise, the table property value is used. +- For INSERT INTO VALUES: The Session variable `group_commit` takes priority over the table property. + + ### Using JDBC When users write using JDBC's `insert into values` method, to reduce SQL parsing and planning overhead, we support MySQL protocol's `PreparedStatement` feature on the FE side. When using `PreparedStatement`, SQL and its load plan are cached in session-level memory cache, and subsequent loads directly use the cached objects, reducing FE CPU pressure. Here's an example of using `PreparedStatement` in JDBC: @@ -233,6 +292,8 @@ func logInsertStatistics() { ### INSERT INTO VALUES +Enable Group Commit by setting the Session variable `group_commit`. Session variables have higher priority than table properties. + * Asynchronous Mode ```sql @@ -298,6 +359,10 @@ mysql> set group_commit = off_mode; ### Stream Load +When importing via Stream Load, you can enable Group Commit by setting the `group_commit` parameter in the HTTP Header. 
+ +**Note**: If the `group_commit` Header is not set but `group_commit_mode` is configured in the table property, the table property mode will be used automatically. + Assuming `data.csv` contains: ```sql diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/data-operate/import/group-commit-manual.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/data-operate/import/group-commit-manual.md index f2ce02c9d1345..1e694c54810a7 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/current/data-operate/import/group-commit-manual.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/data-operate/import/group-commit-manual.md @@ -52,6 +52,64 @@ PROPERTIES ( ); ``` +### 表属性配置 + +可以在表级别设置默认的 Group Commit 模式。当 Stream Load 未设置 `group_commit` HTTP Header 时,将使用表属性中的模式。 + +**建表时配置:** + +```sql +CREATE TABLE `dt` ( + `id` int(11) NOT NULL, + `name` varchar(50) NULL, + `score` int(11) NULL +) ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY HASH(`id`) BUCKETS 1 +PROPERTIES ( + "replication_num" = "1", + "group_commit_mode" = "async_mode" +); +``` + +**修改表属性:** + +```sql +# 修改为同步模式 +ALTER TABLE dt SET ("group_commit_mode" = "sync_mode"); + +# 关闭 Group Commit +ALTER TABLE dt SET ("group_commit_mode" = "off_mode"); +``` + +**查看表属性:** + +`SHOW CREATE TABLE` 会显示 `group_commit_mode` 属性(除非值为 `off_mode`): + +```sql +mysql> SHOW CREATE TABLE dt; ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| dt | CREATE TABLE `dt` ( + `id` int(11) NOT NULL, + `name` varchar(50) NULL, + `score` int(11) NULL +) ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY 
HASH(`id`) BUCKETS 1 +PROPERTIES ( + "group_commit_mode" = "async_mode" +) | ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +**优先级说明:** + +- 对于 Stream Load:如果设置了 `group_commit` HTTP Header,则优先使用 Header 中的值;否则使用表属性中的值 +- 对于 INSERT INTO VALUES:Session 变量 `group_commit` 的优先级高于表属性 + ### 使用 JDBC 当用户使用 JDBC `insert into values`方式写入时,为了减少 SQL 解析和生成规划的开销,我们在 FE 端支持了 MySQL 协议的 `PreparedStatement` 特性。当使用 `PreparedStatement` 时,SQL 和其导入规划将被缓存到 Session 级别的内存缓存中,后续的导入直接使用缓存对象,降低了 FE 的 CPU 压力。下面是在 JDBC 中使用 `PreparedStatement` 的例子: @@ -233,6 +291,8 @@ func logInsertStatistics() { ### INSERT INTO VALUES +通过设置 Session 变量 `group_commit` 来启用 Group Commit。Session 变量的优先级高于表属性。 + * 异步模式 ```sql @@ -298,6 +358,10 @@ mysql> set group_commit = off_mode; ### Stream Load +通过 Stream Load 进行导入时,可以在 HTTP Header 中设置 `group_commit` 参数来启用 Group Commit。 + +**注意**:如果未设置 `group_commit` Header 且表属性中配置了 `group_commit_mode`,则自动使用表属性中的模式。 + 假如`data.csv`的内容为: ```sql diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/data-operate/import/group-commit-manual.md b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/data-operate/import/group-commit-manual.md index f2ce02c9d1345..1e694c54810a7 100644 --- a/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/data-operate/import/group-commit-manual.md +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/version-4.x/data-operate/import/group-commit-manual.md @@ -52,6 +52,64 @@ PROPERTIES ( ); ``` +### 表属性配置 + +可以在表级别设置默认的 Group Commit 模式。当 Stream Load 未设置 `group_commit` HTTP Header 时,将使用表属性中的模式。 + +**建表时配置:** + +```sql +CREATE TABLE `dt` ( + `id` int(11) NOT NULL, + `name` varchar(50) NULL, + `score` int(11) NULL +) ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY HASH(`id`) BUCKETS 1 +PROPERTIES ( + "replication_num" = "1", + 
"group_commit_mode" = "async_mode" +); +``` + +**修改表属性:** + +```sql +# 修改为同步模式 +ALTER TABLE dt SET ("group_commit_mode" = "sync_mode"); + +# 关闭 Group Commit +ALTER TABLE dt SET ("group_commit_mode" = "off_mode"); +``` + +**查看表属性:** + +`SHOW CREATE TABLE` 会显示 `group_commit_mode` 属性(除非值为 `off_mode`): + +```sql +mysql> SHOW CREATE TABLE dt; ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| dt | CREATE TABLE `dt` ( + `id` int(11) NOT NULL, + `name` varchar(50) NULL, + `score` int(11) NULL +) ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY HASH(`id`) BUCKETS 1 +PROPERTIES ( + "group_commit_mode" = "async_mode" +) | ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +**优先级说明:** + +- 对于 Stream Load:如果设置了 `group_commit` HTTP Header,则优先使用 Header 中的值;否则使用表属性中的值 +- 对于 INSERT INTO VALUES:Session 变量 `group_commit` 的优先级高于表属性 + ### 使用 JDBC 当用户使用 JDBC `insert into values`方式写入时,为了减少 SQL 解析和生成规划的开销,我们在 FE 端支持了 MySQL 协议的 `PreparedStatement` 特性。当使用 `PreparedStatement` 时,SQL 和其导入规划将被缓存到 Session 级别的内存缓存中,后续的导入直接使用缓存对象,降低了 FE 的 CPU 压力。下面是在 JDBC 中使用 `PreparedStatement` 的例子: @@ -233,6 +291,8 @@ func logInsertStatistics() { ### INSERT INTO VALUES +通过设置 Session 变量 `group_commit` 来启用 Group Commit。Session 变量的优先级高于表属性。 + * 异步模式 ```sql @@ -298,6 +358,10 @@ mysql> set group_commit = off_mode; ### Stream Load +通过 Stream Load 进行导入时,可以在 HTTP Header 中设置 `group_commit` 参数来启用 
Group Commit。 + +**注意**:如果未设置 `group_commit` Header 且表属性中配置了 `group_commit_mode`,则自动使用表属性中的模式。 + 假如`data.csv`的内容为: ```sql diff --git a/versioned_docs/version-4.x/data-operate/import/group-commit-manual.md b/versioned_docs/version-4.x/data-operate/import/group-commit-manual.md index f086e4eb9917d..f65634de69e88 100644 --- a/versioned_docs/version-4.x/data-operate/import/group-commit-manual.md +++ b/versioned_docs/version-4.x/data-operate/import/group-commit-manual.md @@ -52,6 +52,64 @@ PROPERTIES ( ); ``` +### Table Property Configuration + +You can set the default Group Commit mode at the table level. When Stream Load does not set the `group_commit` HTTP Header, the mode from the table property will be used. + +**Configure during table creation:** + +```sql +CREATE TABLE `dt` ( + `id` int(11) NOT NULL, + `name` varchar(50) NULL, + `score` int(11) NULL +) ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY HASH(`id`) BUCKETS 1 +PROPERTIES ( + "replication_num" = "1", + "group_commit_mode" = "async_mode" +); +``` + +**Modify table property:** + +```sql +# Modify to synchronous mode +ALTER TABLE dt SET ("group_commit_mode" = "sync_mode"); + +# Disable Group Commit +ALTER TABLE dt SET ("group_commit_mode" = "off_mode"); +``` + +**View table property:** + +`SHOW CREATE TABLE` displays the `group_commit_mode` property (unless the value is `off_mode`): + +```sql +mysql> SHOW CREATE TABLE dt; ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| dt | CREATE TABLE `dt` ( + `id` int(11) NOT NULL, + `name` varchar(50) NULL, + `score` int(11) NULL +) 
ENGINE=OLAP +DUPLICATE KEY(`id`) +DISTRIBUTED BY HASH(`id`) BUCKETS 1 +PROPERTIES ( + "group_commit_mode" = "async_mode" +) | ++-------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +**Priority Description:** + +- For Stream Load: If the `group_commit` HTTP Header is set, the Header value takes priority; otherwise, the table property value is used. +- For INSERT INTO VALUES: The Session variable `group_commit` takes priority over the table property. + ### Using JDBC When users write using JDBC's `insert into values` method, to reduce SQL parsing and planning overhead, we support MySQL protocol's `PreparedStatement` feature on the FE side. When using `PreparedStatement`, SQL and its load plan are cached in session-level memory cache, and subsequent loads directly use the cached objects, reducing FE CPU pressure. Here's an example of using `PreparedStatement` in JDBC: @@ -233,6 +291,8 @@ func logInsertStatistics() { ### INSERT INTO VALUES +Enable Group Commit by setting the Session variable `group_commit`. Session variables have higher priority than table properties. + * Asynchronous Mode ```sql @@ -298,6 +358,10 @@ mysql> set group_commit = off_mode; ### Stream Load +When importing via Stream Load, you can enable Group Commit by setting the `group_commit` parameter in the HTTP Header. + +**Note**: If the `group_commit` Header is not set but `group_commit_mode` is configured in the table property, the table property mode will be used automatically. + Assuming `data.csv` contains: ```sql