From 19ea54d68e8396a81eb1cd6c3c3c0f33fc4ec0a0 Mon Sep 17 00:00:00 2001 From: lichi Date: Fri, 16 Jan 2026 16:38:27 +0800 Subject: [PATCH] [feature](asof-join)support asof join --- docs/query-data/asof-join.md | 118 +++++++++++++++++ .../current/query-data/asof-join.md | 124 ++++++++++++++++++ 2 files changed, 242 insertions(+) create mode 100644 docs/query-data/asof-join.md create mode 100644 i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/asof-join.md diff --git a/docs/query-data/asof-join.md b/docs/query-data/asof-join.md new file mode 100644 index 0000000000000..1ac11cdaf9df2 --- /dev/null +++ b/docs/query-data/asof-join.md @@ -0,0 +1,118 @@ +--- +{ 'title': 'ASOF Join', 'language': 'en' } +--- + +## ASOF JOIN + +The **ASOF JOIN** operation merges rows from two tables based on **adjacent** or **exactly matching** timestamp values. For each row in the left table, the join finds a row in the right table with the **closest** timestamp. The qualifying row in the right table is the best match, and its timestamp may be **equal to**, **earlier than**, or **later than** the timestamp of the row in the left table, depending on the specified comparison operator. + +### Syntax + +```SQL +FROM <left_table> ASOF [INNER|LEFT] JOIN <right_table> + MATCH_CONDITION ( <left_table.timecol> <comparison_operator> <right_table.timecol> ) + [ ON <table.col> = <table.col> [ AND ... ] | USING ( <column_list> ) ] +``` + +### Parameter Description + +The table specified in the `FROM` clause is the left table; its records are assumed to **follow (in time)**, **precede**, or be **exactly synchronized** with the records in the right table. When a row in the left table has no matching row in the right table, an `ASOF INNER JOIN` filters that row out, whereas an `ASOF LEFT JOIN` retains the left table row and fills the corresponding right table columns with `NULL`. + +``` +MATCH_CONDITION ( <left_table.timecol> <comparison_operator> <right_table.timecol> ) +``` + +This condition is used to specify the **timestamp columns** to be compared in the two tables. + +1. 
Only the following four **comparison operators** are supported: `>=`, `<=`, `>`, `<`. +2. Supported time-related data types include: `DATE`, `DATETIME`, `TIMESTAMP_TZ`. +3. The data types of the two matching columns **do not need to be exactly the same**, but they must be **mutually compatible**. + +``` +ON <table.col> = <table.col> [ AND ... ] | USING ( <column_list> ) +``` + +The optional `ON` or `USING` clause is used to define one or more **equality conditions** between the two tables, which logically group the query results (for example, matching trades only with quotes for the same symbol). + +Special notes applicable to `ASOF JOIN` are as follows: + +1. The comparison operator in the `ON` clause **must be the equal sign** **`=`**. +2. The `ON` clause **does not allow conditions connected by logical OR (`OR`)**; only conditions connected by logical AND (`AND`) are supported. +3. Each side of each condition **must reference only one of the two tables in the join**. + +### Examples + +```SQL +CREATE TABLE asof_trades ( + trade_id INT, + symbol VARCHAR(10), + trade_time DATETIME, + price DECIMAL(10, 2), + quantity INT +) DISTRIBUTED BY HASH(trade_id) BUCKETS 3 +PROPERTIES("replication_num" = "1"); + +CREATE TABLE asof_quotes ( + quote_id INT, + symbol VARCHAR(10), + quote_time DATETIME, + bid_price DECIMAL(10, 2), + ask_price DECIMAL(10, 2) +) DISTRIBUTED BY HASH(quote_id) BUCKETS 3 +PROPERTIES("replication_num" = "1"); + +INSERT INTO asof_trades VALUES +(1, 'AAPL', '2024-01-01 10:00:05', 150.50, 100), +(2, 'AAPL', '2024-01-01 10:00:15', 151.00, 200), +(3, 'AAPL', '2024-01-01 10:00:25', 150.75, 150), +(4, 'GOOG', '2024-01-01 10:00:10', 2800.00, 50), +(5, 'GOOG', '2024-01-01 10:00:20', 2805.00, 75), +(6, 'MSFT', '2024-01-01 10:00:08', 380.00, 120); + +INSERT INTO asof_quotes VALUES +(1, 'AAPL', '2024-01-01 10:00:00', 150.00, 150.10), +(2, 'AAPL', '2024-01-01 10:00:10', 150.40, 150.60), +(3, 'AAPL', '2024-01-01 10:00:20', 150.90, 151.10), +(4, 'GOOG', '2024-01-01 10:00:05', 2795.00, 2800.00), +(5, 'GOOG', '2024-01-01 10:00:15', 2802.00, 2808.00), +(6, 
'MSFT', '2024-01-01 10:00:00', 378.00, 380.00), +(7, 'MSFT', '2024-01-01 10:00:10', 379.50, 381.00); + +-- ASOF LEFT JOIN +SELECT t.trade_id, t.symbol, t.trade_time, t.price, + q.quote_id, q.quote_time, q.bid_price +FROM asof_trades t +ASOF LEFT JOIN asof_quotes q +MATCH_CONDITION(t.trade_time <= q.quote_time) +ON t.symbol = q.symbol +ORDER BY t.trade_id; ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +| trade_id | symbol | trade_time | price | quote_id | quote_time | bid_price | ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +| 1 | AAPL | 2024-01-01 10:00:05 | 150.50 | 2 | 2024-01-01 10:00:10 | 150.40 | +| 2 | AAPL | 2024-01-01 10:00:15 | 151.00 | 3 | 2024-01-01 10:00:20 | 150.90 | +| 3 | AAPL | 2024-01-01 10:00:25 | 150.75 | NULL | NULL | NULL | +| 4 | GOOG | 2024-01-01 10:00:10 | 2800.00 | 5 | 2024-01-01 10:00:15 | 2802.00 | +| 5 | GOOG | 2024-01-01 10:00:20 | 2805.00 | NULL | NULL | NULL | +| 6 | MSFT | 2024-01-01 10:00:08 | 380.00 | 7 | 2024-01-01 10:00:10 | 379.50 | ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +6 rows in set (0.04 sec) + +-- ASOF INNER JOIN +SELECT t.trade_id, t.symbol, t.trade_time, t.price, + q.quote_id, q.quote_time, q.bid_price +FROM asof_trades t +ASOF INNER JOIN asof_quotes q +MATCH_CONDITION(t.trade_time <= q.quote_time) +ON t.symbol = q.symbol +ORDER BY t.trade_id; ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +| trade_id | symbol | trade_time | price | quote_id | quote_time | bid_price | ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +| 1 | AAPL | 2024-01-01 10:00:05 | 150.50 | 2 | 2024-01-01 10:00:10 | 150.40 | +| 2 | AAPL | 2024-01-01 10:00:15 | 151.00 | 3 | 2024-01-01 10:00:20 | 150.90 | +| 4 | GOOG | 2024-01-01 10:00:10 | 2800.00 | 5 | 2024-01-01 10:00:15 | 2802.00 | 
+| 6 | MSFT | 2024-01-01 10:00:08 | 380.00 | 7 | 2024-01-01 10:00:10 | 379.50 | ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +4 rows in set (0.04 sec) +``` \ No newline at end of file diff --git a/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/asof-join.md b/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/asof-join.md new file mode 100644 index 0000000000000..18ed2fd0dd2b2 --- /dev/null +++ b/i18n/zh-CN/docusaurus-plugin-content-docs/current/query-data/asof-join.md @@ -0,0 +1,124 @@ +--- +{ + "title": "ASOF连接(ASOF-JOIN)", + "language": "zh-CN" +} + + +--- + +## ASOF JOIN + +**时序匹配连接(ASOF JOIN)** 操作会基于**前后相邻**或**完全匹配**的时间戳值,对两个表中的行进行合并。对于左表中的每一行,该连接会在右表中找到一行**时间戳最接近**的记录。右表中符合条件的行即为最匹配项,根据指定的比较运算符,该行的时间戳可能与左表行的时间戳**相等**、**早于**或**晚于**左表行的时间戳。 + +### 语法 + +```SQL +FROM <left_table> ASOF [INNER|LEFT] JOIN <right_table> + MATCH_CONDITION ( <left_table.timecol> <comparison_operator> <right_table.timecol> ) + [ ON <table.col> = <table.col> [ AND ... ] | USING ( <column_list> ) ] +``` + +### 参数说明 + +`FROM` 子句中指定的左表,其记录的时间戳被假定为与右表记录的时间戳**时序衔接**、**早于**或**完全同步**。当左表中的某一行在右表中无匹配项时,如果是ASOF INNER JOIN则过滤掉该行,如果是ASOF LEFT JOIN,则保留左表行且右表对应的列会填充为 `NULL`。 + +``` +MATCH_CONDITION ( <left_table.timecol> <comparison_operator> <right_table.timecol> ) +``` + +该条件用于指定两个表中需要进行比较的**时间戳列**。 + +1. 比较运算符**仅支持以下四种**:`>=`、`<=`、`>`、`<`。 +2. 支持的时间相关数据类型包括:`DATE`、`DATETIME`、`TIMESTAMP_TZ`。 +3. 两个匹配列的数据类型**无需完全一致**,但必须**相互兼容**。 + +``` +ON <table.col> = <table.col> [ AND ... ] | USING ( <column_list> ) +``` + +可选的 `ON` 或 `USING` 子句用于定义两个表之间的一个或多个**等值条件**,目的是对查询结果进行逻辑分组。 + +适用于 `ASOF JOIN` 的特殊说明如下: + +1. `ON` 子句中的比较运算符**必须为等号** **`=`**。 +2. `ON` 子句中**不允许包含逻辑或(`OR`)连接的条件**,仅支持逻辑与(`AND`)连接的条件。 +3. 
每个条件的两侧**只能分别引用连接中的两个表之一**。 + +### 样例 + +```SQL +CREATE TABLE asof_trades ( + trade_id INT, + symbol VARCHAR(10), + trade_time DATETIME, + price DECIMAL(10, 2), + quantity INT +) DISTRIBUTED BY HASH(trade_id) BUCKETS 3 +PROPERTIES("replication_num" = "1"); + +CREATE TABLE asof_quotes ( + quote_id INT, + symbol VARCHAR(10), + quote_time DATETIME, + bid_price DECIMAL(10, 2), + ask_price DECIMAL(10, 2) +) DISTRIBUTED BY HASH(quote_id) BUCKETS 3 +PROPERTIES("replication_num" = "1"); + +INSERT INTO asof_trades VALUES +(1, 'AAPL', '2024-01-01 10:00:05', 150.50, 100), +(2, 'AAPL', '2024-01-01 10:00:15', 151.00, 200), +(3, 'AAPL', '2024-01-01 10:00:25', 150.75, 150), +(4, 'GOOG', '2024-01-01 10:00:10', 2800.00, 50), +(5, 'GOOG', '2024-01-01 10:00:20', 2805.00, 75), +(6, 'MSFT', '2024-01-01 10:00:08', 380.00, 120); + +INSERT INTO asof_quotes VALUES +(1, 'AAPL', '2024-01-01 10:00:00', 150.00, 150.10), +(2, 'AAPL', '2024-01-01 10:00:10', 150.40, 150.60), +(3, 'AAPL', '2024-01-01 10:00:20', 150.90, 151.10), +(4, 'GOOG', '2024-01-01 10:00:05', 2795.00, 2800.00), +(5, 'GOOG', '2024-01-01 10:00:15', 2802.00, 2808.00), +(6, 'MSFT', '2024-01-01 10:00:00', 378.00, 380.00), +(7, 'MSFT', '2024-01-01 10:00:10', 379.50, 381.00); + +-- ASOF LEFT JOIN +SELECT t.trade_id, t.symbol, t.trade_time, t.price, + q.quote_id, q.quote_time, q.bid_price +FROM asof_trades t +ASOF LEFT JOIN asof_quotes q +MATCH_CONDITION(t.trade_time <= q.quote_time) +ON t.symbol = q.symbol +ORDER BY t.trade_id; ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +| trade_id | symbol | trade_time | price | quote_id | quote_time | bid_price | ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +| 1 | AAPL | 2024-01-01 10:00:05 | 150.50 | 2 | 2024-01-01 10:00:10 | 150.40 | +| 2 | AAPL | 2024-01-01 10:00:15 | 151.00 | 3 | 2024-01-01 10:00:20 | 150.90 | +| 3 | AAPL | 2024-01-01 10:00:25 | 150.75 | NULL | NULL | NULL | +| 4 | 
GOOG | 2024-01-01 10:00:10 | 2800.00 | 5 | 2024-01-01 10:00:15 | 2802.00 | +| 5 | GOOG | 2024-01-01 10:00:20 | 2805.00 | NULL | NULL | NULL | +| 6 | MSFT | 2024-01-01 10:00:08 | 380.00 | 7 | 2024-01-01 10:00:10 | 379.50 | ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +6 rows in set (0.04 sec) + +-- ASOF INNER JOIN +SELECT t.trade_id, t.symbol, t.trade_time, t.price, + q.quote_id, q.quote_time, q.bid_price +FROM asof_trades t +ASOF INNER JOIN asof_quotes q +MATCH_CONDITION(t.trade_time <= q.quote_time) +ON t.symbol = q.symbol +ORDER BY t.trade_id; ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +| trade_id | symbol | trade_time | price | quote_id | quote_time | bid_price | ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +| 1 | AAPL | 2024-01-01 10:00:05 | 150.50 | 2 | 2024-01-01 10:00:10 | 150.40 | +| 2 | AAPL | 2024-01-01 10:00:15 | 151.00 | 3 | 2024-01-01 10:00:20 | 150.90 | +| 4 | GOOG | 2024-01-01 10:00:10 | 2800.00 | 5 | 2024-01-01 10:00:15 | 2802.00 | +| 6 | MSFT | 2024-01-01 10:00:08 | 380.00 | 7 | 2024-01-01 10:00:10 | 379.50 | ++----------+--------+---------------------+---------+----------+---------------------+-----------+ +4 rows in set (0.04 sec) + +``` \ No newline at end of file