Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 91 additions & 91 deletions bindings/python/example/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,8 @@ async def main():
config = fluss.Config(config_spec)

# Create connection using the static create method
conn = await fluss.FlussConnection.create(config)

# Define fields for PyArrow
async with await fluss.FlussConnection.create(config) as conn:
# Define fields for PyArrow
fields = [
pa.field("id", pa.int32()),
Comment on lines 42 to 46
Copy link

Copilot AI Apr 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The async with await FlussConnection.create(...) as conn: block has no executable statement in its body (only a comment), and fields = [...] is dedented. This will raise a SyntaxError (“expected an indented block”) and also ends the connection context immediately. Please indent the subsequent setup logic under the async with (or add a real statement inside the block) so the connection remains open for the rest of main().

Copilot uses AI. Check for mistakes.
pa.field("name", pa.string()),
Expand Down Expand Up @@ -105,8 +104,8 @@ async def main():
print(f"Got table: {table}")

# Create a writer for the table
append_writer = table.new_append().create_writer()
print(f"Created append writer: {append_writer}")
async with table.new_append().create_writer() as append_writer:
print(f"Created append writer: {append_writer}")

try:
# Demo: Write PyArrow Table
Expand Down Expand Up @@ -240,10 +239,10 @@ async def main():
append_writer.write_pandas(df)
print("Successfully wrote Pandas DataFrame")

# Flush all pending data
print("\n--- Flushing data ---")
await append_writer.flush()
print("Successfully flushed data")
# Note: flush() is automatically called by the 'async with' block on successful exit.
Copy link

Copilot AI Apr 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment says flush() and close() are automatically called when leaving the async with block, but the current AppendWriter.__aexit__ implementation only flushes and there is no writer close() in the Python API. Please update the example text to match the actual behavior (or implement/introduce close() and call it from __aexit__).

Suggested change
# Note: flush() and close() are automatically called by the 'async with' block on successful exit.
# Note: flush() is automatically called by the 'async with' block on successful exit.

Copilot uses AI. Check for mistakes.
# To manually flush before the context ends, you can still call:
# await append_writer.flush()
print("\n--- Ending append writer context (auto-flushing) ---")

# Demo: Check offsets after writes
print("\n--- Checking offsets after writes ---")
Expand All @@ -264,8 +263,8 @@ async def main():
print("\n--- Scanning table (batch scanner) ---")
try:
# Use new_scan().create_record_batch_log_scanner() for batch-based operations
batch_scanner = await table.new_scan().create_record_batch_log_scanner()
print(f"Created batch scanner: {batch_scanner}")
async with await table.new_scan().create_record_batch_log_scanner() as batch_scanner:
print(f"Created batch scanner: {batch_scanner}")

# Subscribe to buckets (required before to_arrow/to_pandas)
# Use subscribe_buckets to subscribe all buckets from EARLIEST_OFFSET
Expand All @@ -284,15 +283,15 @@ async def main():
print(f"Could not convert to PyArrow: {e}")

# Create a new batch scanner for to_pandas() test
batch_scanner2 = await table.new_scan().create_record_batch_log_scanner()
batch_scanner2.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)})
async with await table.new_scan().create_record_batch_log_scanner() as batch_scanner2:
batch_scanner2.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)})

# Try to get as Pandas DataFrame
try:
df_result = batch_scanner2.to_pandas()
print(f"\nAs Pandas DataFrame:\n{df_result}")
except Exception as e:
print(f"Could not convert to Pandas: {e}")
# Try to get as Pandas DataFrame
try:
df_result = batch_scanner2.to_pandas()
print(f"\nAs Pandas DataFrame:\n{df_result}")
except Exception as e:
print(f"Could not convert to Pandas: {e}")

# TODO: support to_arrow_batch_reader()
# which is reserved for streaming use cases
Expand All @@ -301,43 +300,43 @@ async def main():

# Test poll_arrow() method for incremental reading as Arrow Table
print("\n--- Testing poll_arrow() method ---")
batch_scanner3 = await table.new_scan().create_record_batch_log_scanner()
batch_scanner3.subscribe(bucket_id=0, start_offset=fluss.EARLIEST_OFFSET)
print(f"Subscribed to bucket 0 at EARLIEST_OFFSET ({fluss.EARLIEST_OFFSET})")

# Poll with a timeout of 5000ms (5 seconds)
# Note: poll_arrow() returns an empty table (not an error) on timeout
try:
poll_result = batch_scanner3.poll_arrow(5000)
print(f"Number of rows: {poll_result.num_rows}")

if poll_result.num_rows > 0:
poll_df = poll_result.to_pandas()
print(f"Polled data:\n{poll_df}")
else:
print("Empty result (no records available)")
# Empty table still has schema - this is useful!
print(f"Schema: {poll_result.schema}")

except Exception as e:
print(f"Error during poll_arrow: {e}")
async with await table.new_scan().create_record_batch_log_scanner() as batch_scanner3:
batch_scanner3.subscribe(bucket_id=0, start_offset=fluss.EARLIEST_OFFSET)
print(f"Subscribed to bucket 0 at EARLIEST_OFFSET ({fluss.EARLIEST_OFFSET})")

# Poll with a timeout of 5000ms (5 seconds)
# Note: poll_arrow() returns an empty table (not an error) on timeout
try:
poll_result = batch_scanner3.poll_arrow(5000)
print(f"Number of rows: {poll_result.num_rows}")

if poll_result.num_rows > 0:
poll_df = poll_result.to_pandas()
print(f"Polled data:\n{poll_df}")
else:
print("Empty result (no records available)")
# Empty table still has schema - this is useful!
print(f"Schema: {poll_result.schema}")

except Exception as e:
print(f"Error during poll_arrow: {e}")

# Test poll_record_batch() method for batches with metadata
print("\n--- Testing poll_record_batch() method ---")
batch_scanner4 = await table.new_scan().create_record_batch_log_scanner()
batch_scanner4.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)})
async with await table.new_scan().create_record_batch_log_scanner() as batch_scanner4:
batch_scanner4.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)})

try:
batches = batch_scanner4.poll_record_batch(5000)
print(f"Number of batches: {len(batches)}")
try:
batches = batch_scanner4.poll_record_batch(5000)
print(f"Number of batches: {len(batches)}")

for i, batch in enumerate(batches):
print(f" Batch {i}: bucket={batch.bucket}, "
f"offsets={batch.base_offset}-{batch.last_offset}, "
f"rows={batch.batch.num_rows}")
for i, batch in enumerate(batches):
print(f" Batch {i}: bucket={batch.bucket}, "
f"offsets={batch.base_offset}-{batch.last_offset}, "
f"rows={batch.batch.num_rows}")

except Exception as e:
print(f"Error during poll_record_batch: {e}")
except Exception as e:
print(f"Error during poll_record_batch: {e}")

except Exception as e:
print(f"Error during batch scanning: {e}")
Expand All @@ -346,49 +345,49 @@ async def main():
print("\n--- Scanning table (record scanner) ---")
try:
# Use new_scan().create_log_scanner() for record-based operations
record_scanner = await table.new_scan().create_log_scanner()
print(f"Created record scanner: {record_scanner}")

record_scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)})

# Poll returns ScanRecords — records grouped by bucket
print("\n--- Testing poll() method (record-by-record) ---")
try:
scan_records = record_scanner.poll(5000)
print(f"Total records: {scan_records.count()}, buckets: {len(scan_records.buckets())}")

# Flat iteration over all records (regardless of bucket)
print(f" Flat iteration: {scan_records.count()} records")
for record in scan_records:
print(f" offset={record.offset}, timestamp={record.timestamp}")

# Per-bucket access
for bucket in scan_records.buckets():
bucket_recs = scan_records.records(bucket)
print(f" Bucket {bucket}: {len(bucket_recs)} records")
for record in bucket_recs[:3]:
print(f" offset={record.offset}, "
f"timestamp={record.timestamp}, "
f"change_type={record.change_type}, "
f"row={record.row}")

except Exception as e:
print(f"Error during poll: {e}")
async with await table.new_scan().create_log_scanner() as record_scanner:
print(f"Created record scanner: {record_scanner}")

record_scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)})

# Poll returns ScanRecords — records grouped by bucket
print("\n--- Testing poll() method (record-by-record) ---")
try:
scan_records = record_scanner.poll(5000)
print(f"Total records: {scan_records.count()}, buckets: {len(scan_records.buckets())}")

# Flat iteration over all records (regardless of bucket)
print(f" Flat iteration: {scan_records.count()} records")
for record in scan_records:
print(f" offset={record.offset}, timestamp={record.timestamp}")

# Per-bucket access
for bucket in scan_records.buckets():
bucket_recs = scan_records.records(bucket)
print(f" Bucket {bucket}: {len(bucket_recs)} records")
for record in bucket_recs[:3]:
print(f" offset={record.offset}, "
f"timestamp={record.timestamp}, "
f"change_type={record.change_type}, "
f"row={record.row}")

except Exception as e:
print(f"Error during poll: {e}")

except Exception as e:
print(f"Error during record scanning: {e}")

# Demo: unsubscribe — unsubscribe from a bucket (non-partitioned tables)
print("\n--- Testing unsubscribe ---")
try:
unsub_scanner = await table.new_scan().create_record_batch_log_scanner()
unsub_scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)})
print(f"Subscribed to {num_buckets} buckets")
# Unsubscribe from bucket 0 — future polls will skip this bucket
unsub_scanner.unsubscribe(bucket_id=0)
print("Unsubscribed from bucket 0")
remaining = unsub_scanner.poll_arrow(5000)
print(f"After unsubscribe, got {remaining.num_rows} records (from remaining buckets)")
async with await table.new_scan().create_record_batch_log_scanner() as unsub_scanner:
unsub_scanner.subscribe_buckets({i: fluss.EARLIEST_OFFSET for i in range(num_buckets)})
print(f"Subscribed to {num_buckets} buckets")
# Unsubscribe from bucket 0 — future polls will skip this bucket
unsub_scanner.unsubscribe(bucket_id=0)
print("Unsubscribed from bucket 0")
remaining = unsub_scanner.poll_arrow(5000)
print(f"After unsubscribe, got {remaining.num_rows} records (from remaining buckets)")
except Exception as e:
print(f"Error during unsubscribe test: {e}")

Expand Down Expand Up @@ -437,8 +436,8 @@ async def main():
# --- Test Upsert ---
print("\n--- Testing Upsert (fire-and-forget) ---")
try:
upsert_writer = pk_table.new_upsert().create_writer()
print(f"Created upsert writer: {upsert_writer}")
async with pk_table.new_upsert().create_writer() as upsert_writer:
print(f"Created upsert writer: {upsert_writer}")

# Fire-and-forget: queue writes synchronously, flush at end.
# Records are batched internally for efficiency.
Expand Down Expand Up @@ -489,9 +488,10 @@ async def main():
)
print("Queued user_id=3 (Charlie)")

# flush() waits for all queued writes to be acknowledged by the server
await upsert_writer.flush()
print("Flushed — all 3 rows acknowledged by server")
# flush() is automatically called by the 'async with' block on successful exit.
# No manual flush is needed here:
Comment on lines +491 to +492
Copy link

Copilot AI Apr 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment claims the async with block will automatically call both flush() and close(), but UpsertWriter.__aexit__ currently only flushes and does not close/invalidate the writer. Please update the example comment to reflect reality (or add close semantics and invoke them from __aexit__).

Suggested change
# flush() and close() are automatically called by the 'async with' block on successful exit.
# Bypass manual flush:
# flush() is automatically called by the 'async with' block on successful exit.
# No manual flush is needed here:

Copilot uses AI. Check for mistakes.
# await upsert_writer.flush()
print("Ending upsert writer context (auto-flushing)")

# Per-record acknowledgment: await the returned handle to block until
# the server confirms this specific write, useful when you need to
Expand Down
78 changes: 78 additions & 0 deletions bindings/python/fluss/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,13 @@ class FlussConnection:
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> bool: ...
async def __aenter__(self) -> FlussConnection: ...
async def __aexit__(
self,
exc_type: Optional[type],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> bool: ...
def __repr__(self) -> str: ...

class ServerNode:
Expand Down Expand Up @@ -611,6 +618,37 @@ class AppendWriter:
def write_arrow_batch(self, batch: pa.RecordBatch) -> WriteResultHandle: ...
def write_pandas(self, df: pd.DataFrame) -> None: ...
async def flush(self) -> None: ...
async def __aenter__(self) -> AppendWriter:
"""
Enter the async context manager.

Returns:
The AppendWriter instance.
"""
...
async def __aexit__(
self,
exc_type: Optional[type],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> bool:
"""
Exit the async context manager.

On successful exit, the writer is automatically flushed to ensure
all pending records are sent and acknowledged.

Note on Exceptions:
If an exception occurs inside the `async with` block, `flush()` is
bypassed to return control to the event loop immediately. However,
any records already passed to `append()` prior to the exception
reside in a shared background buffer and will still be transmitted
to the server.

To achieve true atomicity, buffer your records in a Python list and
write them in a single batch at the end of your logic.
"""
...
def __repr__(self) -> str: ...

class UpsertWriter:
Expand Down Expand Up @@ -644,6 +682,37 @@ class UpsertWriter:
async def flush(self) -> None:
"""Flush all pending upsert/delete operations to the server."""
...
async def __aenter__(self) -> UpsertWriter:
"""
Enter the async context manager.

Returns:
The UpsertWriter instance.
"""
...
async def __aexit__(
self,
exc_type: Optional[type],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> bool:
"""
Exit the async context manager.

On successful exit, the writer is automatically flushed to ensure
all pending records are sent and acknowledged.

Note on Exceptions:
If an exception occurs inside the `async with` block, `flush()` is
bypassed to return control to the event loop immediately. However,
any records already passed to `upsert()` or `delete()` prior to the
exception reside in a shared background buffer and will still be
transmitted to the server.

To achieve true atomicity, buffer your records in a Python list and
write them in a single batch at the end of your logic.
"""
...
def __repr__(self) -> str: ...


Expand Down Expand Up @@ -807,6 +876,15 @@ class LogScanner:

You must call subscribe(), subscribe_buckets(), or subscribe_partition() first.
"""
...
def close(self) -> None: ...
async def __aenter__(self) -> LogScanner: ...
async def __aexit__(
self,
exc_type: Optional[type],
exc_value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> bool: ...
def __repr__(self) -> str: ...
def __aiter__(self) -> AsyncIterator[Union[ScanRecord, RecordBatch]]: ...

Expand Down
22 changes: 22 additions & 0 deletions bindings/python/src/connection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,28 @@ impl FlussConnection {
Ok(false)
}

// Async context-manager entry point, enabling `async with FlussConnection...`.
// Simply resolves to the connection object itself: all real setup already
// happened in `create()`, so entering the context acquires nothing new.
fn __aenter__<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
    // Detach the Python handle from the GIL lifetime so it can be moved
    // into the returned future, then hand it straight back to the caller.
    let this = slf.into_pyobject(py)?.unbind();
    future_into_py(py, async move { Ok(this) })
}

// Async context-manager exit point, enabling `async with FlussConnection...`.
//
// Mirrors the synchronous `__exit__` by closing the connection on exit.
// `close()` is synchronous, so it runs before the future is created; the
// future itself only carries the `False` result back to the event loop.
// Returning `Ok(false)` never suppresses an exception raised inside the
// `async with` body.
#[pyo3(signature = (_exc_type=None, _exc_value=None, _traceback=None))]
fn __aexit__<'py>(
    &mut self,
    py: Python<'py>,
    _exc_type: Option<Bound<'py, PyAny>>,
    _exc_value: Option<Bound<'py, PyAny>>,
    _traceback: Option<Bound<'py, PyAny>>,
) -> PyResult<Bound<'py, PyAny>> {
    // Keep async teardown consistent with the sync context manager:
    // release connection resources even when the caller used `async with`.
    self.close()?;
    future_into_py(py, async move {
        // In the future, an async close on the core connection could also
        // run here, e.g. `client.close().await`.
        Ok(false)
    })
}
Comment on lines +113 to +127
Copy link

Copilot AI Apr 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__exit__ calls self.close()?, but the new async context manager __aexit__ does not. This means async with await FlussConnection.create(...) will not close the connection (even if close() is implemented later). Please mirror __exit__ by calling self.close()? in __aexit__ (it can be done before creating the future since close() is synchronous).

Copilot uses AI. Check for mistakes.

// Human-readable representation shown by Python's `repr()`.
// The connection exposes no user-visible fields here, so the repr is a fixed tag.
fn __repr__(&self) -> String {
    String::from("FlussConnection()")
}
Expand Down
Loading
Loading