diff --git a/src/content/docs/guides/testing/write-tests.mdx b/src/content/docs/guides/testing/write-tests.mdx
index ef087b8b7..30c9632d3 100644
--- a/src/content/docs/guides/testing/write-tests.mdx
+++ b/src/content/docs/guides/testing/write-tests.mdx
@@ -354,7 +354,7 @@ key and opt into graceful skipping with `skip: {on: capability-unavailable}`:
# tests/gcs/test.yaml
suite: gcs-smoke
requires:
- operators: [from_gcs]
+ operators: [from_google_cloud_storage]
skip:
on: capability-unavailable
```
@@ -382,7 +382,7 @@ You now have a project that owns its inputs, tests, fixtures, and baselines.
From here you can:
- testing/run-tests to learn about executing the suite,
- selecting tests, and setting up CI.
+selecting tests, and setting up CI.
- Add custom runners under `runners/` when you need specialized logic around
`tenzir` invocations.
- Build Python fixtures that publish or verify data through the helper APIs in
diff --git a/src/content/docs/integrations/google/cloud-storage.mdx b/src/content/docs/integrations/google/cloud-storage.mdx
index 849436bb9..f6aad2b66 100644
--- a/src/content/docs/integrations/google/cloud-storage.mdx
+++ b/src/content/docs/integrations/google/cloud-storage.mdx
@@ -14,20 +14,20 @@ Default Credentials](https://google.aip.dev/auth/4110).
## Examples
-Use from_gcs to read files from Cloud
+Use from_google_cloud_storage to read files from Cloud
Storage. It supports glob patterns and automatic format detection. For writing,
use save_gcs with a print operator.
### Read events from a file in a bucket
```tql
-from_gcs "gs://bucket/path/to/file.json"
+from_google_cloud_storage "gs://bucket/path/to/file.json"
```
### Read all JSON files from a bucket
```tql
-from_gcs "gs://bucket/logs/**.json"
+from_google_cloud_storage "gs://bucket/logs/**.json"
```
### Write an event to a file in a bucket
diff --git a/src/content/docs/reference/operators.mdx b/src/content/docs/reference/operators.mdx
index d5390fbc9..58de31782 100644
--- a/src/content/docs/reference/operators.mdx
+++ b/src/content/docs/reference/operators.mdx
@@ -363,14 +363,14 @@ operators:
description: 'Receives events via Fluent Bit.'
example: 'from_fluent_bit "opentelemetry"'
path: 'reference/operators/from_fluent_bit'
- - name: 'from_gcs'
- description: 'Reads one or multiple files from Google Cloud Storage.'
- example: 'from_gcs "gs://my-bucket/data/**.json"'
- path: 'reference/operators/from_gcs'
- name: 'from_google_cloud_pubsub'
description: 'Subscribes to a Google Cloud Pub/Sub subscription and yields events.'
example: 'from_google_cloud_pubsub project_id="my-project", subscription_id="my-sub"'
path: 'reference/operators/from_google_cloud_pubsub'
+ - name: 'from_google_cloud_storage'
+ description: 'Reads one or multiple files from Google Cloud Storage.'
+ example: 'from_google_cloud_storage "gs://my-bucket/data/**.json"'
+ path: 'reference/operators/from_google_cloud_storage'
- name: 'from_http'
description: 'Sends and receives HTTP/1.1 requests.'
example: 'from_http "0.0.0.0:8080"'
@@ -735,6 +735,10 @@ operators:
description: 'Sends OCSF events to Amazon Security Lake.'
example: 'to_amazon_security_lake "s3://…"'
path: 'reference/operators/to_amazon_security_lake'
+ - name: 'to_azure_blob_storage'
+ description: 'Writes events to one or multiple blobs in Azure Blob Storage.'
+ example: 'to_azure_blob_storage "abfs://container/data/{uuid}.json" { write_ndjson }'
+ path: 'reference/operators/to_azure_blob_storage'
- name: 'to_azure_log_analytics'
description: 'Sends events to the Microsoft Azure Logs Ingestion API.'
example: 'to_azure_log_analytics tenant_id="...", workspace_id="..."'
@@ -743,6 +747,10 @@ operators:
description: 'Sends events to a ClickHouse table.'
example: 'to_clickhouse table="my_table"'
path: 'reference/operators/to_clickhouse'
+ - name: 'to_file'
+ description: 'Writes events to one or multiple files on a filesystem.'
+ example: 'to_file "/tmp/out.json" { write_ndjson }'
+ path: 'reference/operators/to_file'
- name: 'to_fluent_bit'
description: 'Sends events via Fluent Bit.'
example: 'to_fluent_bit "elasticsearch" …'
@@ -755,6 +763,10 @@ operators:
description: 'Publishes events to a Google Cloud Pub/Sub topic.'
example: 'to_google_cloud_pubsub project_id="my-project", topic_id="alerts", message=text'
path: 'reference/operators/to_google_cloud_pubsub'
+ - name: 'to_google_cloud_storage'
+ description: 'Writes events to one or multiple objects in Google Cloud Storage.'
+ example: 'to_google_cloud_storage "gs://my-bucket/data/{uuid}.json" { write_ndjson }'
+ path: 'reference/operators/to_google_cloud_storage'
- name: 'to_google_secops'
description: 'Sends unstructured events to a Google SecOps Chronicle instance.'
example: 'to_google_secops …'
@@ -775,6 +787,10 @@ operators:
description: 'Sends events to an OpenSearch-compatible Bulk API.'
example: 'to_opensearch "localhost:9200", …'
path: 'reference/operators/to_opensearch'
+ - name: 'to_s3'
+ description: 'Writes events to one or multiple objects in Amazon S3.'
+ example: 'to_s3 "s3://my-bucket/data/{uuid}.json" { write_ndjson }'
+ path: 'reference/operators/to_s3'
- name: 'to_sentinelone_data_lake'
description: 'Sends security events to SentinelOne Singularity Data Lake via REST API.'
example: 'to_sentinelone_data_lake "https://…", …'
@@ -2239,18 +2255,18 @@ from_fluent_bit "opentelemetry"
-
+
```tql
-from_gcs "gs://my-bucket/data/**.json"
+from_google_cloud_pubsub project_id="my-project", subscription_id="my-sub"
```
-
+
```tql
-from_google_cloud_pubsub project_id="my-project", subscription_id="my-sub"
+from_google_cloud_storage "gs://my-bucket/data/**.json"
```
@@ -2595,6 +2611,14 @@ to_amazon_security_lake "s3://…"
+
+
+```tql
+to_azure_blob_storage "abfs://container/data/{uuid}.json" { write_ndjson }
+```
+
+
+
```tql
@@ -2611,6 +2635,14 @@ to_clickhouse table="my_table"
+
+
+```tql
+to_file "/tmp/out.json" { write_ndjson }
+```
+
+
+
```tql
@@ -2635,6 +2667,14 @@ to_google_cloud_pubsub project_id="my-project", topic_id="alerts", message=text
+
+
+```tql
+to_google_cloud_storage "gs://my-bucket/data/{uuid}.json" { write_ndjson }
+```
+
+
+
```tql
@@ -2675,6 +2715,14 @@ to_opensearch "localhost:9200", …
+
+
+```tql
+to_s3 "s3://my-bucket/data/{uuid}.json" { write_ndjson }
+```
+
+
+
```tql
diff --git a/src/content/docs/reference/operators/from_azure_blob_storage.mdx b/src/content/docs/reference/operators/from_azure_blob_storage.mdx
index 40f2d49a2..811525086 100644
--- a/src/content/docs/reference/operators/from_azure_blob_storage.mdx
+++ b/src/content/docs/reference/operators/from_azure_blob_storage.mdx
@@ -10,7 +10,7 @@ Reads one or multiple files from Azure Blob Storage.
```tql
from_azure_blob_storage url:string, [account_key=string, watch=bool,
- remove=bool, rename=string->string, path_field=field, max_age=duration] { … }
+ remove=bool, rename=string->string, max_age=duration] { … }
```
## Description
@@ -90,7 +90,10 @@ from_azure_blob_storage "abfs://input/**.json",
### Add source path to events
```tql
-from_azure_blob_storage "abfs://data/**.json", path_field=source_file
+from_azure_blob_storage "abfs://data/**.json" {
+ read_json
+ source_file = $file.path
+}
```
## See Also
diff --git a/src/content/docs/reference/operators/from_file.mdx b/src/content/docs/reference/operators/from_file.mdx
index 10ded2a93..5119ed4f3 100644
--- a/src/content/docs/reference/operators/from_file.mdx
+++ b/src/content/docs/reference/operators/from_file.mdx
@@ -10,7 +10,7 @@ Reads one or multiple files from a filesystem.
```tql
from_file url:string, [watch=bool, remove=bool, rename=string->string,
- path_field=field, max_age=duration, mmap=bool] { … }
+ max_age=duration, mmap=bool] { … }
```
## Description
diff --git a/src/content/docs/reference/operators/from_gcs.mdx b/src/content/docs/reference/operators/from_google_cloud_storage.mdx
similarity index 74%
rename from src/content/docs/reference/operators/from_gcs.mdx
rename to src/content/docs/reference/operators/from_google_cloud_storage.mdx
index f2748a5f6..bcd8949a4 100644
--- a/src/content/docs/reference/operators/from_gcs.mdx
+++ b/src/content/docs/reference/operators/from_google_cloud_storage.mdx
@@ -1,7 +1,7 @@
---
-title: from_gcs
+title: from_google_cloud_storage
category: Inputs/Events
-example: 'from_gcs "gs://my-bucket/data/**.json"'
+example: 'from_google_cloud_storage "gs://my-bucket/data/**.json"'
---
import FromFileCommonParams from '@partials/operators/FromFileCommonParams.mdx';
@@ -9,13 +9,13 @@ import FromFileCommonParams from '@partials/operators/FromFileCommonParams.mdx';
Reads one or multiple files from Google Cloud Storage.
```tql
-from_gcs url:string, [anonymous=bool, watch=bool, remove=bool,
- rename=string->string, path_field=field, max_age=duration] { … }
+from_google_cloud_storage url:string, [anonymous=bool, watch=bool, remove=bool,
+ rename=string->string, max_age=duration] { … }
```
## Description
-The `from_gcs` operator reads files from Google Cloud Storage, with support for
+The `from_google_cloud_storage` operator reads files from Google Cloud Storage, with support for
glob patterns, automatic format detection, and file monitoring.
By default, authentication is handled by Google's Application Default
@@ -56,19 +56,19 @@ Defaults to `false`.
### Read every JSON file from a bucket
```tql
-from_gcs "gs://my-bucket/data/**.json"
+from_google_cloud_storage "gs://my-bucket/data/**.json"
```
### Read CSV files from a public bucket
```tql
-from_gcs "gs://public-dataset/data.csv", anonymous=true
+from_google_cloud_storage "gs://public-dataset/data.csv", anonymous=true
```
### Read Zeek logs continuously
```tql
-from_gcs "gs://logs/zeek/**.log", watch=true {
+from_google_cloud_storage "gs://logs/zeek/**.log", watch=true {
read_zeek_tsv
}
```
@@ -76,13 +76,16 @@ from_gcs "gs://logs/zeek/**.log", watch=true {
### Add source path to events
```tql
-from_gcs "gs://data-bucket/**.json", path_field=source_file
+from_google_cloud_storage "gs://data-bucket/**.json" {
+ read_json
+ source_file = $file.path
+}
```
### Read Suricata EVE JSON logs with custom parsing
```tql
-from_gcs "gs://security-logs/suricata/**.json" {
+from_google_cloud_storage "gs://security-logs/suricata/**.json" {
read_suricata
}
```
diff --git a/src/content/docs/reference/operators/from_s3.mdx b/src/content/docs/reference/operators/from_s3.mdx
index c91d5ce4b..c51ad9b7b 100644
--- a/src/content/docs/reference/operators/from_s3.mdx
+++ b/src/content/docs/reference/operators/from_s3.mdx
@@ -11,7 +11,7 @@ Reads one or multiple files from Amazon S3.
```tql
from_s3 url:string, [anonymous=bool, aws_iam=record, watch=bool,
- remove=bool, rename=string->string, path_field=field, max_age=duration] { … }
+ remove=bool, rename=string->string, max_age=duration] { … }
```
## Description
@@ -98,7 +98,10 @@ from_s3 "s3://input-bucket/**.json",
### Add source path to events
```tql
-from_s3 "s3://data-bucket/**.json", path_field=source_file
+from_s3 "s3://data-bucket/**.json" {
+ read_json
+ source_file = $file.path
+}
```
### Read Zeek logs with anonymous access
diff --git a/src/content/docs/reference/operators/to_azure_blob_storage.mdx b/src/content/docs/reference/operators/to_azure_blob_storage.mdx
new file mode 100644
index 000000000..520440dba
--- /dev/null
+++ b/src/content/docs/reference/operators/to_azure_blob_storage.mdx
@@ -0,0 +1,110 @@
+---
+title: to_azure_blob_storage
+category: Outputs/Events
+example: 'to_azure_blob_storage "abfs://container/data/{uuid}.json" { write_ndjson }'
+---
+
+import ToArrowFsCommonParams from '@partials/operators/ToArrowFsCommonParams.mdx';
+
+Writes events to one or multiple blobs in Azure Blob Storage.
+
+```tql
+to_azure_blob_storage url:string, [account_key=string, max_size=int,
+ timeout=duration, partition_by=list] { … }
+```
+
+## Description
+
+The `to_azure_blob_storage` operator writes events to Azure Blob Storage,
+automatically opening new blobs when a rotation condition triggers. It
+supports hive-style partitioning through a `**` placeholder in the URL and
+per-partition unique blob names through a `{uuid}` placeholder.
+
+By default, authentication is handled by the Azure SDK's credential chain,
+which may read from multiple environment variables, such as:
+
+- `AZURE_TENANT_ID`
+- `AZURE_CLIENT_ID`
+- `AZURE_CLIENT_SECRET`
+- `AZURE_AUTHORITY_HOST`
+- `AZURE_CLIENT_CERTIFICATE_PATH`
+- `AZURE_FEDERATED_TOKEN_FILE`
+
+### `url: string`
+
+URL identifying the Azure Blob Storage location where data should be written.
+
+Supported URI formats:
+
+1. `abfs[s]://<account>.blob.core.windows.net[/<container>[/<path>]]`
+2. `abfs[s]://<container>@<account>.dfs.core.windows.net[/<path>]`
+3. `abfs[s]://[<account>@]<host>[.<domain>][:<port>][/<container>[/<path>]]`
+4. `abfs[s]://[<account>@]<container>[/<path>]`
+
+(1) and (2) are compatible with the Azure Data Lake Storage Gen2 URIs, (3) is
+for Azure Blob Storage compatible services including Azurite, and (4) is a
+shorter version of (1) and (2).
+
+The path portion may contain two placeholders:
+
+- `**` marks the location where hive partition segments are inserted. When
+ present, `partition_by` must also be set, and vice versa.
+- `{uuid}` expands to a fresh UUIDv7 for every blob. This is required when
+ partitioning or when rotation can produce multiple blobs, so that rotated
+ or per-partition blobs do not overwrite each other.
+
+:::tip[Authenticate with the Azure CLI]
+Run `az login` on the command-line to authenticate the current user with the
+Azure CLI.
+:::
+
+### `account_key = string (optional)`
+
+Account key for authenticating with Azure Blob Storage.
+
+<ToArrowFsCommonParams />
+
+## Examples
+
+### Write a single NDJSON blob
+
+```tql
+to_azure_blob_storage "abfs://my-container/events/out_{uuid}.json" {
+ write_ndjson
+}
+```
+
+### Partition events into Parquet blobs by date
+
+```tql
+to_azure_blob_storage "abfs://my-container/events/**/data_{uuid}.parquet",
+ partition_by=[year, month] {
+ write_parquet
+}
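+// Produces blobs under:
+// abfs://my-container/events/year=<year>/month=<month>/data_<uuid>.parquet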
+```
+
+### Write with explicit account key
+
+```tql
+to_azure_blob_storage "abfs://container/data/out_{uuid}.json",
+ account_key="your-account-key" {
+ write_ndjson
+}
+```
+
+### Rotate to a new blob every 5 minutes
+
+```tql
+to_azure_blob_storage "abfs://my-container/logs/events_{uuid}.json",
+ timeout=5min {
+ write_ndjson
+}
+```
+
+## See Also
+
+- from_azure_blob_storage
+- save_azure_blob_storage
+- to_file
+- to_hive
+- microsoft/azure-blob-storage
diff --git a/src/content/docs/reference/operators/to_file.mdx b/src/content/docs/reference/operators/to_file.mdx
new file mode 100644
index 000000000..95a6b28b0
--- /dev/null
+++ b/src/content/docs/reference/operators/to_file.mdx
@@ -0,0 +1,99 @@
+---
+title: to_file
+category: Outputs/Events
+example: 'to_file "/tmp/out.json" { write_ndjson }'
+---
+
+import ToArrowFsCommonParams from '@partials/operators/ToArrowFsCommonParams.mdx';
+
+Writes events to one or multiple files on a filesystem.
+
+```tql
+to_file url:string, [max_size=int, timeout=duration,
+ partition_by=list] { … }
+```
+
+## Description
+
+The `to_file` operator writes events to local filesystems, automatically
+opening new files when a rotation condition triggers. It supports hive-style
+partitioning through a `**` placeholder in the URL and per-partition unique
+filenames through a `{uuid}` placeholder.
+
+### `url: string`
+
+Path or URL identifying where files should be written.
+
+When `~` is the first character, it is substituted with the value of the
+`$HOME` environment variable. Relative paths are resolved against the current
+working directory.
+
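+For instance, assuming `$HOME` is `/home/alice`, this minimal sketch writes to
+`/home/alice/logs/out.json`:
+
+```tql
+to_file "~/logs/out.json" {
+  write_ndjson
+}
+```
+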
+The path may contain two placeholders:
+
+- `**` marks the location where hive partition segments are inserted. When
+ present, `partition_by` must also be set, and vice versa.
+- `{uuid}` expands to a fresh UUIDv7 for every file. This is required when
+ partitioning or when rotation can produce multiple files, so that rotated
+ or per-partition files do not overwrite each other.
+
+<ToArrowFsCommonParams />
+
+## Examples
+
+### Write all events to a single NDJSON file
+
+```tql
+from {msg: "hello"}, {msg: "world"}
+to_file "/tmp/out.json" {
+ write_ndjson
+}
+```
+
+### Partition events by a field into separate files
+
+```tql
+from {region: "us", n: 1},
+ {region: "eu", n: 2},
+ {region: "us", n: 3}
+to_file "/tmp/events/**/data_{uuid}.json", partition_by=[region] {
+ write_ndjson
+}
+// Produces files under:
+// /tmp/events/region=us/data_<uuid>.json
+// /tmp/events/region=eu/data_<uuid>.json
+```
+
+### Partition by multiple fields
+
+```tql
+to_file "/tmp/events/**/data_{uuid}.parquet",
+ partition_by=[year, month] {
+ write_parquet
+}
+// Produces files under:
+// /tmp/events/year=<year>/month=<month>/data_<uuid>.parquet
+```
+
+### Rotate files when they exceed a size
+
+```tql
+to_file "/tmp/logs/events_{uuid}.json", max_size=10M {
+ write_ndjson
+}
+```
+
+### Rotate files after a fixed duration
+
+```tql
+to_file "/tmp/logs/events_{uuid}.json", timeout=1min {
+ write_ndjson
+}
+```
+
+## See Also
+
+- from_file
+- save_file
+- to
+- to_hive
+- file
diff --git a/src/content/docs/reference/operators/to_google_cloud_storage.mdx b/src/content/docs/reference/operators/to_google_cloud_storage.mdx
new file mode 100644
index 000000000..146becef9
--- /dev/null
+++ b/src/content/docs/reference/operators/to_google_cloud_storage.mdx
@@ -0,0 +1,96 @@
+---
+title: to_google_cloud_storage
+category: Outputs/Events
+example: 'to_google_cloud_storage "gs://my-bucket/data/{uuid}.json" { write_ndjson }'
+---
+
+import ToArrowFsCommonParams from '@partials/operators/ToArrowFsCommonParams.mdx';
+
+Writes events to one or multiple objects in Google Cloud Storage.
+
+```tql
+to_google_cloud_storage url:string, [anonymous=bool, max_size=int,
+ timeout=duration, partition_by=list] { … }
+```
+
+## Description
+
+The `to_google_cloud_storage` operator writes events to Google Cloud Storage,
+automatically opening new objects when a rotation condition triggers. It
+supports hive-style partitioning through a `**` placeholder in the URL and
+per-partition unique object names through a `{uuid}` placeholder.
+
+By default, authentication is handled by Google's Application Default
+Credentials (ADC) chain, which may read from multiple sources:
+
+- `GOOGLE_APPLICATION_CREDENTIALS` environment variable pointing to a service
+ account key file
+- User credentials from `gcloud auth application-default login`
+- Service account attached to the compute instance (Compute Engine, GKE)
+- Google Cloud SDK credentials
+
+### `url: string`
+
+URL identifying the Google Cloud Storage location where data should be written.
+
+The syntax is `gs://<bucket-name>/<full-path-to-object>(?<options>)`. The
+`<options>` are query parameters. Per the [Arrow
+documentation](https://arrow.apache.org/docs/r/articles/fs.html#connecting-directly-with-a-uri),
+the following options exist:
+
+> For GCS, the supported parameters are `scheme`, `endpoint_override`, and
+> `retry_limit_seconds`.
+
+The path portion may contain two placeholders:
+
+- `**` marks the location where hive partition segments are inserted. When
+ present, `partition_by` must also be set, and vice versa.
+- `{uuid}` expands to a fresh UUIDv7 for every object. This is required when
+ partitioning or when rotation can produce multiple objects, so that rotated
+ or per-partition objects do not overwrite each other.
+
+### `anonymous = bool (optional)`
+
+Use anonymous credentials instead of any configured authentication. This only
+works for publicly writable buckets.
+
+Defaults to `false`.
+
+<ToArrowFsCommonParams />
+
+## Examples
+
+### Write a single NDJSON object
+
+```tql
+to_google_cloud_storage "gs://my-bucket/events/out_{uuid}.json" {
+ write_ndjson
+}
+```
+
+### Partition events into Parquet objects by date
+
+```tql
+to_google_cloud_storage "gs://my-bucket/events/**/data_{uuid}.parquet",
+ partition_by=[year, month] {
+ write_parquet
+}
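+// Produces objects under:
+// gs://my-bucket/events/year=<year>/month=<month>/data_<uuid>.parquet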
+```
+
+### Rotate to a new object every 100 MB
+
+```tql
+to_google_cloud_storage "gs://my-bucket/logs/events_{uuid}.json.gz",
+ max_size=100M {
+ write_ndjson
+ compress_gzip
+}
+```
+
+## See Also
+
+- from_google_cloud_storage
+- save_gcs
+- to_file
+- to_hive
+- google/cloud-storage
diff --git a/src/content/docs/reference/operators/to_s3.mdx b/src/content/docs/reference/operators/to_s3.mdx
new file mode 100644
index 000000000..26e677d64
--- /dev/null
+++ b/src/content/docs/reference/operators/to_s3.mdx
@@ -0,0 +1,122 @@
+---
+title: to_s3
+category: Outputs/Events
+example: 'to_s3 "s3://my-bucket/data/{uuid}.json" { write_ndjson }'
+---
+
+import ToArrowFsCommonParams from '@partials/operators/ToArrowFsCommonParams.mdx';
+import AWSIAMOptions from '@partials/operators/AWSIAMOptions.mdx';
+
+Writes events to one or multiple objects in Amazon S3.
+
+```tql
+to_s3 url:string, [anonymous=bool, aws_iam=record, max_size=int,
+ timeout=duration, partition_by=list] { … }
+```
+
+## Description
+
+The `to_s3` operator writes events to Amazon S3, automatically opening new
+objects when a rotation condition triggers. It supports hive-style
+partitioning through a `**` placeholder in the URL and per-partition unique
+object keys through a `{uuid}` placeholder.
+
+By default, authentication is handled by AWS's default credentials provider
+chain, which may read from multiple environment variables and credential files:
+
+- `~/.aws/credentials` and `~/.aws/config`
+- `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
+- `AWS_SESSION_TOKEN`
+- EC2 instance metadata service
+- ECS container credentials
+
+### `url: string`
+
+URL identifying the S3 location where data should be written.
+
+Supported URI format:
+`s3://[<access-key>:<secret-key>@]<bucket-name>/<full-path-to-object>(?<options>)`
+
+Options can be appended to the path as query parameters:
+
+- `region`: AWS region (e.g., `us-east-1`)
+- `scheme`: Connection scheme (`http` or `https`)
+- `endpoint_override`: Custom S3-compatible endpoint
+- `allow_bucket_creation`: Allow creating buckets if they don't exist
+- `allow_bucket_deletion`: Allow deleting buckets
+
+The path portion may contain two placeholders:
+
+- `**` marks the location where hive partition segments are inserted. When
+ present, `partition_by` must also be set, and vice versa.
+- `{uuid}` expands to a fresh UUIDv7 for every object. This is required when
+ partitioning or when rotation can produce multiple objects, so that rotated
+ or per-partition objects do not overwrite each other.
+
+### `anonymous = bool (optional)`
+
+Use anonymous credentials instead of any configured authentication.
+
+Defaults to `false`.
+
+<AWSIAMOptions />
+
+<ToArrowFsCommonParams />
+
+## Examples
+
+### Write a single NDJSON object to S3
+
+```tql
+to_s3 "s3://my-bucket/events/out_{uuid}.json" {
+ write_ndjson
+}
+```
+
+### Partition events by date into Parquet objects
+
+```tql
+to_s3 "s3://my-bucket/events/**/data_{uuid}.parquet",
+ partition_by=[year, month, day] {
+ write_parquet
+}
+// Produces objects under:
+// s3://my-bucket/events/year=<year>/month=<month>/day=<day>/data_<uuid>.parquet
+```
+
+### Write with explicit credentials
+
+```tql
+to_s3 "s3://my-bucket/data/out_{uuid}.json",
+ aws_iam={
+ access_key_id: "AKIAIOSFODNN7EXAMPLE",
+ secret_access_key: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
+ } {
+ write_ndjson
+}
+```
+
+### Write to an S3-compatible endpoint
+
+```tql
+to_s3 "s3://my-bucket/data/out_{uuid}.json?endpoint_override=minio.example.com:9000&scheme=http" {
+ write_ndjson
+}
+```
+
+### Rotate to a new object every 10 MB
+
+```tql
+to_s3 "s3://my-bucket/logs/events_{uuid}.json.gz", max_size=10M {
+ write_ndjson
+ compress_gzip
+}
+```
+
+## See Also
+
+- from_s3
+- save_s3
+- to_file
+- to_hive
+- amazon/s3
diff --git a/src/content/docs/reference/test-framework/index.mdx b/src/content/docs/reference/test-framework/index.mdx
index 27e2e6366..802240a43 100644
--- a/src/content/docs/reference/test-framework/index.mdx
+++ b/src/content/docs/reference/test-framework/index.mdx
@@ -692,7 +692,7 @@ only supported category is `operators`:
# tests/gcs/test.yaml
suite: gcs-integration
requires:
- operators: [from_gcs, to_gcs]
+ operators: [from_google_cloud_storage, to_google_cloud_storage]
skip:
on: capability-unavailable
reason: requires GCS operators
diff --git a/src/partials/operators/FromFileCommonParams.mdx b/src/partials/operators/FromFileCommonParams.mdx
index 17c868a00..ebff60947 100644
--- a/src/partials/operators/FromFileCommonParams.mdx
+++ b/src/partials/operators/FromFileCommonParams.mdx
@@ -25,13 +25,6 @@ The operator automatically creates any intermediate directories required for the
target path. If the target path ends with a trailing slash (`/`), the original
filename will be automatically appended to create the final path.
-### `path_field = field (optional)`
-
-This makes the operator insert the path to the file where an event originated
-from before emitting it.
-
-By default, paths will not be inserted into the outgoing events.
-
### `max_age = duration (optional)`
Only process files that were modified within the specified duration from the
@@ -51,8 +44,11 @@ following fields:
| `path` | `string` | The absolute path of the file being read |
| `mtime` | `time` | The last modification time of the file |
-For example, to add the source path to each event:
+For example, to attach the source path to each event:
```tql
-source = $file.path
+from_file "/data/*.json" {
+ read_json
+ source = $file.path
+}
```
diff --git a/src/partials/operators/ToArrowFsCommonParams.mdx b/src/partials/operators/ToArrowFsCommonParams.mdx
new file mode 100644
index 000000000..f2c28ac50
--- /dev/null
+++ b/src/partials/operators/ToArrowFsCommonParams.mdx
@@ -0,0 +1,34 @@
+### `max_size = int (optional)`
+
+Rotates to a new file after the current file exceeds this size in bytes.
+Because rotation only fires after the threshold is crossed, individual files
+may be slightly larger than `max_size`.
+
+Defaults to `100M`.
+
+### `timeout = duration (optional)`
+
+Rotates to a new file after the current file has been open for this duration,
+measured from the time the file was first opened.
+
+Defaults to `5min`.
+
+### `partition_by = list (optional)`
+
+A list of fields used to partition events into separate files. For every
+distinct combination of partition-field values, a separate file (or group of
+rotated files) is written. The URL must contain a `**` placeholder, which is
+replaced by the hive-style path `field1=value1/field2=value2/…`.
+
+Unlike to_hive, the partitioning fields are **not** stripped from
+the written events — they remain in each record.
+
+### `{ … }`
+
+Pipeline that transforms the incoming events into the byte stream that is
+written to each file. The pipeline must return bytes, so it must end with a
+writer such as `write_ndjson`, `write_parquet`, or `write_csv`, optionally
+followed by a compressor such as `compress_gzip`.
+
+The subpipeline runs once per output file. When rotation or partitioning
+produces a new file, a new instance of the subpipeline is spawned for it.
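+
+For example, here is a minimal sketch that combines these parameters (shown
+with `to_file`; the same options apply to the other filesystem outputs, and the
+`region` field is assumed to exist in the incoming events):
+
+```tql
+// Write gzip-compressed NDJSON, one directory per `region` value, rotating
+// whenever a file grows past 50 MB.
+to_file "/tmp/out/**/events_{uuid}.json.gz",
+  partition_by=[region], max_size=50M {
+  write_ndjson
+  compress_gzip
+}
+```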