diff --git a/skills/tinyfish-web-agent/LICENSE b/skills/tinyfish-web-agent/LICENSE new file mode 100644 index 0000000..c3b3fdd --- /dev/null +++ b/skills/tinyfish-web-agent/LICENSE @@ -0,0 +1,190 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to the Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + Copyright 2026 TinyFish + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/skills/tinyfish-web-agent/SKILL.md b/skills/tinyfish-web-agent/SKILL.md index f7e1122..8e95ff8 100644 --- a/skills/tinyfish-web-agent/SKILL.md +++ b/skills/tinyfish-web-agent/SKILL.md @@ -1,6 +1,6 @@ --- name: tinyfish -description: Use TinyFish web agent to extract/scrape websites, extract data, and automate browser actions using natural language. Use when you need to extract/scrape data from websites, handle bot-protected sites, or automate web tasks. +description: Use TinyFish web agent to automate browser tasks, fill forms, navigate multi-step workflows, and extract data from any website including bot-protected sites. Use when you need to interact with websites beyond simple fetching — login flows, form submissions, multi-page navigation, or sites behind Cloudflare/DataDome. homepage: https://agent.tinyfish.ai requires: env: @@ -9,17 +9,18 @@ requires: # TinyFish Web Agent -Requires: `TINYFISH_API_KEY` environment variable +AI-powered browser automation. Describe what you want in natural language — TinyFish handles the clicking, typing, and navigating. 
## Pre-flight Check (REQUIRED) -Before making any API call, **always** run this first to verify the key is available: +Before making any API call, **always** run this first: ```bash [ -n "$TINYFISH_API_KEY" ] && echo "TINYFISH_API_KEY is set" || echo "TINYFISH_API_KEY is NOT set" +command -v jq >/dev/null 2>&1 && echo "jq is available" || echo "jq is NOT installed" ``` -If the key is **not set**, you **MUST stop and ask the user** to add their API key. Do **NOT** fall back to other tools or approaches — the task requires TinyFish. +If the key is **not set**, you **MUST stop and ask the user** to add their API key. Do **NOT** fall back to other tools — the task requires TinyFish. Tell the user: @@ -42,110 +43,236 @@ Tell the user: > } > ``` -Do NOT proceed until the key is confirmed available. +Do NOT proceed until both the key and `jq` are confirmed available. If `jq` is **not installed**, stop and ask the user to install it (https://jqlang.github.io/jq/download/). -## Best Practices +## Quick Start + +### Using the helper script -1. **Specify JSON format**: Always describe the exact structure you want returned -2. **Parallel calls**: When extracting from multiple independent sites, make separate parallel calls instead of combining into one prompt +```bash +# Fill out a contact form +./scripts/run.sh "https://example.com/contact" \ + 'Fill the contact form with name "John Doe" and email "john@example.com", then click Submit' -## Basic Extract/Scrape +# Same task with stealth mode and geo-proxy +./scripts/run.sh "https://example.com/contact" \ + 'Fill the contact form with name "John Doe" and email "john@example.com", then click Submit' \ + --stealth --proxy US +``` -Extract data from a page.
Specify the JSON structure you want: +### Using curl directly ```bash -curl -N -s -X POST "https://agent.tinyfish.ai/v1/automation/run-sse" \ +curl --max-time 120 -s -X POST "https://agent.tinyfish.ai/v1/automation/run" \ -H "X-API-Key: $TINYFISH_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "url": "https://example.com", - "goal": "Extract product info as JSON: {\"name\": str, \"price\": str, \"in_stock\": bool}" - }' + "url": "https://example.com/contact", + "goal": "Fill the contact form with name \"John Doe\" and email \"john@example.com\", then click Submit", + "api_integration": "openclaw" + }' | jq '.result' ``` -## Multiple Items +## Web Agent Examples + +### Form Filling -Extract lists of data with explicit structure: +Describe the form fields and values in natural language. TinyFish finds the inputs, fills them, and submits. ```bash -curl -N -s -X POST "https://agent.tinyfish.ai/v1/automation/run-sse" \ +curl --max-time 120 -s -X POST "https://agent.tinyfish.ai/v1/automation/run" \ -H "X-API-Key: $TINYFISH_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "url": "https://example.com/products", - "goal": "Extract all products as JSON array: [{\"name\": str, \"price\": str, \"url\": str}]" - }' + "url": "https://example.com/signup", + "goal": "Fill the registration form: first name \"Jane\", last name \"Smith\", email \"jane.smith@example.com\", select \"United States\" from the country dropdown, check the terms checkbox, then click the Register button", + "api_integration": "openclaw" + }' | jq '.result' ``` -## Stealth Mode +### Multi-Step Workflow -For bot-protected sites, add `"browser_profile": "stealth"` to the request body: +Use numbered steps when the task spans multiple pages or actions. TinyFish executes them in order. 
```bash -curl -N -s -X POST "https://agent.tinyfish.ai/v1/automation/run-sse" \ +curl --max-time 120 -s -X POST "https://agent.tinyfish.ai/v1/automation/run" \ -H "X-API-Key: $TINYFISH_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "url": "https://protected-site.com", - "goal": "Extract product data as JSON: {\"name\": str, \"price\": str, \"description\": str}", - "browser_profile": "stealth" - }' + "url": "https://example.com/login", + "goal": "1. Log in with username \"testuser\" and password \"testpass123\"\n2. Navigate to the Account Settings page\n3. Extract the current plan name and renewal date as JSON: {\"plan\": \"string\", \"renewal_date\": \"string\"}", + "api_integration": "openclaw" + }' | jq '.result' ``` -## Proxy +### Stealth Mode -Route through a specific country by adding `"proxy_config"` to the body: +For sites with bot protection (Cloudflare, DataDome), add `browser_profile: "stealth"`. This uses anti-detection fingerprinting. ```bash -curl -N -s -X POST "https://agent.tinyfish.ai/v1/automation/run-sse" \ +curl --max-time 120 -s -X POST "https://agent.tinyfish.ai/v1/automation/run" \ -H "X-API-Key: $TINYFISH_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "url": "https://geo-restricted-site.com", - "goal": "Extract pricing data as JSON: {\"item\": str, \"price\": str, \"currency\": str}", + "url": "https://protected-site.com/listings", + "goal": "Search for \"wireless headphones\" in the search bar, then extract the first 5 results as JSON: [{\"name\": \"string\", \"price\": \"string\", \"rating\": \"string\"}]", "browser_profile": "stealth", - "proxy_config": {"enabled": true, "country_code": "US"} - }' + "api_integration": "openclaw" + }' | jq '.result' ``` -## Output +### Geo-Proxied Browsing -The SSE stream returns `data: {...}` lines. The final result is the event where `type == "COMPLETE"` and `status == "COMPLETED"` — the extracted data is in the `resultJson` field. 
Claude reads the raw SSE output directly; no script-side parsing is needed. +Access geo-restricted content by routing through a specific country. Combine with stealth mode for protected sites. -## Parallel Extraction +```bash +curl --max-time 120 -s -X POST "https://agent.tinyfish.ai/v1/automation/run" \ + -H "X-API-Key: $TINYFISH_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://example.co.uk/products", + "goal": "Extract the featured product name and price in GBP as JSON: {\"name\": \"string\", \"price\": \"string\", \"currency\": \"GBP\"}", + "browser_profile": "stealth", + "proxy_config": {"enabled": true, "country_code": "GB"}, + "api_integration": "openclaw" + }' | jq '.result' +``` + +Available country codes: `US`, `GB`, `CA`, `DE`, `FR`, `JP`, `AU`. + +## Data Extraction -When extracting from multiple independent sources, make separate parallel curl calls instead of combining into one prompt: +When you only need to read data from a page, specify the exact JSON schema you want returned — include sample values so TinyFish knows the expected types and format. + +### Structured Product Data -**Good** - Parallel calls: ```bash -# Compare pizza prices - run these simultaneously -curl -N -s -X POST "https://agent.tinyfish.ai/v1/automation/run-sse" \ +curl --max-time 120 -s -X POST "https://agent.tinyfish.ai/v1/automation/run" \ -H "X-API-Key: $TINYFISH_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "url": "https://pizzahut.com", - "goal": "Extract pizza prices as JSON: [{\"name\": str, \"price\": str}]" - }' + "url": "https://example.com/products", + "goal": "Extract all visible products as a JSON array. Use this exact schema: [{\"name\": \"Example Product\", \"price\": \"$29.99\", \"in_stock\": true, \"url\": \"/product/123\"}]. 
If a price shows \"Contact Us\", set price to null.", + "api_integration": "openclaw" + }' | jq '.result' +``` -curl -N -s -X POST "https://agent.tinyfish.ai/v1/automation/run-sse" \ +### Parallel Extraction + +When extracting from multiple independent sites, make separate parallel calls — this is faster and more reliable than combining into one goal: + +```bash +# Run these simultaneously +curl --max-time 120 -s -X POST "https://agent.tinyfish.ai/v1/automation/run" \ -H "X-API-Key: $TINYFISH_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "url": "https://dominos.com", - "goal": "Extract pizza prices as JSON: [{\"name\": str, \"price\": str}]" - }' + "url": "https://store-a.com/product/widget", + "goal": "Extract the product name and price as JSON: {\"store\": \"Store A\", \"name\": \"string\", \"price\": \"string\"}", + "api_integration": "openclaw" + }' | jq '.result' & + +curl --max-time 120 -s -X POST "https://agent.tinyfish.ai/v1/automation/run" \ + -H "X-API-Key: $TINYFISH_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "url": "https://store-b.com/product/widget", + "goal": "Extract the product name and price as JSON: {\"store\": \"Store B\", \"name\": \"string\", \"price\": \"string\"}", + "api_integration": "openclaw" + }' | jq '.result' & + +wait +``` + +## Best Practices + +**Be specific (the "intern test").** Think of TinyFish as a capable but literal-minded assistant. If a smart but literal intern would have to guess what you mean, add more detail. Specific goals complete faster and return less noise. + +- **Bad:** `"Get product info"` +- **Good:** `"Extract product name, price in USD, and availability as JSON: {\"name\": \"string\", \"price\": \"$0.00\", \"in_stock\": true}"` + +**Include sample values in your JSON schema.** This tells TinyFish the expected type and format for each field. Use realistic examples: `"$29.99"` not just `"string"`. 
+ +**Use numbered steps for multi-step workflows.** Each step should be one clear action: +```text +1. Click the Login button +2. Enter username "testuser" and password "testpass" +3. Navigate to Settings > Billing +4. Extract the current plan name ``` -**Bad** - Single combined call: +**Add guardrails when needed.** Explicitly say what NOT to do: +- `"Do NOT click any purchase or checkout buttons"` +- `"Do NOT navigate away from this page"` +- `"If a CAPTCHA appears, stop and return {\"error\": \"captcha\"}"` + +**Handle edge cases in your goal.** Anticipate what might vary: +- `"If price shows 'Contact Us', set price to null"` +- `"If the page shows 'No results', return an empty array"` +- `"If login fails, return {\"error\": \"login_failed\"}"` + +## Error Handling + +| HTTP Code | Error | What to Do | +|-----------|-------|------------| +| 401 | `MISSING_API_KEY` / `INVALID_API_KEY` | Check that `$TINYFISH_API_KEY` is set and valid | +| 400 | `INVALID_INPUT` | Verify URL format and that goal is not empty | +| 429 | `RATE_LIMIT_EXCEEDED` | Wait a moment and retry | +| 403 | `FORBIDDEN` | Account has no remaining credits — check your account | +| 500 | `INTERNAL_ERROR` | Transient server error — retry the request | + +**Task-level failures** (HTTP 200 but `status: "FAILED"`): + +| Symptom | What to Do | +|---------|------------| +| Goal too vague | Make the goal more specific with exact fields and schema | +| Bot detection blocked the task | Add `"browser_profile": "stealth"` | +| Geo-restricted content | Add `"proxy_config"` with the appropriate country code | +| Page requires interaction first | Use numbered steps to navigate to the right state | + +### Limitations + +- **CAPTCHAs:** Cannot solve reCAPTCHA, hCaptcha, or similar challenges +- **Infinite scroll:** May not automatically scroll to load all dynamic content +- **Session persistence:** Each run starts fresh with no cookies from previous runs +- **Timeout:** Runs have a ~5 minute server-side timeout, but the sync `curl --max-time 120` examples and `run.sh` in sync mode stop waiting after 120 seconds — use SSE streaming for longer tasks +
+## Advanced: SSE Streaming + +For long-running tasks where you want real-time progress, use the SSE endpoint or the `--async` flag: + ```bash -# Don't do this - less reliable and slower +# Via helper script +./scripts/run.sh "https://example.com" 'Extract all article titles' --async + +# Via curl curl -N -s -X POST "https://agent.tinyfish.ai/v1/automation/run-sse" \ -H "X-API-Key: $TINYFISH_API_KEY" \ -H "Content-Type: application/json" \ -d '{ - "url": "https://pizzahut.com", - "goal": "Extract prices from Pizza Hut and also go to Dominos..." + "url": "https://example.com", + "goal": "Extract all article titles as a JSON array", + "api_integration": "openclaw" }' ``` -Each independent extraction task should be its own API call. This is faster (parallel execution) and more reliable. +SSE event types: `STARTED` (includes `runId`), `STREAMING_URL` (live browser view), `PROGRESS` (browser action updates), `HEARTBEAT`, `COMPLETE` (final result with `status` and `resultJson`). + +## Output + +The sync endpoint returns a JSON response: + +```json +{ + "run_id": "abc-123", + "status": "COMPLETED", + "started_at": "2025-01-01T00:00:00Z", + "finished_at": "2025-01-01T00:01:30Z", + "num_of_steps": 5, + "result": { "name": "Widget", "price": "$29.99" }, + "error": null +} +``` + +- **`COMPLETED`** — Task succeeded. Extracted data is in `result`. Pipe through `jq '.result'` to get just the data. +- **`FAILED`** — Task did not complete. Check `error` for the failure message and refine your goal. 
diff --git a/skills/tinyfish-web-agent/scripts/extract.sh b/skills/tinyfish-web-agent/scripts/extract.sh
index 9f12eeb..ff37b3d 100755
--- a/skills/tinyfish-web-agent/scripts/extract.sh
+++ b/skills/tinyfish-web-agent/scripts/extract.sh
@@ -1,70 +1,5 @@
 #!/usr/bin/env bash
-#
-# TinyFish web extract/scrape helper
-#
-# Usage:
-#   extract.sh [--stealth] [--proxy COUNTRY]
-#
-# Examples:
-#   extract.sh "https://example.com" 'Extract product as JSON: {"name": str, "price": str}'
-#   extract.sh "https://site.com" 'Get all links as JSON: [{"text": str, "url": str}]' --stealth
-#   extract.sh "https://site.com" 'Extract items' --stealth --proxy US
-
-set -euo pipefail
-
-if [ $# -lt 2 ]; then
-  echo "Usage: extract.sh [--stealth] [--proxy COUNTRY]" >&2
-  exit 1
-fi
-
-if [ -z "${TINYFISH_API_KEY:-}" ]; then
-  echo "Error: TINYFISH_API_KEY environment variable not set" >&2
-  exit 1
-fi
-
-URL="$1"
-GOAL="$2"
-shift 2
-
-STEALTH=false
-PROXY_COUNTRY=""
-
-while [ $# -gt 0 ]; do
-  case "$1" in
-    --stealth)
-      STEALTH=true
-      shift
-      ;;
-    --proxy)
-      PROXY_COUNTRY="$2"
-      shift 2
-      ;;
-    *)
-      echo "Unknown option: $1" >&2
-      exit 1
-      ;;
-  esac
-done
-
-# Build JSON payload — escape URL and goal for safe embedding
-JSON_URL=$(printf '%s' "$URL" | sed 's/\\/\\\\/g; s/"/\\"/g')
-JSON_GOAL=$(printf '%s' "$GOAL" | sed 's/\\/\\\\/g; s/"/\\"/g')
-
-PAYLOAD="{\"url\":\"${JSON_URL}\",\"goal\":\"${JSON_GOAL}\""
-
-if [ "$STEALTH" = true ]; then
-  PAYLOAD="${PAYLOAD},\"browser_profile\":\"stealth\""
-fi
-
-if [ -n "$PROXY_COUNTRY" ]; then
-  PAYLOAD="${PAYLOAD},\"proxy_config\":{\"enabled\":true,\"country_code\":\"${PROXY_COUNTRY}\"}"
-fi
-
-PAYLOAD="${PAYLOAD}}"
-
-echo "Extracting from ${URL}..." >&2
-
-exec curl -N -s -X POST "https://agent.tinyfish.ai/v1/automation/run-sse" \
-  -H "X-API-Key: ${TINYFISH_API_KEY}" \
-  -H "Content-Type: application/json" \
-  -d "$PAYLOAD"
+# Deprecated: Use run.sh instead
+echo "Note: extract.sh is deprecated. Use run.sh instead." >&2
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+exec "$SCRIPT_DIR/run.sh" "$@"
diff --git a/skills/tinyfish-web-agent/scripts/run.sh b/skills/tinyfish-web-agent/scripts/run.sh
new file mode 100755
index 0000000..1c17ab7
--- /dev/null
+++ b/skills/tinyfish-web-agent/scripts/run.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+#
+# TinyFish web automation — run a browser agent task
+#
+# Usage:
+#   run.sh <url> <goal> [--stealth] [--proxy COUNTRY] [--async]
+#
+# Requires: jq on PATH and the TINYFISH_API_KEY environment variable.
+#
+# Sync mode (default): waits up to 120s, prints the response's .result JSON
+#   to stdout on success; on failure prints diagnostics to stderr and exits 1.
+# Async mode (--async): streams raw SSE events from the run.
+#
+# Examples:
+#   run.sh "https://example.com" 'Extract product as JSON: {"name": str, "price": str}'
+#   run.sh "https://site.com" 'Get all links' --stealth
+#   run.sh "https://site.com" 'Extract items' --stealth --proxy US
+#   run.sh "https://site.com" 'Fill form' --async
+
+set -euo pipefail
+
+# --- Dependency checks ---
+
+if ! command -v jq >/dev/null 2>&1; then
+  echo "Error: jq is required but not installed. Install it: https://jqlang.github.io/jq/download/" >&2
+  exit 1
+fi
+
+if [ -z "${TINYFISH_API_KEY:-}" ]; then
+  echo "Error: TINYFISH_API_KEY environment variable not set." >&2
+  echo "Get your API key at: https://agent.tinyfish.ai/api-keys" >&2
+  exit 1
+fi
+
+# --- Argument parsing ---
+
+if [ $# -lt 2 ]; then
+  echo "Usage: run.sh <url> <goal> [--stealth] [--proxy COUNTRY] [--async]" >&2
+  exit 1
+fi
+
+URL="$1"
+GOAL="$2"
+shift 2
+
+STEALTH=false
+PROXY_COUNTRY=""
+ASYNC=false
+
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --stealth)
+      STEALTH=true
+      shift
+      ;;
+    --proxy)
+      if [ $# -lt 2 ]; then
+        echo "Error: --proxy requires a country code argument" >&2
+        exit 1
+      fi
+      PROXY_COUNTRY="$2"
+      shift 2
+      ;;
+    --async)
+      ASYNC=true
+      shift
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# --- Build JSON payload with jq (safe string escaping) ---
+
+PAYLOAD=$(jq -n \
+  --arg url "$URL" \
+  --arg goal "$GOAL" \
+  --arg integration "openclaw" \
+  '{url: $url, goal: $goal, api_integration: $integration}')
+
+if [ "$STEALTH" = true ]; then
+  PAYLOAD=$(printf '%s' "$PAYLOAD" | jq '. + {browser_profile: "stealth"}')
+fi
+
+if [ -n "$PROXY_COUNTRY" ]; then
+  PAYLOAD=$(printf '%s' "$PAYLOAD" | jq --arg cc "$PROXY_COUNTRY" \
+    '. + {proxy_config: {enabled: true, country_code: $cc}}')
+fi
+
+# --- Async mode: stream SSE events ---
+
+if [ "$ASYNC" = true ]; then
+  echo "Running (streaming)..." >&2
+  # exec replaces this shell with curl, so the sync-mode code below is skipped.
+  exec curl -N -sS --fail-with-body -X POST "https://agent.tinyfish.ai/v1/automation/run-sse" \
+    -H "X-API-Key: ${TINYFISH_API_KEY}" \
+    -H "Content-Type: application/json" \
+    -d "$PAYLOAD"
+fi
+
+# --- Sync mode: single request, parse response ---
+
+echo "Running..." >&2
+
+# -w appends the HTTP status code on its own final line so it can be split
+# from the body below; -S keeps curl's own error message visible despite -s.
+HTTP_RESPONSE=$(curl --max-time 120 -sS -w "\n%{http_code}" -X POST \
+  "https://agent.tinyfish.ai/v1/automation/run" \
+  -H "X-API-Key: ${TINYFISH_API_KEY}" \
+  -H "Content-Type: application/json" \
+  -d "$PAYLOAD") || {
+  echo "Error: curl request failed" >&2
+  exit 1
+}
+
+# Split response body and HTTP status code
+HTTP_BODY=$(printf '%s\n' "$HTTP_RESPONSE" | sed '$d')
+HTTP_CODE=$(printf '%s\n' "$HTTP_RESPONSE" | tail -n1)
+
+if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then
+  echo "Error: HTTP ${HTTP_CODE}" >&2
+  printf '%s\n' "$HTTP_BODY" >&2
+  exit 1
+fi
+
+# jq -e exits non-zero when .status is null or missing, so a response
+# without a status field is reported here.
+if ! STATUS=$(printf '%s' "$HTTP_BODY" | jq -er '.status' 2>/dev/null); then
+  echo "Error: Response was not valid JSON with a status field" >&2
+  printf '%s\n' "$HTTP_BODY" >&2
+  exit 1
+fi
+
+if [ "$STATUS" = "COMPLETED" ]; then
+  printf '%s' "$HTTP_BODY" | jq '.result'
+  exit 0
+elif [ "$STATUS" = "FAILED" ]; then
+  echo "Error: Task failed" >&2
+  printf '%s' "$HTTP_BODY" | jq '.error' >&2
+  exit 1
+else
+  echo "Error: Unexpected status: ${STATUS:-unknown}" >&2
+  printf '%s\n' "$HTTP_BODY" >&2
+  exit 1
+fi