From 9a0554ddc952d154522129a758c5b2c47d63574a Mon Sep 17 00:00:00 2001 From: kokoron <78990112+ILKokoron@users.noreply.github.com> Date: Tue, 31 Mar 2026 02:25:50 +0000 Subject: [PATCH 01/10] feat: add scam_or_rug_onchain RL environment for token security detection --- .../community/scam_or_rug_onchain/README.md | 59 ++++ .../community/scam_or_rug_onchain/__init__.py | 3 + .../scam_or_rug_onchain/scam_or_rug_env.py | 295 ++++++++++++++++++ 3 files changed, 357 insertions(+) create mode 100644 environments/community/scam_or_rug_onchain/README.md create mode 100644 environments/community/scam_or_rug_onchain/__init__.py create mode 100644 environments/community/scam_or_rug_onchain/scam_or_rug_env.py diff --git a/environments/community/scam_or_rug_onchain/README.md b/environments/community/scam_or_rug_onchain/README.md new file mode 100644 index 000000000..1c7a86f81 --- /dev/null +++ b/environments/community/scam_or_rug_onchain/README.md @@ -0,0 +1,59 @@ +# Scam or Rug On-Chain Environment + +A reinforcement learning environment that trains LLMs to detect token scams and rug pulls from the perspective of an average Web3 user. + +## Overview + +This environment challenges the model to analyze raw on-chain token data and classify tokens as `SCAM`, `RUG_RISK`, or `LEGITIMATE` — then calculate the estimated price impact if malicious actors dump their holdings. + +The detection logic is grounded in real-world Web3 experience, covering the most common attack vectors used in BSC, Ethereum, and Solana token scams. + +## Detection Layers + +1. **Cluster Holdings** — identifies connected wallets funded from the same source holding large % of supply (bundled supply attack) +2. **Liquidity Pool** — checks LP holder type, lock duration, and whether LP is genuinely burned or just unlocked +3. **Mint Authority** — flags tokens where dev can still print new supply +4. **Tax Mechanism** — scores buy/sell tax: 0–1% healthy, 1–5% caution, >5% red flag, >10% danger zone / semi-honeypot +5. **Burn Validity** — distinguishes real burn addresses (`0x000...000`, `0x000...dead`) from fake ones, and checks for `recoverTokens()` / `emergencyWithdraw()` backdoors +6. **Honeypot Detection** — checks whether holders can actually sell, and flags suspiciously high sell tax +7. **Wash Trading** — analyzes buy/sell ratio and unique trader count to detect artificial volume inflation + +## Task Format + +The model receives structured on-chain data and must respond in this format: +``` +CLASSIFICATION: +REASONING: +DUMP IMPACT: % price drop if cluster dumps (or N/A if LEGITIMATE) +CALCULATION: +``` + +## Reward Function + +Scores are computed across three components: + +| Component | Weight | Description | +|---|---|---| +| Classification | 0.4 | Correct label gets full score, adjacent label (SCAM↔RUG_RISK) gets partial | +| Reasoning | 0.3 | Keywords matched across all 7 detection dimensions | +| Math accuracy | 0.3 | Dump impact within 2% → full, within 5% → partial, within 10% → minimal | + +## Dump Impact Formula +``` +tokens_dumped = total_supply × (cluster_pct / 100) +current_price = lp_value / total_supply +new_price = lp_value / (total_supply + tokens_dumped) +price_drop = (current_price - new_price) / current_price × 100 +``` + +## Data Generation + +All token data is synthetically generated with realistic value ranges derived from real-world scam patterns observed across EVM and Solana chains. Each training round randomizes all variable values while keeping the underlying detection logic consistent — forcing the model to generalize rather than memorize. + +## Scope + +This environment covers non-utility tokens with fixed supply (not gas tokens or established governance tokens). Upgradeable contract detection is included as a proxy for potential mint-after-burn attacks. + +## Author + +Contributed by [@ILKokoron](https://github.com/ILKokoron) \ No newline at end of file diff --git a/environments/community/scam_or_rug_onchain/__init__.py b/environments/community/scam_or_rug_onchain/__init__.py new file mode 100644 index 000000000..ca200a7a6 --- /dev/null +++ b/environments/community/scam_or_rug_onchain/__init__.py @@ -0,0 +1,3 @@ +from .scam_or_rug_env import ScamOrRugEnv, ScamOrRugConfig + +__all__ = ["ScamOrRugEnv", "ScamOrRugConfig"] \ No newline at end of file diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py new file mode 100644 index 000000000..2242856c4 --- /dev/null +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -0,0 +1,295 @@ +import random +import re +from dataclasses import dataclass +from atroposlib.envs.base import BaseEnv, BaseEnvConfig, ScoredDataGroup +from atroposlib.type_definitions import Item + +VALID_BURN_ADDRESSES = { + "0x0000000000000000000000000000000000000000", + "0x000000000000000000000000000000000000dead", +} + + +@dataclass +class ScamOrRugConfig(BaseEnvConfig): + tokenizer_name: str = "NousResearch/DeepHermes-3-Llama-3-8B-Preview" + group_size: int = 8 + max_token_len: int = 1024 + num_rollouts: int = 256 + num_iterations: int = 1024 + + +SYSTEM_PROMPT = """You are an on-chain analyst from the perspective of an average Web3 user trying to protect themselves from scams and rug pulls. + +You will be given on-chain token data. Analyze it across these dimensions: +1. Cluster holdings — connected wallets holding large % of supply +2. Liquidity pool — status, holder type, locked or burned +3. Mint authority — can new tokens be created? +4. Tax — buy/sell tax percentage +5. Burn validity — is the burn address legitimate and irreversible? +6. Honeypot — can holders actually sell? +7. Wash trading — is volume artificially inflated? + +Respond in this exact format: +CLASSIFICATION: +REASONING: +DUMP IMPACT: % price drop if cluster dumps entire holding (or N/A if LEGITIMATE) +CALCULATION: """ + + +def calculate_dump_impact(supply: int, lp_value_usd: float, cluster_pct: float) -> float: + tokens_dumped = supply * (cluster_pct / 100) + current_price = lp_value_usd / supply + new_price = lp_value_usd / (supply + tokens_dumped) + price_drop_pct = ((current_price - new_price) / current_price) * 100 + return round(price_drop_pct, 2) + + +def generate_fake_burn_address() -> str: + fake_patterns = [ + "0x7a9f3c000000000000000000000000000dead1234", + "0x000000000000000000000000000000000001dead", + "0xdead000000000000000000000000000000000001", + "0x" + "0" * 38 + "01", + "0x" + "dead" + "0" * 36, + ] + return random.choice(fake_patterns) + + +def generate_token_data(label: str) -> dict: + + if label == "SCAM": + supply = random.randint(1_000_000, 1_000_000_000_000) + cluster_pct = round(random.uniform(55, 92), 2) + lp_value = round(random.uniform(3_000, 25_000), 2) + sell_tax = round(random.uniform(10, 90), 2) + return { + "total_supply": supply, + "mint_authority": random.choice(["active_dev_wallet", "active_single_eoa"]), + "lp_value_usd": lp_value, + "lp_status": random.choice(["unlocked", "locked_7days"]), + "lp_holder_type": random.choice(["unknown_personal_wallet", "dev_wallet"]), + "cluster_holding_pct": cluster_pct, + "cluster_wallet_count": random.randint(10, 150), + "cluster_funded_from_same_source": True, + "unique_holders": random.randint(50, 400), + "token_age_days": random.randint(1, 7), + "buy_tax_pct": round(random.uniform(0, 10), 2), + "sell_tax_pct": sell_tax, + "can_sell": sell_tax < 95, + "burn_address": generate_fake_burn_address(), + "has_recover_function": random.choice([True, True, False]), + "has_mint_function": True, + "is_upgradeable_contract": random.choice([True, False]), + "volume_24h_usd": round(random.uniform(50_000, 500_000), 2), + "unique_buyers_24h": random.randint(8, 20), + "unique_sellers_24h": random.randint(7, 19), + "true_dump_impact": calculate_dump_impact(supply, lp_value, cluster_pct), + } + + elif label == "RUG_RISK": + supply = random.randint(1_000_000, 1_000_000_000_000) + cluster_pct = round(random.uniform(28, 55), 2) + lp_value = round(random.uniform(10_000, 80_000), 2) + sell_tax = round(random.uniform(5, 15), 2) + return { + "total_supply": supply, + "mint_authority": random.choice(["active_dev_wallet", "renounced"]), + "lp_value_usd": lp_value, + "lp_status": random.choice(["unlocked", "locked_30days", "locked_90days"]), + "lp_holder_type": random.choice(["dev_wallet", "unknown_personal_wallet"]), + "cluster_holding_pct": cluster_pct, + "cluster_wallet_count": random.randint(5, 50), + "cluster_funded_from_same_source": random.choice([True, False]), + "unique_holders": random.randint(200, 2000), + "token_age_days": random.randint(3, 60), + "buy_tax_pct": round(random.uniform(0, 5), 2), + "sell_tax_pct": sell_tax, + "can_sell": True, + "burn_address": random.choice([ + generate_fake_burn_address(), + "0x0000000000000000000000000000000000000000" + ]), + "has_recover_function": random.choice([True, False]), + "has_mint_function": random.choice([True, False]), + "is_upgradeable_contract": random.choice([True, False]), + "volume_24h_usd": round(random.uniform(20_000, 200_000), 2), + "unique_buyers_24h": random.randint(15, 40), + "unique_sellers_24h": random.randint(12, 38), + "true_dump_impact": calculate_dump_impact(supply, lp_value, cluster_pct), + } + + else: # LEGITIMATE + supply = random.randint(1_000_000, 1_000_000_000_000) + cluster_pct = round(random.uniform(0, 15), 2) + lp_value = round(random.uniform(50_000, 5_000_000), 2) + return { + "total_supply": supply, + "mint_authority": random.choice(["renounced", "burned"]), + "lp_value_usd": lp_value, + "lp_status": random.choice(["burned", "locked_365days", "locked_180days"]), + "lp_holder_type": random.choice(["dex_contract", "burned"]), + "cluster_holding_pct": cluster_pct, + "cluster_wallet_count": random.randint(0, 8), + "cluster_funded_from_same_source": False, + "unique_holders": random.randint(1000, 100_000), + "token_age_days": random.randint(60, 1000), + "buy_tax_pct": round(random.uniform(0, 1), 2), + "sell_tax_pct": round(random.uniform(0, 1), 2), + "can_sell": True, + "burn_address": random.choice(list(VALID_BURN_ADDRESSES)), + "has_recover_function": False, + "has_mint_function": False, + "is_upgradeable_contract": False, + "volume_24h_usd": round(random.uniform(100_000, 10_000_000), 2), + "unique_buyers_24h": random.randint(100, 5000), + "unique_sellers_24h": random.randint(80, 4000), + "true_dump_impact": calculate_dump_impact(supply, lp_value, cluster_pct), + } + + +def format_prompt(data: dict) -> str: + buy_sell_ratio = round( + data["unique_buyers_24h"] / max(data["unique_sellers_24h"], 1), 2 + ) + return f"""Analyze this token's on-chain data: + +[SUPPLY & MINT] +Total Supply (fixed): {data['total_supply']:,} tokens +Mint Authority: {data['mint_authority']} +Has Mint Function: {data['has_mint_function']} +Upgradeable Contract: {data['is_upgradeable_contract']} + +[LIQUIDITY POOL] +LP Value: ${data['lp_value_usd']:,.2f} USD +LP Status: {data['lp_status']} +LP Holder Type: {data['lp_holder_type']} + +[BURN] +Burn Address: {data['burn_address']} +Has Recover/Withdraw Function: {data['has_recover_function']} + +[TAX] +Buy Tax: {data['buy_tax_pct']}% +Sell Tax: {data['sell_tax_pct']}% +Can Holders Sell: {data['can_sell']} + +[HOLDER DISTRIBUTION] +Connected Cluster Holdings: {data['cluster_holding_pct']}% of supply +Wallets in Cluster: {data['cluster_wallet_count']} +Funded From Same Source: {data['cluster_funded_from_same_source']} +Unique Holders: {data['unique_holders']} +Token Age: {data['token_age_days']} days + +[TRADING ACTIVITY 24H] +Volume: ${data['volume_24h_usd']:,.2f} USD +Unique Buyers: {data['unique_buyers_24h']} +Unique Sellers: {data['unique_sellers_24h']} +Buy/Sell Ratio: {buy_sell_ratio} + +Classify this token, explain your reasoning across all dimensions, and calculate the estimated price drop if the cluster dumps their entire holding into the LP.""" + + +def score_response(response: str, data: dict, true_label: str) -> float: + score = 0.0 + response_upper = response.upper() + + # 1. Classification (0.4) + classification = None + for label in ["SCAM", "RUG_RISK", "LEGITIMATE"]: + if f"CLASSIFICATION: {label}" in response_upper: + classification = label + break + + if classification == true_label: + score += 0.4 + elif ( + (true_label == "SCAM" and classification == "RUG_RISK") or + (true_label == "RUG_RISK" and classification == "SCAM") + ): + score += 0.1 + + # 2. Reasoning quality (0.3) — checks across all 7 dimensions + keywords = { + "SCAM": ["cluster", "mint", "tax", "sell", "honeypot", "burn", "wash", "fake", "recover"], + "RUG_RISK": ["cluster", "lp", "lock", "tax", "risk", "upgrade", "dev"], + "LEGITIMATE": ["renounced", "burned", "locked", "dex", "distributed", "healthy", "low tax"], + } + kws = keywords.get(true_label, []) + matched = sum(1 for kw in kws if kw in response.lower()) + score += min(matched / len(kws), 1.0) * 0.3 + + # 3. Math accuracy (0.3) + if true_label in ["SCAM", "RUG_RISK"]: + true_impact = data["true_dump_impact"] + match = re.search(r"DUMP IMPACT:\s*([\d.]+)%", response, re.IGNORECASE) + if match: + try: + ai_impact = float(match.group(1)) + diff = abs(ai_impact - true_impact) + if diff <= 2.0: + score += 0.3 + elif diff <= 5.0: + score += 0.15 + elif diff <= 10.0: + score += 0.05 + except ValueError: + pass + else: + if "n/a" in response.lower() or "not applicable" in response.lower(): + score += 0.3 + + return round(score, 4) + + +class ScamOrRugEnv(BaseEnv): + + def __init__(self, config: ScamOrRugConfig, **kwargs): + super().__init__(config, **kwargs) + self.labels = ["SCAM", "RUG_RISK", "LEGITIMATE"] + + @classmethod + def config_init(cls) -> ScamOrRugConfig: + return ScamOrRugConfig() + + async def collect_trajectories(self, item) -> tuple: + label = random.choice(self.labels) + data = generate_token_data(label) + prompt = format_prompt(data) + + messages = [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": prompt}, + ] + + completions = await self.server.completion( + messages=messages, + n=self.config.group_size, + max_tokens=self.config.max_token_len, + ) + + scored = ScoredDataGroup() + scored["tokens"] = [] + scored["masks"] = [] + scored["scores"] = [] + + for completion in completions.choices: + response = completion.message.content + reward = score_response(response, data, label) + tokens, masks = self.tokenize_for_training(messages, response) + scored["tokens"].append(tokens) + scored["masks"].append(masks) + scored["scores"].append(reward) + + return scored, {} + + async def get_next_item(self) -> Item: + label = random.choice(self.labels) + return (label,) + + async def evaluate(self, *args, **kwargs): + pass + + +if __name__ == "__main__": + ScamOrRugEnv.cli() \ No newline at end of file From 551cc7187d3a9bb4317bcef556b0e400b24f7138 Mon Sep 17 00:00:00 2001 From: kokoron <78990112+ILKokoron@users.noreply.github.com> Date: Tue, 31 Mar 2026 08:29:02 +0000 Subject: [PATCH 02/10] fix: align ScamOrRugEnv with BaseEnv API and add wandb logging --- .../scam_or_rug_onchain/scam_or_rug_env.py | 74 ++++++++++++++----- 1 file changed, 57 insertions(+), 17 deletions(-) diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py index 2242856c4..bc0fd0127 100644 --- a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -1,7 +1,10 @@ import random import re from dataclasses import dataclass +from typing import Optional + from atroposlib.envs.base import BaseEnv, BaseEnvConfig, ScoredDataGroup +from atroposlib.envs.server_handling.server_baseline import ServerBaseline from atroposlib.type_definitions import Item VALID_BURN_ADDRESSES = { @@ -14,9 +17,9 @@ class ScamOrRugConfig(BaseEnvConfig): tokenizer_name: str = "NousResearch/DeepHermes-3-Llama-3-8B-Preview" group_size: int = 8 - max_token_len: int = 1024 - num_rollouts: int = 256 - num_iterations: int = 1024 + max_token_length: int = 1024 + num_rollouts_to_keep: int = 32 + total_steps: int = 1000 SYSTEM_PROMPT = """You are an on-chain analyst from the perspective of an average Web3 user trying to protect themselves from scams and rug pulls. @@ -57,7 +60,6 @@ def generate_fake_burn_address() -> str: def generate_token_data(label: str) -> dict: - if label == "SCAM": supply = random.randint(1_000_000, 1_000_000_000_000) cluster_pct = round(random.uniform(55, 92), 2) @@ -108,7 +110,7 @@ def generate_token_data(label: str) -> dict: "can_sell": True, "burn_address": random.choice([ generate_fake_burn_address(), - "0x0000000000000000000000000000000000000000" + "0x0000000000000000000000000000000000000000", ]), "has_recover_function": random.choice([True, False]), "has_mint_function": random.choice([True, False]), @@ -204,12 +206,12 @@ def score_response(response: str, data: dict, true_label: str) -> float: if classification == true_label: score += 0.4 elif ( - (true_label == "SCAM" and classification == "RUG_RISK") or - (true_label == "RUG_RISK" and classification == "SCAM") + (true_label == "SCAM" and classification == "RUG_RISK") + or (true_label == "RUG_RISK" and classification == "SCAM") ): score += 0.1 - # 2. Reasoning quality (0.3) — checks across all 7 dimensions + # 2. Reasoning quality (0.3) keywords = { "SCAM": ["cluster", "mint", "tax", "sell", "honeypot", "burn", "wash", "fake", "recover"], "RUG_RISK": ["cluster", "lp", "lock", "tax", "risk", "upgrade", "dev"], @@ -243,17 +245,26 @@ def score_response(response: str, data: dict, true_label: str) -> float: class ScamOrRugEnv(BaseEnv): + name = "scam_or_rug_onchain" def __init__(self, config: ScamOrRugConfig, **kwargs): super().__init__(config, **kwargs) self.labels = ["SCAM", "RUG_RISK", "LEGITIMATE"] + self.percent_correct_buffer = [] @classmethod - def config_init(cls) -> ScamOrRugConfig: - return ScamOrRugConfig() + def config_init(cls): + return ScamOrRugConfig(), ServerBaseline() - async def collect_trajectories(self, item) -> tuple: + async def setup(self): + pass + + async def get_next_item(self) -> Item: label = random.choice(self.labels) + return (label,) + + async def collect_trajectories(self, item) -> tuple: + label = item[0] data = generate_token_data(label) prompt = format_prompt(data) @@ -265,7 +276,7 @@ async def collect_trajectories(self, item) -> tuple: completions = await self.server.completion( messages=messages, n=self.config.group_size, - max_tokens=self.config.max_token_len, + max_tokens=self.config.max_token_length, ) scored = ScoredDataGroup() @@ -276,16 +287,45 @@ async def collect_trajectories(self, item) -> tuple: for completion in completions.choices: response = completion.message.content reward = score_response(response, data, label) - tokens, masks = self.tokenize_for_training(messages, response) + + # tokenize + full_text = self.tokenizer.apply_chat_template( + messages + [{"role": "assistant", "content": response}], + tokenize=True, + add_generation_prompt=False, + ) + prompt_text = self.tokenizer.apply_chat_template( + messages, + tokenize=True, + add_generation_prompt=True, + ) + tokens = full_text + masks = [-100] * len(prompt_text) + full_text[len(prompt_text):] + scored["tokens"].append(tokens) scored["masks"].append(masks) scored["scores"].append(reward) - return scored, {} + # track accuracy + response_upper = response.upper() + for lbl in ["SCAM", "RUG_RISK", "LEGITIMATE"]: + if f"CLASSIFICATION: {lbl}" in response_upper: + self.percent_correct_buffer.append(1.0 if lbl == label else 0.0) + break - async def get_next_item(self) -> Item: - label = random.choice(self.labels) - return (label,) + return scored, [] + + async def wandb_log(self, wandb_metrics: Optional[dict] = None): + if wandb_metrics is None: + wandb_metrics = {} + + if self.percent_correct_buffer: + wandb_metrics["train/percent_correct"] = sum( + self.percent_correct_buffer + ) / len(self.percent_correct_buffer) + self.percent_correct_buffer = [] + + await super().wandb_log(wandb_metrics) async def evaluate(self, *args, **kwargs): pass From dad40fba6bda83e6a85bea6645d26c5f44ffce70 Mon Sep 17 00:00:00 2001 From: kokoron <78990112+ILKokoron@users.noreply.github.com> Date: Tue, 31 Mar 2026 10:13:21 +0000 Subject: [PATCH 03/10] fix: align ScamOrRugEnv with BaseEnv API and add wandb logging --- environments/community/scam_or_rug_onchain/scam_or_rug_env.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py index bc0fd0127..a5451dfa4 100644 --- a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -300,7 +300,8 @@ async def collect_trajectories(self, item) -> tuple: add_generation_prompt=True, ) tokens = full_text - masks = [-100] * len(prompt_text) + full_text[len(prompt_text):] + prompt_len = len(prompt_text) + masks = [-100] * prompt_len + [1] * (len(full_text) - prompt_len) scored["tokens"].append(tokens) scored["masks"].append(masks) From 4c0c2b7e8bdf0e7fd81a0253ad766958f1e96b08 Mon Sep 17 00:00:00 2001 From: kokoron <78990112+ILKokoron@users.noreply.github.com> Date: Tue, 31 Mar 2026 10:15:11 +0000 Subject: [PATCH 04/10] fix: align ScamOrRugEnv with BaseEnv API and add wandb logging --- environments/community/scam_or_rug_onchain/scam_or_rug_env.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py index a5451dfa4..f57c37da8 100644 --- a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -300,8 +300,8 @@ async def collect_trajectories(self, item) -> tuple: add_generation_prompt=True, ) tokens = full_text - prompt_len = len(prompt_text) - masks = [-100] * prompt_len + [1] * (len(full_text) - prompt_len) + prompt_len = len(prompt_text) + masks = [-100] * prompt_len + [1] * (len(full_text) - prompt_len) scored["tokens"].append(tokens) scored["masks"].append(masks) From 62c48af21c4c07efe22fba85f36dbdea8316a623 Mon Sep 17 00:00:00 2001 From: kokoron <78990112+ILKokoron@users.noreply.github.com> Date: Wed, 1 Apr 2026 06:26:02 +0000 Subject: [PATCH 05/10] improve: score_response malformed handling, adjacent scoring, keywords fix --- .../community/scam_or_rug_onchain/scam_or_rug_env.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py index f57c37da8..ba2cb6f92 100644 --- a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -203,19 +203,22 @@ def score_response(response: str, data: dict, true_label: str) -> float: classification = label break + if classification is None: + return 0.0 + if classification == true_label: score += 0.4 elif ( (true_label == "SCAM" and classification == "RUG_RISK") or (true_label == "RUG_RISK" and classification == "SCAM") ): - score += 0.1 + score += 0.2 # 2. Reasoning quality (0.3) keywords = { "SCAM": ["cluster", "mint", "tax", "sell", "honeypot", "burn", "wash", "fake", "recover"], "RUG_RISK": ["cluster", "lp", "lock", "tax", "risk", "upgrade", "dev"], - "LEGITIMATE": ["renounced", "burned", "locked", "dex", "distributed", "healthy", "low tax"], + "LEGITIMATE": ["renounced", "burned", "locked", "dex", "distributed", "healthy", "low", "tax"], } kws = keywords.get(true_label, []) matched = sum(1 for kw in kws if kw in response.lower()) From c010e50440e748f420e195f2c56fad6163935bda Mon Sep 17 00:00:00 2001 From: kokoron <78990112+ILKokoron@users.noreply.github.com> Date: Wed, 1 Apr 2026 06:34:01 +0000 Subject: [PATCH 06/10] improve: score_response malformed handling, adjacent scoring, keywords fix --- .../scam_or_rug_onchain/scam_or_rug_env.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py index ba2cb6f92..756623dbf 100644 --- a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -41,11 +41,14 @@ class ScamOrRugConfig(BaseEnvConfig): def calculate_dump_impact(supply: int, lp_value_usd: float, cluster_pct: float) -> float: - tokens_dumped = supply * (cluster_pct / 100) - current_price = lp_value_usd / supply - new_price = lp_value_usd / (supply + tokens_dumped) - price_drop_pct = ((current_price - new_price) / current_price) * 100 - return round(price_drop_pct, 2) + if supply == 0 or lp_value_usd == 0: + return 0.0 + tokens_dumped = supply * (cluster_pct / 100.0) + # token side of LP ≈ half of total LP value + token_reserve = lp_value_usd / 2 + current_price = token_reserve / supply + price_impact = tokens_dumped / (tokens_dumped + (token_reserve / current_price)) + return round(price_impact * 100, 2) def generate_fake_burn_address() -> str: From 5a7074e303a95a052ab54f2d9d0fe719783724a6 Mon Sep 17 00:00:00 2001 From: kokoron <78990112+ILKokoron@users.noreply.github.com> Date: Wed, 1 Apr 2026 06:40:13 +0000 Subject: [PATCH 07/10] improve: fix dump impact formula, fake burn addresses, cluster wallet count --- .../scam_or_rug_onchain/scam_or_rug_env.py | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py index 756623dbf..52d94d7c7 100644 --- a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -41,23 +41,36 @@ class ScamOrRugConfig(BaseEnvConfig): def calculate_dump_impact(supply: int, lp_value_usd: float, cluster_pct: float) -> float: - if supply == 0 or lp_value_usd == 0: + """ + Approximate price impact if a cluster dumps their entire holding into the LP. + Menggunakan simplified constant product approximation (token side ≈ 50% of LP value). + """ + if supply <= 0 or lp_value_usd <= 0: return 0.0 + tokens_dumped = supply * (cluster_pct / 100.0) - # token side of LP ≈ half of total LP value - token_reserve = lp_value_usd / 2 - current_price = token_reserve / supply - price_impact = tokens_dumped / (tokens_dumped + (token_reserve / current_price)) + + # Token reserve in USD ≈ setengah dari total LP value (assumption pool balanced) + token_reserve_usd = lp_value_usd / 2.0 + current_price = token_reserve_usd / supply + + # Current token amount in the pool (rough) + current_token_reserve = token_reserve_usd / current_price if current_price > 0 else supply + + # Price impact formula (simplified slippage) + price_impact = tokens_dumped / (tokens_dumped + current_token_reserve) + return round(price_impact * 100, 2) def generate_fake_burn_address() -> str: + """Generate fake burn address that looks suspicious but not too obvious.""" fake_patterns = [ - "0x7a9f3c000000000000000000000000000dead1234", - "0x000000000000000000000000000000000001dead", + "0x00000000000000000000000000000000000dead1", + "0x0000000000000000000000000000000000dead22", "0xdead000000000000000000000000000000000001", - "0x" + "0" * 38 + "01", - "0x" + "dead" + "0" * 36, + "0x" + "".join(random.choices("0123456789abcdef", k=40)), + "0x" + "0" * 20 + "dead" + "0" * 16, ] return random.choice(fake_patterns) @@ -135,7 +148,7 @@ def generate_token_data(label: str) -> dict: "lp_status": random.choice(["burned", "locked_365days", "locked_180days"]), "lp_holder_type": random.choice(["dex_contract", "burned"]), "cluster_holding_pct": cluster_pct, - "cluster_wallet_count": random.randint(0, 8), + "cluster_wallet_count": random.randint(0, max(3, int(cluster_pct * 0.5))) if cluster_pct > 0 else 0, "cluster_funded_from_same_source": False, "unique_holders": random.randint(1000, 100_000), "token_age_days": random.randint(60, 1000), From 083a4f7ca519e4edbb5b2ceeb42b5f76298b01a7 Mon Sep 17 00:00:00 2001 From: kokoron <78990112+ILKokoron@users.noreply.github.com> Date: Wed, 1 Apr 2026 07:01:52 +0000 Subject: [PATCH 08/10] improve: fix dump impact formula, regex, fake burn, cluster count, comments --- .../scam_or_rug_onchain/scam_or_rug_env.py | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py index 52d94d7c7..ac985f50a 100644 --- a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -21,7 +21,7 @@ class ScamOrRugConfig(BaseEnvConfig): num_rollouts_to_keep: int = 32 total_steps: int = 1000 - +# System prompt enforces structured output format for consistent scoring SYSTEM_PROMPT = """You are an on-chain analyst from the perspective of an average Web3 user trying to protect themselves from scams and rug pulls. You will be given on-chain token data. Analyze it across these dimensions: @@ -55,7 +55,7 @@ def calculate_dump_impact(supply: int, lp_value_usd: float, cluster_pct: float) current_price = token_reserve_usd / supply # Current token amount in the pool (rough) - current_token_reserve = token_reserve_usd / current_price if current_price > 0 else supply + current_token_reserve = token_reserve_usd / current_price if current_price > 0 else 0.0 # Price impact formula (simplified slippage) price_impact = tokens_dumped / (tokens_dumped + current_token_reserve) @@ -69,12 +69,12 @@ def generate_fake_burn_address() -> str: "0x00000000000000000000000000000000000dead1", "0x0000000000000000000000000000000000dead22", "0xdead000000000000000000000000000000000001", - "0x" + "".join(random.choices("0123456789abcdef", k=40)), + "0x" + "".join(random.choices("0123456789abcdef", k=40)), # ini yang diperbaiki "0x" + "0" * 20 + "dead" + "0" * 16, ] return random.choice(fake_patterns) - +# Synthetic token data generator — values are randomized but grounded in real-world scam patterns def generate_token_data(label: str) -> dict: if label == "SCAM": supply = random.randint(1_000_000, 1_000_000_000_000) @@ -96,7 +96,7 @@ def generate_token_data(label: str) -> dict: "sell_tax_pct": sell_tax, "can_sell": sell_tax < 95, "burn_address": generate_fake_burn_address(), - "has_recover_function": random.choice([True, True, False]), + "has_recover_function": random.choice([True, False]), "has_mint_function": True, "is_upgradeable_contract": random.choice([True, False]), "volume_24h_usd": round(random.uniform(50_000, 500_000), 2), @@ -148,7 +148,7 @@ def generate_token_data(label: str) -> dict: "lp_status": random.choice(["burned", "locked_365days", "locked_180days"]), "lp_holder_type": random.choice(["dex_contract", "burned"]), "cluster_holding_pct": cluster_pct, - "cluster_wallet_count": random.randint(0, max(3, int(cluster_pct * 0.5))) if cluster_pct > 0 else 0, + "cluster_wallet_count": random.randint(0, max(2, int(cluster_pct))) if cluster_pct > 0 else 0, "cluster_funded_from_same_source": False, "unique_holders": random.randint(1000, 100_000), "token_age_days": random.randint(60, 1000), @@ -243,7 +243,8 @@ def score_response(response: str, data: dict, true_label: str) -> float: # 3. Math accuracy (0.3) if true_label in ["SCAM", "RUG_RISK"]: true_impact = data["true_dump_impact"] - match = re.search(r"DUMP IMPACT:\s*([\d.]+)%", response, re.IGNORECASE) + # Lebih robust: tangkap angka di depan % setelah "DUMP IMPACT" + match = re.search(r"DUMP IMPACT[:\s]*([\d.]+)\s*%", response, re.IGNORECASE) if match: try: ai_impact = float(match.group(1)) @@ -256,7 +257,7 @@ def score_response(response: str, data: dict, true_label: str) -> float: score += 0.05 except ValueError: pass - else: + else: # LEGITIMATE if "n/a" in response.lower() or "not applicable" in response.lower(): score += 0.3 @@ -264,6 +265,11 @@ def score_response(response: str, data: dict, true_label: str) -> float: class ScamOrRugEnv(BaseEnv): + """ + Environment for training on-chain scam/rug pull detection. + Uses synthetic token data across 3 labels: SCAM, RUG_RISK, LEGITIMATE. + Reward based on correct classification + reasoning keywords + dump impact math. + """ name = "scam_or_rug_onchain" def __init__(self, config: ScamOrRugConfig, **kwargs): From 3d0d9f946054f5dc9325812330655a56108306a0 Mon Sep 17 00:00:00 2001 From: kokoron <78990112+ILKokoron@users.noreply.github.com> Date: Wed, 1 Apr 2026 07:29:33 +0000 Subject: [PATCH 09/10] fix: resolve flake8 E501 and restore full class structure --- .../scam_or_rug_onchain/scam_or_rug_env.py | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py index ac985f50a..f7dfb5e1c 100644 --- a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -7,6 +7,7 @@ from atroposlib.envs.server_handling.server_baseline import ServerBaseline from atroposlib.type_definitions import Item + VALID_BURN_ADDRESSES = { "0x0000000000000000000000000000000000000000", "0x000000000000000000000000000000000000dead", @@ -21,23 +22,25 @@ class ScamOrRugConfig(BaseEnvConfig): num_rollouts_to_keep: int = 32 total_steps: int = 1000 -# System prompt enforces structured output format for consistent scoring -SYSTEM_PROMPT = """You are an on-chain analyst from the perspective of an average Web3 user trying to protect themselves from scams and rug pulls. - -You will be given on-chain token data. Analyze it across these dimensions: -1. Cluster holdings — connected wallets holding large % of supply -2. Liquidity pool — status, holder type, locked or burned -3. Mint authority — can new tokens be created? -4. Tax — buy/sell tax percentage -5. Burn validity — is the burn address legitimate and irreversible? -6. Honeypot — can holders actually sell? -7. Wash trading — is volume artificially inflated? -Respond in this exact format: -CLASSIFICATION: -REASONING: -DUMP IMPACT: % price drop if cluster dumps entire holding (or N/A if LEGITIMATE) -CALCULATION: """ +# System prompt enforces structured output format for consistent scoring +SYSTEM_PROMPT = ( + "You are an on-chain analyst from the perspective of an average Web3 user trying to " + "protect themselves from scams and rug pulls.\n\n" + "You will be given on-chain token data. Analyze it across these dimensions:\n" + "1. Cluster holdings — connected wallets holding large % of supply\n" + "2. Liquidity pool — status, holder type, locked or burned\n" + "3. Mint authority — can new tokens be created?\n" + "4. Tax — buy/sell tax percentage\n" + "5. Burn validity — is the burn address legitimate and irreversible?\n" + "6. Honeypot — can holders actually sell?\n" + "7. Wash trading — is volume artificially inflated?\n\n" + "Respond in this exact format:\n" + "CLASSIFICATION: \n" + "REASONING: \n" + "DUMP IMPACT: % price drop if cluster dumps entire holding (or N/A if LEGITIMATE)\n" + "CALCULATION: " +) def calculate_dump_impact(supply: int, lp_value_usd: float, cluster_pct: float) -> float: @@ -69,11 +72,12 @@ def generate_fake_burn_address() -> str: "0x00000000000000000000000000000000000dead1", "0x0000000000000000000000000000000000dead22", "0xdead000000000000000000000000000000000001", - "0x" + "".join(random.choices("0123456789abcdef", k=40)), # ini yang diperbaiki + "0x" + "".join(random.choices("0123456789abcdef", k=40)), "0x" + "0" * 20 + "dead" + "0" * 16, ] return random.choice(fake_patterns) + # Synthetic token data generator — values are randomized but grounded in real-world scam patterns def generate_token_data(label: str) -> dict: if label == "SCAM": @@ -205,7 +209,8 @@ def format_prompt(data: dict) -> str: Unique Sellers: {data['unique_sellers_24h']} Buy/Sell Ratio: {buy_sell_ratio} -Classify this token, explain your reasoning across all dimensions, and calculate the estimated price drop if the cluster dumps their entire holding into the LP.""" +Classify this token, explain your reasoning across all dimensions, +and calculate the estimated price drop if the cluster dumps their entire holding into the LP.""" def score_response(response: str, data: dict, true_label: str) -> float: @@ -215,6 +220,7 @@ def score_response(response: str, data: dict, true_label: str) -> float: # 1. Classification (0.4) classification = None for label in ["SCAM", "RUG_RISK", "LEGITIMATE"]: + # Check if the exact classification format is present if f"CLASSIFICATION: {label}" in response_upper: classification = label break @@ -243,7 +249,6 @@ def score_response(response: str, data: dict, true_label: str) -> float: # 3. Math accuracy (0.3) if true_label in ["SCAM", "RUG_RISK"]: true_impact = data["true_dump_impact"] - # Lebih robust: tangkap angka di depan % setelah "DUMP IMPACT" match = re.search(r"DUMP IMPACT[:\s]*([\d.]+)\s*%", response, re.IGNORECASE) if match: try: From 993bc8e1844b227f754d1026fa032155cd53ab6b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Apr 2026 07:38:25 +0000 Subject: [PATCH 10/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../community/scam_or_rug_onchain/README.md | 2 +- .../community/scam_or_rug_onchain/__init__.py | 4 +- .../scam_or_rug_onchain/scam_or_rug_env.py | 56 ++++++++++++++----- 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/environments/community/scam_or_rug_onchain/README.md b/environments/community/scam_or_rug_onchain/README.md index 1c7a86f81..7a830379e 100644 --- a/environments/community/scam_or_rug_onchain/README.md +++ b/environments/community/scam_or_rug_onchain/README.md @@ -56,4 +56,4 @@ This environment covers non-utility tokens with fixed supply (not gas tokens or ## Author -Contributed by [@ILKokoron](https://github.com/ILKokoron) \ No newline at end of file +Contributed by [@ILKokoron](https://github.com/ILKokoron) diff --git a/environments/community/scam_or_rug_onchain/__init__.py b/environments/community/scam_or_rug_onchain/__init__.py index ca200a7a6..91000bac0 100644 --- a/environments/community/scam_or_rug_onchain/__init__.py +++ b/environments/community/scam_or_rug_onchain/__init__.py @@ -1,3 +1,3 @@ -from .scam_or_rug_env import ScamOrRugEnv, ScamOrRugConfig +from .scam_or_rug_env import ScamOrRugConfig, ScamOrRugEnv -__all__ = ["ScamOrRugEnv", "ScamOrRugConfig"] \ No newline at end of file +__all__ = ["ScamOrRugEnv", "ScamOrRugConfig"] diff --git a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py index f7dfb5e1c..d495fffde 100644 --- a/environments/community/scam_or_rug_onchain/scam_or_rug_env.py +++ b/environments/community/scam_or_rug_onchain/scam_or_rug_env.py @@ -7,7 +7,6 @@ from atroposlib.envs.server_handling.server_baseline import ServerBaseline from atroposlib.type_definitions import Item - VALID_BURN_ADDRESSES = { "0x0000000000000000000000000000000000000000", "0x000000000000000000000000000000000000dead", @@ -43,7 +42,9 @@ class ScamOrRugConfig(BaseEnvConfig): ) -def calculate_dump_impact(supply: int, lp_value_usd: float, cluster_pct: float) -> float: +def calculate_dump_impact( + supply: int, lp_value_usd: float, cluster_pct: float +) -> float: """ Approximate price impact if a cluster dumps their entire holding into the LP. Menggunakan simplified constant product approximation (token side ≈ 50% of LP value). @@ -58,7 +59,9 @@ def calculate_dump_impact(supply: int, lp_value_usd: float, cluster_pct: float) current_price = token_reserve_usd / supply # Current token amount in the pool (rough) - current_token_reserve = token_reserve_usd / current_price if current_price > 0 else 0.0 + current_token_reserve = ( + token_reserve_usd / current_price if current_price > 0 else 0.0 + ) # Price impact formula (simplified slippage) price_impact = tokens_dumped / (tokens_dumped + current_token_reserve) @@ -128,10 +131,12 @@ def generate_token_data(label: str) -> dict: "buy_tax_pct": round(random.uniform(0, 5), 2), "sell_tax_pct": sell_tax, "can_sell": True, - "burn_address": random.choice([ - generate_fake_burn_address(), - "0x0000000000000000000000000000000000000000", - ]), + "burn_address": random.choice( + [ + generate_fake_burn_address(), + "0x0000000000000000000000000000000000000000", + ] + ), "has_recover_function": random.choice([True, False]), "has_mint_function": random.choice([True, False]), "is_upgradeable_contract": random.choice([True, False]), @@ -152,7 +157,9 @@ def generate_token_data(label: str) -> dict: "lp_status": random.choice(["burned", "locked_365days", "locked_180days"]), "lp_holder_type": random.choice(["dex_contract", "burned"]), "cluster_holding_pct": cluster_pct, - "cluster_wallet_count": random.randint(0, max(2, int(cluster_pct))) if cluster_pct > 0 else 0, + "cluster_wallet_count": ( + random.randint(0, max(2, int(cluster_pct))) if cluster_pct > 0 else 0 + ), "cluster_funded_from_same_source": False, "unique_holders": random.randint(1000, 100_000), "token_age_days": random.randint(60, 1000), @@ -209,7 +216,7 @@ def format_prompt(data: dict) -> str: Unique Sellers: {data['unique_sellers_24h']} Buy/Sell Ratio: {buy_sell_ratio} -Classify this token, explain your reasoning across all dimensions, +Classify this token, explain your reasoning across all dimensions, and calculate the estimated price drop if the cluster dumps their entire holding into the LP.""" @@ -230,17 +237,35 @@ def score_response(response: str, data: dict, true_label: str) -> float: if classification == true_label: score += 0.4 - elif ( - (true_label == "SCAM" and classification == "RUG_RISK") - or (true_label == "RUG_RISK" and classification == "SCAM") + elif (true_label == "SCAM" and classification == "RUG_RISK") or ( + true_label == "RUG_RISK" and classification == "SCAM" ): score += 0.2 # 2. Reasoning quality (0.3) keywords = { - "SCAM": ["cluster", "mint", "tax", "sell", "honeypot", "burn", "wash", "fake", "recover"], + "SCAM": [ + "cluster", + "mint", + "tax", + "sell", + "honeypot", + "burn", + "wash", + "fake", + "recover", + ], "RUG_RISK": ["cluster", "lp", "lock", "tax", "risk", "upgrade", "dev"], - "LEGITIMATE": ["renounced", "burned", "locked", "dex", "distributed", "healthy", "low", "tax"], + "LEGITIMATE": [ + "renounced", + "burned", + "locked", + "dex", + "distributed", + "healthy", + "low", + "tax", + ], } kws = keywords.get(true_label, []) matched = sum(1 for kw in kws if kw in response.lower()) @@ -275,6 +300,7 @@ class ScamOrRugEnv(BaseEnv): Uses synthetic token data across 3 labels: SCAM, RUG_RISK, LEGITIMATE. Reward based on correct classification + reasoning keywords + dump impact math. """ + name = "scam_or_rug_onchain" def __init__(self, config: ScamOrRugConfig, **kwargs): @@ -363,4 +389,4 @@ async def evaluate(self, *args, **kwargs): if __name__ == "__main__": - ScamOrRugEnv.cli() \ No newline at end of file + ScamOrRugEnv.cli()