diff --git a/environments/community/scam_or_rug_onchain/README.md b/environments/community/scam_or_rug_onchain/README.md new file mode 100644 index 000000000..7a830379e --- /dev/null +++ b/environments/community/scam_or_rug_onchain/README.md @@ -0,0 +1,59 @@ +# Scam or Rug On-Chain Environment + +A reinforcement learning environment that trains LLMs to detect token scams and rug pulls from the perspective of an average Web3 user. + +## Overview + +This environment challenges the model to analyze raw on-chain token data and classify tokens as `SCAM`, `RUG_RISK`, or `LEGITIMATE` — then calculate the estimated price impact if malicious actors dump their holdings. + +The detection logic is grounded in real-world Web3 experience, covering the most common attack vectors used in BSC, Ethereum, and Solana token scams. + +## Detection Layers + +1. **Cluster Holdings** — identifies connected wallets funded from the same source holding large % of supply (bundled supply attack) +2. **Liquidity Pool** — checks LP holder type, lock duration, and whether LP is genuinely burned or just unlocked +3. **Mint Authority** — flags tokens where dev can still print new supply +4. **Tax Mechanism** — scores buy/sell tax: 0–1% healthy, 1–5% caution, >5% red flag, >10% danger zone / semi-honeypot +5. **Burn Validity** — distinguishes real burn addresses (`0x000...000`, `0x000...dead`) from fake ones, and checks for `recoverTokens()` / `emergencyWithdraw()` backdoors +6. **Honeypot Detection** — checks whether holders can actually sell, and flags suspiciously high sell tax +7. 
**Wash Trading** — analyzes buy/sell ratio and unique trader count to detect artificial volume inflation + +## Task Format + +The model receives structured on-chain data and must respond in this format: +``` +CLASSIFICATION: <SCAM | RUG_RISK | LEGITIMATE> +REASONING: <analysis across the 7 detection layers> +DUMP IMPACT: <number>% price drop if cluster dumps (or N/A if LEGITIMATE) +CALCULATION: <step-by-step math> +``` + +## Reward Function + +Scores are computed across three components: + +| Component | Weight | Description | +|---|---|---| +| Classification | 0.4 | Correct label gets full score, adjacent label (SCAM↔RUG_RISK) gets partial (0.2) | +| Reasoning | 0.3 | Fraction of the true label's analysis keywords (e.g. cluster, mint, tax, burn, lock) found in the response | +| Math accuracy | 0.3 | Dump impact within 2 percentage points → full, within 5 → partial, within 10 → minimal; for LEGITIMATE tokens, answering N/A earns full | + +## Dump Impact Formula +``` +tokens_dumped = total_supply × (cluster_pct / 100) +current_price = lp_value / total_supply +new_price = lp_value / (total_supply + tokens_dumped) +price_drop = (current_price - new_price) / current_price × 100 +``` + +## Data Generation + +All token data is synthetically generated with realistic value ranges derived from real-world scam patterns observed across EVM and Solana chains. Each training round randomizes all variable values while keeping the underlying detection logic consistent — forcing the model to generalize rather than memorize. + +## Scope + +This environment covers non-utility tokens with fixed supply (not gas tokens or established governance tokens). Upgradeable contract detection is included as a proxy for potential mint-after-burn attacks. 
"""Prelude of the scam / rug-pull on-chain detection environment.

Holds the imports, the set of genuine burn addresses, the environment
configuration and the system prompt that fixes the response format.
"""

import random
import re
from dataclasses import dataclass  # kept for compatibility; no longer used by the config
from typing import Optional

from atroposlib.envs.base import BaseEnv, BaseEnvConfig, ScoredDataGroup
from atroposlib.envs.server_handling.server_baseline import ServerBaseline
from atroposlib.type_definitions import Item

# The only two addresses treated as genuine, irreversible burn destinations.
VALID_BURN_ADDRESSES = {
    "0x0000000000000000000000000000000000000000",
    "0x000000000000000000000000000000000000dead",
}


class ScamOrRugConfig(BaseEnvConfig):
    """Configuration defaults for the scam / rug-pull detection environment.

    The fields below override defaults on ``BaseEnvConfig``.

    NOTE(review): ``BaseEnvConfig`` is presumably a pydantic model (confirm
    against atroposlib). A stdlib ``@dataclass`` decorator on a pydantic
    subclass replaces pydantic's generated ``__init__``, so the class is
    declared as a plain subclass and overrides fields by annotation only.
    """

    tokenizer_name: str = "NousResearch/DeepHermes-3-Llama-3-8B-Preview"
    group_size: int = 8
    max_token_length: int = 1024
    num_rollouts_to_keep: int = 32
    total_steps: int = 1000


# System prompt enforces a structured output format for consistent scoring.
# The template spells out the three valid labels and a numeric placeholder so
# that the scorer's "CLASSIFICATION: <label>" and "DUMP IMPACT: <n>%" parsing
# has something well-defined to match.
SYSTEM_PROMPT = (
    "You are an on-chain analyst from the perspective of an average Web3 user trying to "
    "protect themselves from scams and rug pulls.\n\n"
    "You will be given on-chain token data. Analyze it across these dimensions:\n"
    "1. Cluster holdings — connected wallets holding large % of supply\n"
    "2. Liquidity pool — status, holder type, locked or burned\n"
    "3. Mint authority — can new tokens be created?\n"
    "4. Tax — buy/sell tax percentage\n"
    "5. Burn validity — is the burn address legitimate and irreversible?\n"
    "6. Honeypot — can holders actually sell?\n"
    "7. Wash trading — is volume artificially inflated?\n\n"
    "Respond in this exact format:\n"
    "CLASSIFICATION: <SCAM | RUG_RISK | LEGITIMATE>\n"
    "REASONING: <your analysis across the 7 dimensions>\n"
    "DUMP IMPACT: <number>% price drop if cluster dumps entire holding (or N/A if LEGITIMATE)\n"
    "CALCULATION: <step-by-step math>"
)
def calculate_dump_impact(
    supply: int, lp_value_usd: float, cluster_pct: float
) -> float:
    """Estimate the % price drop if a cluster sells its whole holding into the LP.

    Simplified constant-product approximation: the pool's token side is taken
    to be worth half of the LP value and is priced against the full supply.
    Under that assumption the pool's token reserve equals ``supply`` tokens,
    so both the LP value and the supply cancel out and the slippage
    ``tokens_dumped / (tokens_dumped + reserve)`` reduces to
    ``cluster_pct / (100 + cluster_pct)``.

    Args:
        supply: Total token supply; must be positive to produce an impact.
        lp_value_usd: USD value of the liquidity pool; only validated, since
            it cancels out of the formula.
        cluster_pct: Share of supply held by the cluster, in percent.

    Returns:
        The price drop in percent, rounded to 2 decimals. ``0.0`` when the
        supply, the LP value or the cluster share is not positive.
    """
    # A non-positive share means nothing is dumped; it also avoids the
    # division by zero the raw formula hits at cluster_pct == -100.
    if supply <= 0 or lp_value_usd <= 0 or cluster_pct <= 0:
        return 0.0

    price_impact = cluster_pct / (100.0 + cluster_pct)
    return round(price_impact * 100, 2)


def generate_fake_burn_address() -> str:
    """Return an address that resembles a burn address but is not a genuine one.

    Candidates are near-misses of the zero / ``dead`` addresses plus one fully
    random 40-hex-digit address; one of them is picked uniformly at random.
    """
    random_hex = "".join(random.choices("0123456789abcdef", k=40))
    candidates = (
        "0x00000000000000000000000000000000000dead1",
        "0x0000000000000000000000000000000000dead22",
        "0xdead000000000000000000000000000000000001",
        "0x" + random_hex,
        "0x" + "0" * 20 + "dead" + "0" * 16,
    )
    return random.choice(candidates)


# Labels understood by generate_token_data.
_TOKEN_LABELS = ("SCAM", "RUG_RISK", "LEGITIMATE")


# Synthetic token data generator — values are randomized but grounded in
# real-world scam patterns.
def generate_token_data(label: str) -> dict:
    """Generate one randomized synthetic token profile for ``label``.

    Args:
        label: One of ``"SCAM"``, ``"RUG_RISK"`` or ``"LEGITIMATE"``.

    Returns:
        A dict with the on-chain fields consumed by ``format_prompt`` plus
        ``true_dump_impact``, the reference answer computed by
        ``calculate_dump_impact`` from the generated supply, LP value and
        cluster share.

    Raises:
        ValueError: If ``label`` is not one of the three known labels
            (previously any unknown label silently produced LEGITIMATE data).
    """
    if label not in _TOKEN_LABELS:
        raise ValueError(
            f"Unknown label {label!r}; expected one of {', '.join(_TOKEN_LABELS)}"
        )

    supply = random.randint(1_000_000, 1_000_000_000_000)

    if label == "SCAM":
        cluster_pct = round(random.uniform(55, 92), 2)
        lp_value = round(random.uniform(3_000, 25_000), 2)
        sell_tax = round(random.uniform(10, 90), 2)
        profile = {
            "total_supply": supply,
            "mint_authority": random.choice(["active_dev_wallet", "active_single_eoa"]),
            "lp_value_usd": lp_value,
            "lp_status": random.choice(["unlocked", "locked_7days"]),
            "lp_holder_type": random.choice(["unknown_personal_wallet", "dev_wallet"]),
            "cluster_holding_pct": cluster_pct,
            "cluster_wallet_count": random.randint(10, 150),
            "cluster_funded_from_same_source": True,
            "unique_holders": random.randint(50, 400),
            "token_age_days": random.randint(1, 7),
            "buy_tax_pct": round(random.uniform(0, 10), 2),
            "sell_tax_pct": sell_tax,
            # With sell tax drawn from [10, 90] this is always True today; the
            # threshold only matters if the tax range is widened.
            "can_sell": sell_tax < 95,
            "burn_address": generate_fake_burn_address(),
            "has_recover_function": random.choice([True, False]),
            "has_mint_function": True,
            "is_upgradeable_contract": random.choice([True, False]),
            "volume_24h_usd": round(random.uniform(50_000, 500_000), 2),
            "unique_buyers_24h": random.randint(8, 20),
            "unique_sellers_24h": random.randint(7, 19),
        }
    elif label == "RUG_RISK":
        cluster_pct = round(random.uniform(28, 55), 2)
        lp_value = round(random.uniform(10_000, 80_000), 2)
        sell_tax = round(random.uniform(5, 15), 2)
        profile = {
            "total_supply": supply,
            "mint_authority": random.choice(["active_dev_wallet", "renounced"]),
            "lp_value_usd": lp_value,
            "lp_status": random.choice(["unlocked", "locked_30days", "locked_90days"]),
            "lp_holder_type": random.choice(["dev_wallet", "unknown_personal_wallet"]),
            "cluster_holding_pct": cluster_pct,
            "cluster_wallet_count": random.randint(5, 50),
            "cluster_funded_from_same_source": random.choice([True, False]),
            "unique_holders": random.randint(200, 2000),
            "token_age_days": random.randint(3, 60),
            "buy_tax_pct": round(random.uniform(0, 5), 2),
            "sell_tax_pct": sell_tax,
            "can_sell": True,
            "burn_address": random.choice(
                [
                    generate_fake_burn_address(),
                    "0x0000000000000000000000000000000000000000",
                ]
            ),
            "has_recover_function": random.choice([True, False]),
            "has_mint_function": random.choice([True, False]),
            "is_upgradeable_contract": random.choice([True, False]),
            "volume_24h_usd": round(random.uniform(20_000, 200_000), 2),
            "unique_buyers_24h": random.randint(15, 40),
            "unique_sellers_24h": random.randint(12, 38),
        }
    else:  # LEGITIMATE
        cluster_pct = round(random.uniform(0, 15), 2)
        lp_value = round(random.uniform(50_000, 5_000_000), 2)
        profile = {
            "total_supply": supply,
            "mint_authority": random.choice(["renounced", "burned"]),
            "lp_value_usd": lp_value,
            "lp_status": random.choice(["burned", "locked_365days", "locked_180days"]),
            "lp_holder_type": random.choice(["dex_contract", "burned"]),
            "cluster_holding_pct": cluster_pct,
            # A cluster that holds a non-zero share needs at least two wallets;
            # a lower bound of 0 could report "0 wallets holding 12%".
            "cluster_wallet_count": (
                random.randint(2, max(2, int(cluster_pct))) if cluster_pct > 0 else 0
            ),
            "cluster_funded_from_same_source": False,
            "unique_holders": random.randint(1000, 100_000),
            "token_age_days": random.randint(60, 1000),
            "buy_tax_pct": round(random.uniform(0, 1), 2),
            "sell_tax_pct": round(random.uniform(0, 1), 2),
            "can_sell": True,
            "burn_address": random.choice(list(VALID_BURN_ADDRESSES)),
            "has_recover_function": False,
            "has_mint_function": False,
            "is_upgradeable_contract": False,
            "volume_24h_usd": round(random.uniform(100_000, 10_000_000), 2),
            "unique_buyers_24h": random.randint(100, 5000),
            "unique_sellers_24h": random.randint(80, 4000),
        }

    # Reference answer for the math component of the reward.
    profile["true_dump_impact"] = calculate_dump_impact(supply, lp_value, cluster_pct)
    return profile


def format_prompt(data: dict) -> str:
    """Render a token profile as the user prompt shown to the model.

    Args:
        data: A profile as produced by ``generate_token_data``.

    Returns:
        The multi-section prompt text, including a buyers/sellers ratio
        derived from the 24h unique trader counts.
    """
    # max(..., 1) guards the ratio against a zero seller count.
    buy_sell_ratio = round(
        data["unique_buyers_24h"] / max(data["unique_sellers_24h"], 1), 2
    )
    return f"""Analyze this token's on-chain data:

[SUPPLY & MINT]
Total Supply (fixed): {data['total_supply']:,} tokens
Mint Authority: {data['mint_authority']}
Has Mint Function: {data['has_mint_function']}
Upgradeable Contract: {data['is_upgradeable_contract']}

[LIQUIDITY POOL]
LP Value: ${data['lp_value_usd']:,.2f} USD
LP Status: {data['lp_status']}
LP Holder Type: {data['lp_holder_type']}

[BURN]
Burn Address: {data['burn_address']}
Has Recover/Withdraw Function: {data['has_recover_function']}

[TAX]
Buy Tax: {data['buy_tax_pct']}%
Sell Tax: {data['sell_tax_pct']}%
Can Holders Sell: {data['can_sell']}

[HOLDER DISTRIBUTION]
Connected Cluster Holdings: {data['cluster_holding_pct']}% of supply
Wallets in Cluster: {data['cluster_wallet_count']}
Funded From Same Source: {data['cluster_funded_from_same_source']}
Unique Holders: {data['unique_holders']}
Token Age: {data['token_age_days']} days

[TRADING ACTIVITY 24H]
Volume: ${data['volume_24h_usd']:,.2f} USD
Unique Buyers: {data['unique_buyers_24h']}
Unique Sellers: {data['unique_sellers_24h']}
Buy/Sell Ratio: {buy_sell_ratio}

Classify this token, explain your reasoning across all dimensions,
and calculate the estimated price drop if the cluster dumps their entire holding into the LP."""


# "CLASSIFICATION: <label>", tolerant of spacing, case and markdown bold.
_CLASSIFICATION_RE = re.compile(
    r"CLASSIFICATION\**\s*:\s*\**\s*(SCAM|RUG[_ ]?RISK|LEGITIMATE)\b",
    re.IGNORECASE,
)
# "DUMP IMPACT: 45.2%", allowing a few non-digit characters (~, **, "about")
# between the label and the number, but never crossing a line break.
_DUMP_IMPACT_RE = re.compile(
    r"DUMP IMPACT[^\n\d]{0,20}?(\d+(?:\.\d+)?)\s*%", re.IGNORECASE
)
# "N/A" / "not applicable" stated on the DUMP IMPACT line itself.
_DUMP_NOT_APPLICABLE_RE = re.compile(
    r"DUMP IMPACT[^\n]*?\b(?:N/?A|NOT APPLICABLE)\b", re.IGNORECASE
)
# Reasoning keywords expected for each true label.
_REASONING_KEYWORDS = {
    "SCAM": [
        "cluster",
        "mint",
        "tax",
        "sell",
        "honeypot",
        "burn",
        "wash",
        "fake",
        "recover",
    ],
    "RUG_RISK": ["cluster", "lp", "lock", "tax", "risk", "upgrade", "dev"],
    "LEGITIMATE": [
        "renounced",
        "burned",
        "locked",
        "dex",
        "distributed",
        "healthy",
        "low",
        "tax",
    ],
}


def score_response(response: str, data: dict, true_label: str) -> float:
    """Score one model response against the generated token profile.

    The reward has three parts:

    * classification (0.4): full credit for the true label, 0.2 when SCAM and
      RUG_RISK are confused with each other;
    * reasoning (0.3): fraction of the true label's keywords that appear in
      the response, matched at the start of a word so that e.g. ``lp`` does
      not match "help" and ``risk`` is not satisfied by the echoed
      ``RUG_RISK`` label;
    * math (0.3): for SCAM / RUG_RISK, closeness of the stated dump impact to
      ``data["true_dump_impact"]`` (within 2 / 5 / 10 percentage points gives
      0.3 / 0.15 / 0.05); for LEGITIMATE, stating N/A on the DUMP IMPACT line.

    Args:
        response: The model's raw text; ``None`` or empty scores 0.
        data: Token profile; ``true_dump_impact`` is read for SCAM / RUG_RISK.
        true_label: The label the profile was generated for.

    Returns:
        A score in ``[0, 1]`` rounded to 4 decimals. A response without a
        parseable classification line scores ``0.0`` outright.
    """
    if not response:
        return 0.0

    # 1. Classification (0.4)
    found = _CLASSIFICATION_RE.search(response)
    if found is None:
        return 0.0
    classification = found.group(1).upper()
    if classification.startswith("RUG"):
        classification = "RUG_RISK"  # normalise "RUG RISK" / "RUGRISK"

    score = 0.0
    if classification == true_label:
        score += 0.4
    elif {classification, true_label} == {"SCAM", "RUG_RISK"}:
        score += 0.2

    # 2. Reasoning quality (0.3)
    keywords = _REASONING_KEYWORDS.get(true_label, [])
    if keywords:  # unknown labels have no keyword list; avoid dividing by 0
        text = response.lower()
        matched = sum(
            1 for kw in keywords if re.search(r"\b" + re.escape(kw), text)
        )
        score += min(matched / len(keywords), 1.0) * 0.3

    # 3. Math accuracy (0.3)
    if true_label in ("SCAM", "RUG_RISK"):
        stated = _DUMP_IMPACT_RE.search(response)
        if stated:
            diff = abs(float(stated.group(1)) - data["true_dump_impact"])
            if diff <= 2.0:
                score += 0.3
            elif diff <= 5.0:
                score += 0.15
            elif diff <= 10.0:
                score += 0.05
    elif _DUMP_NOT_APPLICABLE_RE.search(response):
        score += 0.3

    return round(score, 4)
class ScamOrRugEnv(BaseEnv):
    """Environment for training on-chain scam / rug-pull detection.

    Each item is one of three labels (SCAM, RUG_RISK, LEGITIMATE). A synthetic
    token profile is generated for the label, the model is sampled
    ``group_size`` times on it, and every completion is rewarded for correct
    classification, reasoning keywords and dump-impact math.
    """

    name = "scam_or_rug_onchain"

    def __init__(self, config: ScamOrRugConfig, **kwargs):
        super().__init__(config, **kwargs)
        self.labels = ["SCAM", "RUG_RISK", "LEGITIMATE"]
        # 1.0 / 0.0 per scored completion since the last wandb_log call.
        self.percent_correct_buffer = []

    @classmethod
    def config_init(cls):
        """Return the default environment config and server baseline."""
        return ScamOrRugConfig(), ServerBaseline()

    async def setup(self):
        """No dataset to load: every item is generated on the fly."""
        pass

    async def get_next_item(self) -> Item:
        """Pick the label for the next group uniformly at random."""
        label = random.choice(self.labels)
        return (label,)

    async def collect_trajectories(self, item) -> tuple:
        """Sample a group of completions for one synthetic token and score them.

        Args:
            item: A one-element tuple holding the true label.

        Returns:
            ``(scored, [])`` where ``scored`` carries ``tokens``, ``masks`` and
            ``scores`` for each completion; the empty list is the backlog.
        """
        label = item[0]
        data = generate_token_data(label)
        prompt = format_prompt(data)

        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ]

        # The request carries chat ``messages`` and the result is read through
        # ``choice.message.content``, i.e. the chat-completion shape, so the
        # chat endpoint is used rather than the plain text-completion one.
        completions = await self.server.chat_completion(
            messages=messages,
            n=self.config.group_size,
            max_tokens=self.config.max_token_length,
        )

        # The prompt is identical for every completion in the group, so its
        # tokenized length is computed once instead of once per completion.
        prompt_len = len(
            self.tokenizer.apply_chat_template(
                messages,
                tokenize=True,
                add_generation_prompt=True,
            )
        )

        scored = ScoredDataGroup()
        scored["tokens"] = []
        scored["masks"] = []
        scored["scores"] = []

        for completion in completions.choices:
            # The content may be None (e.g. an empty generation); score it as
            # an empty answer instead of crashing.
            response = completion.message.content or ""
            reward = score_response(response, data, label)

            tokens = self.tokenizer.apply_chat_template(
                messages + [{"role": "assistant", "content": response}],
                tokenize=True,
                add_generation_prompt=False,
            )
            # Prompt positions are masked with -100; completion positions carry
            # their own token ids rather than a constant 1.
            # NOTE(review): this mirrors the masks convention of atroposlib's
            # tokenize_for_trainer helper as I understand it — confirm.
            prefix = min(prompt_len, len(tokens))  # keep masks aligned with tokens
            masks = [-100] * prefix + list(tokens[prefix:])

            scored["tokens"].append(tokens)
            scored["masks"].append(masks)
            scored["scores"].append(reward)

            # Track accuracy: a response without a parseable classification
            # counts as incorrect instead of being left out of the average.
            response_upper = response.upper()
            predicted = next(
                (
                    lbl
                    for lbl in ["SCAM", "RUG_RISK", "LEGITIMATE"]
                    if f"CLASSIFICATION: {lbl}" in response_upper
                ),
                None,
            )
            self.percent_correct_buffer.append(1.0 if predicted == label else 0.0)

        return scored, []

    async def wandb_log(self, wandb_metrics: Optional[dict] = None):
        """Add the running classification accuracy to the metrics and reset it."""
        if wandb_metrics is None:
            wandb_metrics = {}

        if self.percent_correct_buffer:
            wandb_metrics["train/percent_correct"] = sum(
                self.percent_correct_buffer
            ) / len(self.percent_correct_buffer)
            self.percent_correct_buffer = []

        await super().wandb_log(wandb_metrics)

    async def evaluate(self, *args, **kwargs):
        """No held-out evaluation is implemented for this environment."""
        pass


if __name__ == "__main__":
    ScamOrRugEnv.cli()