Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 65 additions & 2 deletions bbot/core/helpers/web/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import asyncio
import logging
import traceback
from urllib.parse import urlparse
from socksio.exceptions import SOCKSError
from contextlib import asynccontextmanager
from radixtarget import RadixTarget

from bbot.core.engine import EngineServer
from bbot.core.helpers.misc import bytes_to_human, human_to_bytes, get_exception_chain, truncate_string
Expand Down Expand Up @@ -36,6 +38,32 @@ def __init__(self, socket_path, target, config={}, debug=False):
self.web_clients = {}
self.web_client = self.AsyncClient(persist_cookies=False)

# proxy exclusion support
self.has_proxy = bool(self.web_config.get("http_proxy", ""))
proxy_exclusions = self.web_config.get("http_proxy_exclude", [])
self.noproxy_web_clients = {}
self.proxy_bypass_all = False
if self.has_proxy and proxy_exclusions:
normalized = []
for pattern in proxy_exclusions:
pattern = str(pattern).strip()
if pattern == "*":
self.proxy_bypass_all = True
break
# normalize NO_PROXY conventions for radixtarget
# ".example.com" and "*.example.com" both mean "example.com + subdomains"
if pattern.startswith("*."):
pattern = pattern[2:]
elif pattern.startswith("."):
pattern = pattern[1:]
if pattern:
normalized.append(pattern)
self.proxy_exclusion_target = RadixTarget(*normalized) if normalized else RadixTarget()
self.noproxy_web_client = self._AsyncClient_noproxy(persist_cookies=False)
else:
self.proxy_exclusion_target = RadixTarget()
self.noproxy_web_client = None

def AsyncClient(self, *args, **kwargs):
# cache by retries to prevent unwanted accumulation of clients
# (they are not garbage-collected)
Expand All @@ -49,12 +77,44 @@ def AsyncClient(self, *args, **kwargs):
self.web_clients[client.retries] = client
return client

def _AsyncClient_noproxy(self, *args, **kwargs):
    """Return a cached BBOTAsyncClient with the proxy disabled, for excluded hosts.

    Clients are cached by their retry count (mirroring ``AsyncClient``) so we
    don't accumulate un-garbage-collected client objects.
    """
    cache_key = kwargs.get("retries", 1)
    if cache_key in self.noproxy_web_clients:
        return self.noproxy_web_clients[cache_key]

    # deferred import to avoid a circular dependency at module load time
    from .client import BBOTAsyncClient

    # shallow-copy the config and blank out the proxy in the "web" section only
    config_copy = dict(self.config)
    web_section = dict(config_copy.get("web", {}))
    web_section["http_proxy"] = None
    config_copy["web"] = web_section

    new_client = BBOTAsyncClient.from_config(config_copy, self.target, *args, **kwargs)
    # key on the client's own retries attribute, same as the proxied cache
    self.noproxy_web_clients[new_client.retries] = new_client
    return new_client

def _get_client_for_url(self, url, client=None):
"""Return the appropriate client based on proxy exclusion rules.

If no explicit client is provided and the URL matches an exclusion pattern,
returns the no-proxy client. Otherwise returns the given client or default.
"""
if client is not None:
return client
if self.noproxy_web_client is not None and url:
if self.proxy_bypass_all:
return self.noproxy_web_client
hostname = urlparse(str(url)).hostname
if hostname and self.proxy_exclusion_target.get(hostname):
return self.noproxy_web_client
return self.web_client

async def request(self, *args, **kwargs):
raise_error = kwargs.pop("raise_error", False)
# TODO: use this
cache_for = kwargs.pop("cache_for", None) # noqa

client = kwargs.get("client", self.web_client)
explicit_client = kwargs.pop("client", None)

# allow vs follow, httpx why??
allow_redirects = kwargs.pop("allow_redirects", None)
Expand All @@ -79,6 +139,8 @@ async def request(self, *args, **kwargs):

if client_kwargs:
client = self.AsyncClient(**client_kwargs)
else:
client = self._get_client_for_url(url, explicit_client)

try:
async with self._acatch(url, raise_error):
Expand Down Expand Up @@ -144,7 +206,8 @@ async def stream_request(self, url, **kwargs):
chunk_size = 8192
chunks = []

async with self._acatch(url, raise_error=True), self.web_client.stream(url=url, **kwargs) as response:
stream_client = self._get_client_for_url(url)
async with self._acatch(url, raise_error=True), stream_client.stream(url=url, **kwargs) as response:
agen = response.aiter_bytes(chunk_size=chunk_size)
async for chunk in agen:
_chunk_size = len(chunk)
Expand Down
3 changes: 3 additions & 0 deletions bbot/defaults.yml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ dns:
web:
# HTTP proxy
http_proxy:
# Hosts/CIDRs to exclude from HTTP proxy (NO_PROXY equivalent)
# Examples: ["localhost", "*.internal.corp", "10.0.0.0/8", "elastic.mycompany.com"]
http_proxy_exclude: []
# Web user-agent
user_agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.2151.97
# Suffix to append to user-agent (e.g. for tracking or identification)
Expand Down
10 changes: 10 additions & 0 deletions bbot/scanner/preset/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ def preset_from_args(self):
if self.parsed.proxy:
args_preset.core.merge_custom({"web": {"http_proxy": self.parsed.proxy}})

if self.parsed.no_proxy:
args_preset.core.merge_custom({"web": {"http_proxy_exclude": self.parsed.no_proxy}})

if self.parsed.custom_headers:
args_preset.core.merge_custom({"web": {"http_headers": self.parsed.custom_headers}})

Expand Down Expand Up @@ -372,6 +375,13 @@ def create_parser(self, *args, **kwargs):
misc = p.add_argument_group(title="Misc")
misc.add_argument("--version", action="store_true", help="show BBOT version and exit")
misc.add_argument("--proxy", help="Use this proxy for all HTTP requests", metavar="HTTP_PROXY")
misc.add_argument(
"--no-proxy",
nargs="+",
default=[],
help="Exclude these hosts from proxy (e.g. localhost *.internal.corp 10.0.0.0/8)",
metavar="HOST",
)
misc.add_argument(
"-H",
"--custom-headers",
Expand Down
7 changes: 7 additions & 0 deletions bbot/scanner/preset/environ.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,13 @@ def prepare(self):
environ.pop("HTTP_PROXY", None)
environ.pop("HTTPS_PROXY", None)

# handle proxy exclusions (NO_PROXY)
http_proxy_exclude = self.preset.config.get("web", {}).get("http_proxy_exclude", [])
if http_proxy_exclude:
environ["NO_PROXY"] = ",".join(str(x) for x in http_proxy_exclude)
else:
environ.pop("NO_PROXY", None)

# ssl verification
import urllib3

Expand Down
1 change: 1 addition & 0 deletions bbot/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ def __init__(
max_redirects = web_config.get("http_max_redirects", 5)
self.web_max_redirects = max(max_redirects, self.web_spider_distance)
self.http_proxy = web_config.get("http_proxy", "")
self.http_proxy_exclude = web_config.get("http_proxy_exclude", [])
self.http_timeout = web_config.get("http_timeout", 10)
self.httpx_timeout = web_config.get("httpx_timeout", 5)
self.http_retries = web_config.get("http_retries", 1)
Expand Down
64 changes: 64 additions & 0 deletions bbot/test/test_step_1/test_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,70 @@ async def test_http_proxy(bbot_scanner, bbot_httpserver, proxy_server):
await scan._cleanup()


@pytest.mark.asyncio
async def test_http_proxy_exclude(bbot_scanner, bbot_httpserver, proxy_server):
    """Verify that requests to excluded hosts bypass the proxy."""
    endpoint = "/test_http_proxy_exclude"
    url = bbot_httpserver.url_for(endpoint)
    bbot_httpserver.expect_request(uri=endpoint).respond_with_data("proxy_exclude_works")

    proxy_port = proxy_server.server_address[1]
    # 127.0.0.1 is on the exclusion list, so the request must go direct
    scan = bbot_scanner(
        "127.0.0.1",
        config={
            "web": {
                "http_proxy": f"http://127.0.0.1:{proxy_port}",
                "http_proxy_exclude": ["127.0.0.1"],
            }
        },
    )
    await scan._prep()

    proxy_server.RequestHandlerClass.urls.clear()
    response = await scan.helpers.request(url)

    # the proxy must not have seen the request
    assert len(proxy_server.RequestHandlerClass.urls) == 0, "Request should have bypassed proxy but went through it"
    assert response.status_code == 200
    assert response.text == "proxy_exclude_works"

    await scan._cleanup()


@pytest.mark.asyncio
async def test_http_proxy_exclude_passthrough(bbot_scanner, bbot_httpserver, proxy_server):
    """Verify that non-excluded hosts still go through the proxy."""
    endpoint = "/test_proxy_passthrough"
    url = bbot_httpserver.url_for(endpoint)
    bbot_httpserver.expect_request(uri=endpoint).respond_with_data("passthrough_works")

    proxy_port = proxy_server.server_address[1]
    # the exclusion covers 10.0.0.0/8, not 127.0.0.1 -> request must be proxied
    scan = bbot_scanner(
        "127.0.0.1",
        config={
            "web": {
                "http_proxy": f"http://127.0.0.1:{proxy_port}",
                "http_proxy_exclude": ["10.0.0.0/8"],
            }
        },
    )
    await scan._prep()

    proxy_server.RequestHandlerClass.urls.clear()
    response = await scan.helpers.request(url)

    # exactly one request should have been observed by the proxy
    assert len(proxy_server.RequestHandlerClass.urls) == 1, (
        f"Request to {url} should have gone through proxy but didn't"
    )
    assert response.status_code == 200
    assert response.text == "passthrough_works"

    await scan._cleanup()


@pytest.mark.asyncio
async def test_http_ssl(bbot_scanner, bbot_httpserver_ssl):
endpoint = "/test_http_ssl"
Expand Down
Loading