diff --git a/.gitignore b/.gitignore index 768643121b..84360a1600 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ __pycache__/ /data/ /neo4j/ .DS_Store +pytest_debug.log diff --git a/AGENTS.md b/AGENTS.md index 88e017d24f..9e7493e719 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -146,6 +146,7 @@ Key helpers: | Helper | What it does | |--------|-------------| | `self.helpers.request(url)` | Make an HTTP request (with retries, SSL handling, etc.) | +| `self.helpers.blasthttp` | Shared blasthttp client (rate-limited via `web.http_rate_limit` config) | | `self.helpers.resolve(host)` | DNS resolution | | `self.helpers.is_ip(s)` | Check if string is an IP | | `self.helpers.is_dns_name(s)` | Check if string is a hostname | @@ -219,7 +220,7 @@ from .base import ModuleTestBase class TestMyModule(ModuleTestBase): async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.example.com/lookup?domain=blacklanternsecurity.com", json={"emails": ["info@blacklanternsecurity.com"]}, ) @@ -370,7 +371,7 @@ Whether to process seed events (the initial targets provided to the scan). Whether to accept "special" URLs (e.g. JavaScript files) that are not normally distributed to web modules. ```python -# httpx.py - needs to process all URLs including special ones +# http.py - needs to process all URLs including special ones accept_url_special = True ``` @@ -535,7 +536,7 @@ _preserve_graph = True Exclude this module from scan statistics. Used by output and report modules. ##### `_disable_auto_module_deps` (bool) -- default: `False` -Prevent BBOT from automatically enabling dependency modules. For example, if your module watches `URL` events, BBOT normally auto-enables `httpx`. Set this to `True` to prevent that. +Prevent BBOT from automatically enabling dependency modules. For example, if your module watches `URL` events, BBOT normally auto-enables `http`. Set this to `True` to prevent that. 
--- @@ -875,7 +876,7 @@ class TestMyModule(ModuleTestBase): targets = ["http://127.0.0.1:8888"] # Optional: override which modules are enabled - modules_overrides = ["httpx", "my_module"] + modules_overrides = ["http", "my_module"] # Optional: override config config_overrides = {"modules": {"my_module": {"some_option": True}}} @@ -887,7 +888,7 @@ class TestMyModule(ModuleTestBase): async def setup_after_prep(self, module_test): """Called AFTER the scan is prepared. Modify modules, add mocks here.""" # Mock an HTTP response - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.example.com/lookup?domain=blacklanternsecurity.com", json={"results": ["sub.blacklanternsecurity.com"]}, ) @@ -920,7 +921,7 @@ The test lifecycle runs: ### Test Utilities -- **`module_test.httpx_mock`** - mock HTTP responses (from pytest-httpx) +- **`module_test.blasthttp_mock`** - mock HTTP responses - **`module_test.httpserver`** - real HTTP server on port 8888 - **`module_test.httpserver_ssl`** - real HTTPS server on port 9999 - **`module_test.mock_dns(data)`** - mock DNS responses @@ -933,7 +934,7 @@ Real example -- `test_module_robots.py`: ```python class TestRobots(ModuleTestBase): targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "robots"] + modules_overrides = ["http", "robots"] config_overrides = {"modules": {"robots": {"include_sitemap": True}}} async def setup_after_prep(self, module_test): diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index f50ae0c77c..77756df241 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -101,8 +101,8 @@ class BaseEvent: "parent": "OPEN_TCP_PORT:cf7e6a937b161217eaed99f0c566eae045d094c7", "tags": ["in-scope", "distance-0", "dir", "status-301"], "http_title": "301 Moved Permanently", - "module": "httpx", - "module_sequence": "httpx" + "module": "http", + "module_sequence": "http" } ``` """ diff --git a/bbot/core/helpers/command.py b/bbot/core/helpers/command.py 
index 7da96bbd38..173e100698 100644 --- a/bbot/core/helpers/command.py +++ b/bbot/core/helpers/command.py @@ -1,6 +1,7 @@ import os import asyncio import logging +import contextlib import traceback from signal import SIGINT from subprocess import CompletedProcess, CalledProcessError, SubprocessError @@ -157,7 +158,18 @@ async def run_live(self, *command, check=False, text=True, idle_timeout=None, ** command_str = " ".join(command) log.warning(f"Stderr for run_live({command_str}):\n\t{stderr}") finally: - proc_tracker.remove(proc) + proc_tracker.discard(proc) + # Kill the subprocess if it's still running (e.g. generator was cancelled/closed) + if proc.returncode is None: + with contextlib.suppress(Exception): + proc.terminate() + try: + await asyncio.wait_for(proc.wait(), timeout=5) + except (asyncio.TimeoutError, Exception): + with contextlib.suppress(Exception): + proc.kill() + if input_task is not None: + input_task.cancel() async def _spawn_proc(self, *command, **kwargs): @@ -270,7 +282,7 @@ def _prepare_command_kwargs(self, command, kwargs): >>> _prepare_command_kwargs(['ls', '-l'], {'sudo': True}) (['sudo', '-E', '-A', 'LD_LIBRARY_PATH=...', 'PATH=...', 'ls', '-l'], {'limit': 104857600, 'stdout': -1, 'stderr': -1, 'env': environ(...)}) """ - # limit = 100MB (this is needed for cases like httpx that are sending large JSON blobs over stdout) + # limit = 100MB (this is needed for cases that are sending large JSON blobs over stdout) if "limit" not in kwargs: kwargs["limit"] = 1024 * 1024 * 100 if "stdout" not in kwargs: diff --git a/bbot/core/helpers/diff.py b/bbot/core/helpers/diff.py index 64c1b1e6a5..29797f80b8 100644 --- a/bbot/core/helpers/diff.py +++ b/bbot/core/helpers/diff.py @@ -148,7 +148,7 @@ def compare_headers(self, headers_1, headers_2): for x in list(ddiff[k]): try: header_value = str(x).split("'")[1] - except KeyError: + except (KeyError, IndexError): continue differing_headers.append(header_value) return differing_headers @@ -233,9 +233,21 @@ 
async def compare( if item in subject_response.text: reflection = True break + diff_reasons = await self.parent_helper.run_in_executor_cpu( + self._compare_sync, + subject_response, + subject, + ) + + if not diff_reasons: + return (True, [], reflection, subject_response) + else: + return (False, diff_reasons, reflection, subject_response) + + def _compare_sync(self, subject_response, subject): + """CPU-bound comparison work offloaded from the event loop.""" try: subject_json = xmltodict.parse(subject_response.text) - except ExpatError: log.debug(f"Can't HTML parse for {subject.split('?')[0]}. Switching to text parsing as a backup") subject_json = subject_response.text.split("\n") @@ -255,13 +267,9 @@ async def compare( if self.compare_body(self.baseline_json, subject_json) is False: log.debug("difference in HTML body, no match") - diff_reasons.append("body") - if not diff_reasons: - return (True, [], reflection, subject_response) - else: - return (False, diff_reasons, reflection, subject_response) + return diff_reasons async def canary_check(self, url, mode, rounds=3): """ diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py index d86b3ada02..0d2a1dbb6b 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -3,7 +3,7 @@ from pathlib import Path import multiprocessing as mp from functools import partial -from concurrent.futures import ProcessPoolExecutor +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor from . 
import misc from .asn import ASNHelper @@ -86,12 +86,14 @@ def __init__(self, preset): self.process_pool = ProcessPoolExecutor(max_workers=num_processes) self._cloud = None + self._blasthttp_client = None self.re = RegexHelper(self) self.yara = YaraHelper(self) self.simhash = SimHashHelper() self._dns = None self._web = None + self._asn = None self._cloudcheck = None self._asn = None self.config_aware_validators = self.validators.Validators(self) @@ -117,6 +119,17 @@ def asn(self): self._asn = ASNHelper(self) return self._asn + @property + def blasthttp(self): + if self._blasthttp_client is None: + import blasthttp as _blasthttp + + self._blasthttp_client = _blasthttp.BlastHTTP() + rate_limit = self.web_config.get("http_rate_limit", 0) + if rate_limit: + self._blasthttp_client.set_rate_limit(rate_limit) + return self._blasthttp_client + @property def cloudcheck(self): if self._cloudcheck is None: @@ -195,22 +208,38 @@ def loop(self): """ if self._loop is None: self._loop = get_event_loop() + # only current caller is wafw00f (sync requests library) + self._io_executor = ThreadPoolExecutor(max_workers=max(8, (os.cpu_count() or 1) + 4)) + self._cpu_executor = ThreadPoolExecutor(max_workers=max(8, os.cpu_count() or 4)) + self._loop.set_default_executor(self._io_executor) return self._loop - def run_in_executor(self, callback, *args, **kwargs): + def run_in_executor_io(self, callback, *args, **kwargs): """ Run a synchronous task in the event loop's default thread pool executor Examples: Execute callback: - >>> result = await self.helpers.run_in_executor(callback_fn, arg1, arg2) + >>> result = await self.helpers.run_in_executor_io(callback_fn, arg1, arg2) + """ + callback = partial(callback, **kwargs) + return self.loop.run_in_executor(self._io_executor, callback, *args) + + def run_in_executor_cpu(self, callback, *args, **kwargs): + """ + Run short CPU-bound work that releases the GIL in a dedicated thread pool, + separate from I/O so it never queues behind long-running 
network calls. + + Examples: + Execute callback: + >>> result = await self.helpers.run_in_executor_cpu(callback_fn, arg1, arg2) """ callback = partial(callback, **kwargs) - return self.loop.run_in_executor(None, callback, *args) + return self.loop.run_in_executor(self._cpu_executor, callback, *args) def run_in_executor_mp(self, callback, *args, **kwargs): """ - Same as run_in_executor() except with a process pool executor + Same as run_in_executor_io() except with a process pool executor Use only in cases where callback is CPU-bound Examples: diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index 6c5211762c..eb2e322bac 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -1661,7 +1661,7 @@ def rm_rf(f, ignore_errors=False): f (str or Path): The directory path to delete. Examples: - >>> rm_rf("/tmp/httpx98323849") + >>> rm_rf("/tmp/bbot98323849") """ import shutil @@ -2718,7 +2718,7 @@ def clean_dns_record(record): """ if not isinstance(record, str): record = str(record.to_text()) - return str(record).rstrip(".").lower() + return str(record).strip("'\"").rstrip(".").lower() def truncate_filename(file_path, max_length=255): diff --git a/bbot/core/helpers/names_generator.py b/bbot/core/helpers/names_generator.py index cc45b19cf6..43f2e072f9 100644 --- a/bbot/core/helpers/names_generator.py +++ b/bbot/core/helpers/names_generator.py @@ -25,6 +25,7 @@ "blazed", "bloodshot", "brown", + "cantankerous", "cheeky", "childish", "chiseled", @@ -56,6 +57,7 @@ "depressed", "deranged", "derogatory", + "derpy", "despicable", "devilish", "devious", @@ -103,6 +105,7 @@ "glutinous", "golden", "gothic", + "greasy", "grievous", "gummy", "hallucinogenic", @@ -137,11 +140,14 @@ "intoxicated", "inventive", "irritable", + "janky", + "lackadaisical", "large", "liquid", "loveable", "lovely", "lucid", + "lumpy", "malevolent", "malfunctioning", "malicious", @@ -221,6 +227,7 @@ "sinful", "sinister", "slippery", + "sloppy", "sly", "sneaky", "soft", @@ 
-311,6 +318,7 @@ "amir", "amy", "andrea", + "andres", "andrew", "angela", "ann", @@ -345,6 +353,7 @@ "brandon", "brandybuck", "brenda", + "brendan", "brian", "brianna", "brittany", @@ -399,6 +408,7 @@ "diana", "diane", "dobby", + "dominic", "donald", "donna", "dooku", diff --git a/bbot/core/helpers/regex.py b/bbot/core/helpers/regex.py index 72c4029cb7..7a6ec0b9cc 100644 --- a/bbot/core/helpers/regex.py +++ b/bbot/core/helpers/regex.py @@ -29,19 +29,19 @@ def compile(self, *args, **kwargs): async def search(self, compiled_regex, *args, **kwargs): self.ensure_compiled_regex(compiled_regex) - return await self.parent_helper.run_in_executor(compiled_regex.search, *args, **kwargs) + return await self.parent_helper.run_in_executor_cpu(compiled_regex.search, *args, **kwargs) async def match(self, compiled_regex, *args, **kwargs): self.ensure_compiled_regex(compiled_regex) - return await self.parent_helper.run_in_executor(compiled_regex.match, *args, **kwargs) + return await self.parent_helper.run_in_executor_cpu(compiled_regex.match, *args, **kwargs) async def sub(self, compiled_regex, *args, **kwargs): self.ensure_compiled_regex(compiled_regex) - return await self.parent_helper.run_in_executor(compiled_regex.sub, *args, **kwargs) + return await self.parent_helper.run_in_executor_cpu(compiled_regex.sub, *args, **kwargs) async def findall(self, compiled_regex, *args, **kwargs): self.ensure_compiled_regex(compiled_regex) - return await self.parent_helper.run_in_executor(compiled_regex.findall, *args, **kwargs) + return await self.parent_helper.run_in_executor_cpu(compiled_regex.findall, *args, **kwargs) async def findall_multi(self, compiled_regexes, *args, threads=10, **kwargs): """ @@ -55,7 +55,7 @@ async def findall_multi(self, compiled_regexes, *args, threads=10, **kwargs): tasks = {} def new_task(regex_name, r): - task = self.parent_helper.run_in_executor(r.findall, *args, **kwargs) + task = self.parent_helper.run_in_executor_cpu(r.findall, *args, **kwargs) 
tasks[task] = regex_name compiled_regexes = dict(compiled_regexes) @@ -77,7 +77,7 @@ def new_task(regex_name, r): async def finditer(self, compiled_regex, *args, **kwargs): self.ensure_compiled_regex(compiled_regex) - return await self.parent_helper.run_in_executor(self._finditer, compiled_regex, *args, **kwargs) + return await self.parent_helper.run_in_executor_cpu(self._finditer, compiled_regex, *args, **kwargs) async def finditer_multi(self, compiled_regexes, *args, **kwargs): """ @@ -85,7 +85,7 @@ async def finditer_multi(self, compiled_regexes, *args, **kwargs): """ for r in compiled_regexes: self.ensure_compiled_regex(r) - return await self.parent_helper.run_in_executor(self._finditer_multi, compiled_regexes, *args, **kwargs) + return await self.parent_helper.run_in_executor_cpu(self._finditer_multi, compiled_regexes, *args, **kwargs) def _finditer_multi(self, compiled_regexes, *args, **kwargs): matches = [] @@ -98,16 +98,16 @@ def _finditer(self, compiled_regex, *args, **kwargs): return list(compiled_regex.finditer(*args, **kwargs)) async def extract_params_html(self, *args, **kwargs): - return await self.parent_helper.run_in_executor(misc.extract_params_html, *args, **kwargs) + return await self.parent_helper.run_in_executor_cpu(misc.extract_params_html, *args, **kwargs) async def extract_emails(self, *args, **kwargs): - return await self.parent_helper.run_in_executor(misc.extract_emails, *args, **kwargs) + return await self.parent_helper.run_in_executor_cpu(misc.extract_emails, *args, **kwargs) async def search_dict_values(self, *args, **kwargs): def _search_dict_values(*_args, **_kwargs): return list(misc.search_dict_values(*_args, **_kwargs)) - return await self.parent_helper.run_in_executor(_search_dict_values, *args, **kwargs) + return await self.parent_helper.run_in_executor_cpu(_search_dict_values, *args, **kwargs) async def recursive_decode(self, *args, **kwargs): - return await self.parent_helper.run_in_executor(misc.recursive_decode, *args, 
**kwargs) + return await self.parent_helper.run_in_executor_cpu(misc.recursive_decode, *args, **kwargs) diff --git a/bbot/core/helpers/web/blast_response.py b/bbot/core/helpers/web/blast_response.py new file mode 100644 index 0000000000..befe625715 --- /dev/null +++ b/bbot/core/helpers/web/blast_response.py @@ -0,0 +1,171 @@ +""" +Response wrapper that gives blasthttp responses a standard Python interface. + +BBOT modules see .status_code, .text, .content, .headers, .json(), +.raise_for_status() — the same interface they've always used. The wrapper +copies data out of the blasthttp PyO3 object into plain Python types so +nothing holds a reference to the Rust-side response after construction. +""" + +import json as _json +from collections.abc import MutableMapping as _MutableMapping +from datetime import timedelta as _timedelta + + +class CaseInsensitiveHeaders(_MutableMapping): + """ + Case-insensitive dict-like object for HTTP headers. + + Modules access headers case-insensitively, they do things like + response.headers.get("Content-Type") or response.headers["content-type"]. + This replicates that behavior using a simple list of (name, value) tuples + as the backing store (preserving duplicates like Set-Cookie). + + Supports mutation (del, __setitem__) for compatibility with HttpCompare. 
+ """ + + __slots__ = ("_list", "_lower_dict") + + def __init__(self, header_tuples): + self._list = list(header_tuples) + self._rebuild_dict() + + def _rebuild_dict(self): + # Last value wins for dict-style access + self._lower_dict = {} + for k, v in self._list: + self._lower_dict[k.lower()] = v + + def get(self, key, default=None): + return self._lower_dict.get(key.lower(), default) + + def items(self): + return iter(self._list) + + def keys(self): + return self._lower_dict.keys() + + def values(self): + return self._lower_dict.values() + + def __getitem__(self, key): + try: + return self._lower_dict[key.lower()] + except KeyError: + raise KeyError(key) + + def __setitem__(self, key, value): + # Remove any existing entries with this key (case-insensitive) + lower_key = key.lower() + self._list = [(k, v) for k, v in self._list if k.lower() != lower_key] + self._list.append((key, value)) + self._lower_dict[lower_key] = value + + def __delitem__(self, key): + lower_key = key.lower() + if lower_key not in self._lower_dict: + raise KeyError(key) + self._list = [(k, v) for k, v in self._list if k.lower() != lower_key] + del self._lower_dict[lower_key] + + def __contains__(self, key): + return key.lower() in self._lower_dict + + def __iter__(self): + return iter(self._lower_dict) + + def __len__(self): + return len(self._lower_dict) + + def __eq__(self, other): + if isinstance(other, CaseInsensitiveHeaders): + return self._lower_dict == other._lower_dict + if isinstance(other, dict): + return self._lower_dict == {k.lower(): v for k, v in other.items()} + return NotImplemented + + def __repr__(self): + return f"CaseInsensitiveHeaders({self._list})" + + +class _RequestInfo: + """Minimal stand-in for a response.request object.""" + + __slots__ = ("url", "method") + + def __init__(self, url, method): + self.url = url + self.method = method + + +class BlasthttpResponse: + """ + Wraps data extracted from a blasthttp Response into a standard + interface so BBOT modules work 
without changes. + + All fields are plain Python types (str, bytes, int, dict) — no references + to blasthttp PyO3 objects are retained after construction. + """ + + __slots__ = ( + "status_code", + "url", + "text", + "content", + "headers", + "request", + "is_success", + "elapsed_ms", + "cookies", + ) + + def __init__(self, blast_resp, request_url, method="GET"): + self.status_code = blast_resp.status + self.url = str(blast_resp.url) + self.text = blast_resp.body + self.content = bytes(blast_resp.body_bytes) + self.headers = CaseInsensitiveHeaders(blast_resp.headers) + self.request = _RequestInfo(request_url, method) + self.is_success = 200 <= blast_resp.status < 400 + self.elapsed_ms = blast_resp.elapsed_ms + # Parse Set-Cookie headers into a simple dict (for r.cookies access) + self.cookies = {} + for k, v in blast_resp.headers: + if k.lower() == "set-cookie": + # Extract just the cookie name=value (before any ;) + parts = v.split(";", 1) + if "=" in parts[0]: + cname, cval = parts[0].split("=", 1) + self.cookies[cname.strip()] = cval.strip() + + @property + def elapsed(self): + """Return elapsed time as a timedelta ..""" + return _timedelta(milliseconds=self.elapsed_ms) + + def __bool__(self): + return True + + def json(self, **kwargs): + return _json.loads(self.text, **kwargs) + + def raise_for_status(self): + if self.status_code >= 400: + raise BlasthttpHTTPError( + f"HTTP {self.status_code} for url {self.url}", + response=self, + ) + + def __str__(self): + return self.text + + def __repr__(self): + return f"BlasthttpResponse(status={self.status_code}, url='{self.url}')" + + +class BlasthttpHTTPError(Exception): + """HTTP error raised by BlasthttpResponse.raise_for_status().""" + + def __init__(self, message, response=None): + super().__init__(message) + self.response = response diff --git a/bbot/core/helpers/web/client.py b/bbot/core/helpers/web/client.py deleted file mode 100644 index 8fb0171bb0..0000000000 --- a/bbot/core/helpers/web/client.py +++ 
/dev/null @@ -1,121 +0,0 @@ -import httpx -import logging -from httpx._models import Cookies - -log = logging.getLogger("bbot.core.helpers.web.client") - - -class DummyCookies(Cookies): - def extract_cookies(self, *args, **kwargs): - pass - - -class BBOTAsyncClient(httpx.AsyncClient): - """ - A subclass of httpx.AsyncClient tailored with BBOT-specific configurations and functionalities. - This class provides rate limiting, logging, configurable timeouts, user-agent customization, custom - headers, and proxy settings. Additionally, it allows the disabling of cookies, making it suitable - for use across an entire scan. - - Attributes: - _bbot_scan (object): BBOT scan object containing configuration details. - _persist_cookies (bool): Flag to determine whether cookies should be persisted across requests. - - Examples: - >>> async with BBOTAsyncClient(_bbot_scan=bbot_scan_object) as client: - >>> response = await client.request("GET", "https://example.com") - >>> print(response.status_code) - 200 - """ - - @classmethod - def from_config(cls, config, target, *args, **kwargs): - kwargs["_config"] = config - kwargs["_target"] = target - web_config = config.get("web", {}) - retries = kwargs.pop("retries", web_config.get("http_retries", 1)) - ssl_verify = web_config.get("ssl_verify", False) - if ssl_verify is False: - from .ssl_context import ssl_context_noverify - - ssl_verify = ssl_context_noverify - kwargs["transport"] = httpx.AsyncHTTPTransport(retries=retries, verify=ssl_verify) - kwargs["verify"] = ssl_verify - return cls(*args, **kwargs) - - def __init__(self, *args, **kwargs): - self._config = kwargs.pop("_config") - self._target = kwargs.pop("_target") - - self._web_config = self._config.get("web", {}) - http_debug = self._web_config.get("debug", None) - if http_debug: - log.trace(f"Creating AsyncClient: {args}, {kwargs}") - - self._persist_cookies = kwargs.pop("persist_cookies", False) - - # timeout - http_timeout = self._web_config.get("http_timeout", 20) - if 
"timeout" not in kwargs: - kwargs["timeout"] = http_timeout - - # headers - headers = kwargs.get("headers", None) - if headers is None: - headers = {} - - # cookies - cookies = kwargs.get("cookies", None) - if cookies is None: - cookies = {} - - # user agent - user_agent = ( - f"{self._web_config.get('user_agent', 'BBOT')} {self._web_config.get('user_agent_suffix') or ''}".strip() - ) - if "User-Agent" not in headers: - headers["User-Agent"] = user_agent - kwargs["headers"] = headers - kwargs["cookies"] = cookies - # proxy - proxies = self._web_config.get("http_proxy", None) - kwargs["proxy"] = proxies - - log.verbose(f"Creating httpx.AsyncClient({args}, {kwargs})") - super().__init__(*args, **kwargs) - if not self._persist_cookies: - self._cookies = DummyCookies() - - def build_request(self, *args, **kwargs): - if args: - url = args[0] - kwargs["url"] = url - url = kwargs["url"] - - in_target = self._target.in_target(str(url)) - - if in_target: - if not kwargs.get("cookies", None): - kwargs["cookies"] = {} - for ck, cv in self._web_config.get("http_cookies", {}).items(): - if ck not in kwargs["cookies"]: - kwargs["cookies"][ck] = cv - - request = super().build_request(**kwargs) - - if in_target: - for hk, hv in self._web_config.get("http_headers", {}).items(): - hv = str(hv) - # don't clobber headers - if hk not in request.headers: - request.headers[hk] = hv - return request - - def _merge_cookies(self, cookies): - if self._persist_cookies: - return super()._merge_cookies(cookies) - return cookies - - @property - def retries(self): - return self._transport._pool._retries diff --git a/bbot/core/helpers/web/engine.py b/bbot/core/helpers/web/engine.py deleted file mode 100644 index 1c3ecc0f52..0000000000 --- a/bbot/core/helpers/web/engine.py +++ /dev/null @@ -1,240 +0,0 @@ -import ssl -import anyio -import httpx -import asyncio -import logging -import traceback -from socksio.exceptions import SOCKSError -from contextlib import asynccontextmanager - -from 
bbot.core.engine import EngineServer -from bbot.core.helpers.misc import bytes_to_human, human_to_bytes, get_exception_chain, truncate_string - -log = logging.getLogger("bbot.core.helpers.web.engine") - - -class HTTPEngine(EngineServer): - CMDS = { - 0: "request", - 1: "request_batch", - 2: "request_custom_batch", - 3: "download", - } - - client_only_options = ( - "retries", - "max_redirects", - ) - - def __init__(self, socket_path, target, config={}, debug=False): - super().__init__(socket_path, debug=debug) - self.target = target - self.config = config - self.web_config = self.config.get("web", {}) - self.http_debug = self.web_config.get("debug", False) - self._ssl_context_noverify = None - self.web_clients = {} - self.web_client = self.AsyncClient(persist_cookies=False) - - def AsyncClient(self, *args, **kwargs): - # cache by retries to prevent unwanted accumulation of clients - # (they are not garbage-collected) - retries = kwargs.get("retries", 1) - try: - return self.web_clients[retries] - except KeyError: - from .client import BBOTAsyncClient - - client = BBOTAsyncClient.from_config(self.config, self.target, *args, **kwargs) - self.web_clients[client.retries] = client - return client - - async def request(self, *args, **kwargs): - raise_error = kwargs.pop("raise_error", False) - # TODO: use this - cache_for = kwargs.pop("cache_for", None) # noqa - - client = kwargs.get("client", self.web_client) - - # allow vs follow, httpx why?? 
- allow_redirects = kwargs.pop("allow_redirects", None) - if allow_redirects is not None and "follow_redirects" not in kwargs: - kwargs["follow_redirects"] = allow_redirects - - # in case of URL only, assume GET request - if len(args) == 1: - kwargs["url"] = args[0] - args = [] - - url = kwargs.get("url", "") - - if not args and "method" not in kwargs: - kwargs["method"] = "GET" - - client_kwargs = {} - for k in list(kwargs): - if k in self.client_only_options: - v = kwargs.pop(k) - client_kwargs[k] = v - - if client_kwargs: - client = self.AsyncClient(**client_kwargs) - - try: - async with self._acatch(url, raise_error): - if self.http_debug: - log.trace(f"Web request: {str(args)}, {str(kwargs)}") - response = await client.request(*args, **kwargs) - if self.http_debug: - log.trace( - f"Web response from {url}: {response} (Length: {len(response.content)}) headers: {response.headers}" - ) - return response - except httpx.HTTPError as e: - if raise_error: - _response = getattr(e, "response", None) - return {"_request_error": str(e), "_response": _response} - - async def request_batch(self, urls, threads=10, **kwargs): - async for (args, _, _), response in self.task_pool( - self.request, args_kwargs=urls, threads=threads, global_kwargs=kwargs - ): - yield args[0], response - - async def request_custom_batch(self, urls_and_kwargs, threads=10, **kwargs): - async for (args, kwargs, tracker), response in self.task_pool( - self.request, args_kwargs=urls_and_kwargs, threads=threads, global_kwargs=kwargs - ): - yield args[0], kwargs, tracker, response - - async def download(self, url, **kwargs): - warn = kwargs.pop("warn", True) - raise_error = kwargs.pop("raise_error", False) - filename = kwargs.pop("filename") - try: - result = await self.stream_request(url, **kwargs) - if result is None: - raise httpx.HTTPError(f"No response from {url}") - content, response = result - log.debug(f"Download result: HTTP {response.status_code}") - response.raise_for_status() - with 
open(filename, "wb") as f: - f.write(content) - return filename - except httpx.HTTPError as e: - log_fn = log.verbose - if warn: - log_fn = log.warning - log_fn(f"Failed to download {url}: {e}") - if raise_error: - _response = getattr(e, "response", None) - return {"_download_error": str(e), "_response": _response} - - async def stream_request(self, url, **kwargs): - follow_redirects = kwargs.pop("follow_redirects", True) - max_size = kwargs.pop("max_size", None) - raise_error = kwargs.pop("raise_error", False) - if max_size is not None: - max_size = human_to_bytes(max_size) - kwargs["follow_redirects"] = follow_redirects - if "method" not in kwargs: - kwargs["method"] = "GET" - try: - total_size = 0 - chunk_size = 8192 - chunks = [] - - async with self._acatch(url, raise_error=True), self.web_client.stream(url=url, **kwargs) as response: - agen = response.aiter_bytes(chunk_size=chunk_size) - async for chunk in agen: - _chunk_size = len(chunk) - if max_size is not None and total_size + _chunk_size > max_size: - log.verbose( - f"Size of response from {url} exceeds {bytes_to_human(max_size)}, file will be truncated" - ) - await agen.aclose() - break - total_size += _chunk_size - chunks.append(chunk) - return b"".join(chunks), response - except httpx.HTTPError as e: - self.log.debug(f"Error requesting {url}: {e}") - if raise_error: - raise - - def ssl_context_noverify(self): - if self._ssl_context_noverify is None: - ssl_context = ssl.create_default_context() - ssl_context.check_hostname = False - ssl_context.verify_mode = ssl.CERT_NONE - ssl_context.options &= ~ssl.OP_NO_SSLv2 & ~ssl.OP_NO_SSLv3 - ssl_context.set_ciphers("ALL:@SECLEVEL=0") - ssl_context.options |= 0x4 # Add the OP_LEGACY_SERVER_CONNECT option - self._ssl_context_noverify = ssl_context - return self._ssl_context_noverify - - @asynccontextmanager - async def _acatch(self, url, raise_error): - """ - Asynchronous context manager to handle various httpx errors during a request. 
- - Yields: - None - - Note: - This function is internal and should generally not be used directly. - `url`, `args`, `kwargs`, and `raise_error` should be in the same context as this function. - """ - try: - yield - except httpx.TimeoutException: - if raise_error: - raise - else: - log.verbose(f"HTTP timeout to URL: {url}") - except httpx.ConnectError: - if raise_error: - raise - else: - log.debug(f"HTTP connect failed to URL: {url}") - except httpx.HTTPError as e: - if raise_error: - raise - else: - log.trace(f"Error with request to URL: {url}: {e}") - log.trace(traceback.format_exc()) - except httpx.InvalidURL as e: - if raise_error: - raise - else: - log.warning( - f"Invalid URL (possibly due to dangerous redirect) on request to : {url}: {truncate_string(str(e), 200)}" - ) - log.trace(traceback.format_exc()) - except ssl.SSLError as e: - msg = f"SSL error with request to URL: {url}: {e}" - if raise_error: - raise httpx.RequestError(msg) - else: - log.trace(msg) - log.trace(traceback.format_exc()) - except anyio.EndOfStream as e: - msg = f"AnyIO error with request to URL: {url}: {e}" - if raise_error: - raise httpx.RequestError(msg) - else: - log.trace(msg) - log.trace(traceback.format_exc()) - except SOCKSError as e: - msg = f"SOCKS error with request to URL: {url}: {e}" - if raise_error: - raise httpx.RequestError(msg) - else: - log.trace(msg) - log.trace(traceback.format_exc()) - except BaseException as e: - # don't log if the error is the result of an intentional cancellation - if not any(isinstance(_e, asyncio.exceptions.CancelledError) for _e in get_exception_chain(e)): - log.trace(f"Unhandled exception with request to URL: {url}: {e}") - log.trace(traceback.format_exc()) - raise diff --git a/bbot/core/helpers/web/web.py b/bbot/core/helpers/web/web.py index 9f8b751c65..927d8417e3 100644 --- a/bbot/core/helpers/web/web.py +++ b/bbot/core/helpers/web/web.py @@ -1,16 +1,18 @@ +import json +import asyncio import logging +import traceback import warnings from 
pathlib import Path -from bs4 import BeautifulSoup - -from bbot.core.engine import EngineClient -from bbot.core.helpers.misc import truncate_filename -from bbot.errors import WordlistError, CurlError, WebError +from urllib.parse import urlencode, urlparse, urlunparse, parse_qs +from bs4 import BeautifulSoup from bs4 import MarkupResemblesLocatorWarning from bs4.builder import XMLParsedAsHTMLWarning -from .engine import HTTPEngine +from bbot.core.helpers.misc import truncate_filename, bytes_to_human, get_exception_chain +from bbot.errors import WordlistError, WebError +from .blast_response import BlasthttpResponse, BlasthttpHTTPError warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning) warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) @@ -18,21 +20,18 @@ log = logging.getLogger("bbot.core.helpers.web") -class WebHelper(EngineClient): - SERVER_CLASS = HTTPEngine - ERROR_CLASS = WebError - +class WebHelper: """ - Main utility class for managing HTTP operations in BBOT. It serves as a wrapper around the BBOTAsyncClient, - which itself is a subclass of httpx.AsyncClient. The class provides functionalities to make HTTP requests, - download files, and handle cached wordlists. + Main utility class for managing HTTP operations in BBOT. Uses blasthttp (Rust) as the + HTTP engine for all requests, downloads, and wordlist retrieval. + + All requests go through the shared blasthttp client on the parent helper, + which supports global rate limiting via ``web.http_rate_limit``. Attributes: parent_helper (object): The parent helper object containing scan configurations. http_debug (bool): Flag to indicate whether HTTP debugging is enabled. ssl_verify (bool): Flag to indicate whether SSL verification is enabled. - web_client (BBOTAsyncClient): An instance of BBOTAsyncClient for making HTTP requests. - client_only_options (tuple): A tuple of options only applicable to the web client. 
Examples: Basic web request: @@ -52,24 +51,143 @@ def __init__(self, parent_helper): self.web_config = self.config.get("web", {}) self.web_spider_depth = self.web_config.get("spider_depth", 1) self.web_spider_distance = self.web_config.get("spider_distance", 0) - self.web_clients = {} self.target = self.preset.target - self.ssl_verify = self.config.get("ssl_verify", False) - engine_debug = self.config.get("engine", {}).get("debug", False) - super().__init__(server_kwargs={"config": self.config, "target": self.target}, debug=engine_debug) - - def AsyncClient(self, *args, **kwargs): - # cache by retries to prevent unwanted accumulation of clients - # (they are not garbage-collected) - retries = kwargs.get("retries", 1) - try: - return self.web_clients[retries] - except KeyError: - from .client import BBOTAsyncClient + self.http_debug = self.web_config.get("debug", False) + self.ssl_verify = self.web_config.get("ssl_verify", False) + # Pre-compute config values for request preprocessing + self._http_timeout = self.web_config.get("http_timeout", 20) + self._http_retries = self.web_config.get("http_retries", 1) + self._http_proxy = self.web_config.get("http_proxy", None) + ua = self.web_config.get("user_agent", "BBOT") + ua_suffix = self.web_config.get("user_agent_suffix") or "" + self._user_agent = f"{ua} {ua_suffix}".strip() + self._custom_headers = self.web_config.get("http_headers", {}) + self._custom_cookies = self.web_config.get("http_cookies", {}) + + @property + def client(self): + """The shared rate-limited blasthttp client for target-directed traffic.""" + return self.parent_helper.blasthttp + + def _build_blasthttp_kwargs(self, url, **kwargs): + """ + Translate request kwargs into blasthttp.request() kwargs. 
- client = BBOTAsyncClient.from_config(self.config, self.target, *args, persist_cookies=False, **kwargs) - self.web_clients[client.retries] = client - return client + Handles: method, headers, body/data/json, timeout, follow_redirects, + max_redirects, proxy, retries, params, cookies, auth. + + Returns (url, method, blast_kwargs) — url may be modified if params were appended. + """ + method = kwargs.pop("method", "GET") + headers = kwargs.pop("headers", None) or {} + body = kwargs.pop("body", None) + data = kwargs.pop("data", None) + json_body = kwargs.pop("json", None) + timeout = kwargs.pop("timeout", self._http_timeout) + follow_redirects = kwargs.pop("follow_redirects", None) + max_redirects = kwargs.pop("max_redirects", None) + proxy = kwargs.pop("proxy", self._http_proxy) + retries = kwargs.pop("retries", self._http_retries) + params = kwargs.pop("params", None) + cookies = kwargs.pop("cookies", None) + auth = kwargs.pop("auth", None) + max_body_size = kwargs.pop("max_body_size", None) + request_target = kwargs.pop("request_target", None) + resolve_ip = kwargs.pop("resolve_ip", None) + ignore_bbot_global_settings = kwargs.pop("ignore_bbot_global_settings", False) + + # -- URL params -- + if params: + parsed = urlparse(url) + existing = parse_qs(parsed.query, keep_blank_values=True) + if isinstance(params, dict): + existing.update(params) + new_query = urlencode(existing, doseq=True) + url = urlunparse(parsed._replace(query=new_query)) + + # -- Headers as list of tuples -- + header_list = [] + + if not ignore_bbot_global_settings: + # User-Agent (can be overridden by caller) + if "User-Agent" not in headers: + header_list.append(("User-Agent", self._user_agent)) + + # Scan-level custom headers (only for in-scope URLs) + if self.target.in_target(url): + for hk, hv in self._custom_headers.items(): + if hk not in headers: + header_list.append((hk, str(hv))) + + # Scan-level custom cookies (merge with caller cookies) + if self._custom_cookies: + if cookies is 
None: + cookies = {} + for ck, cv in self._custom_cookies.items(): + if ck not in cookies: + cookies[ck] = cv + + # Caller-supplied headers + for hk, hv in headers.items(): + if isinstance(hv, list): + for v in hv: + header_list.append((hk, str(v))) + else: + header_list.append((hk, str(hv))) + + # -- JSON body -- + if json_body is not None: + body = json.dumps(json_body) + # Only set Content-Type if not already provided + if not any(k.lower() == "content-type" for k, _ in header_list): + header_list.append(("Content-Type", "application/json")) + + # -- Form data -- + if data is not None and body is None: + if isinstance(data, dict): + body = urlencode(data) + if not any(k.lower() == "content-type" for k, _ in header_list): + header_list.append(("Content-Type", "application/x-www-form-urlencoded")) + elif isinstance(data, (str, bytes)): + body = str(data) if isinstance(data, bytes) else data + + # -- Cookies -- + if cookies: + cookie_str = "; ".join(f"{ck}={cv}" for ck, cv in cookies.items()) + header_list.append(("Cookie", cookie_str)) + + # -- Basic auth -- + if auth: + import base64 + + user, passwd = auth + cred = base64.b64encode(f"{user}:{passwd}".encode()).decode() + header_list.append(("Authorization", f"Basic {cred}")) + + blast_kwargs = { + "method": method, + "headers": header_list, + "timeout": int(timeout) if timeout else self._http_timeout, + "verify_certs": bool(self.ssl_verify), + "retries": int(retries), + } + + if body is not None: + blast_kwargs["body"] = str(body) + if follow_redirects is not None: + blast_kwargs["follow_redirects"] = follow_redirects + if max_redirects is not None: + blast_kwargs["max_redirects"] = int(max_redirects) + if proxy: + blast_kwargs["proxy"] = proxy + if max_body_size is not None: + blast_kwargs["max_body_size"] = int(max_body_size) + if request_target is not None: + blast_kwargs["request_target"] = request_target + if resolve_ip is not None: + blast_kwargs["resolve_ip"] = resolve_ip + + return url, method, 
async def request(self, *args, **kwargs):
    """
    Perform a single HTTP request through the shared blasthttp client.

    Accepts the same keyword surface as the old httpx-based helper; options
    blasthttp cannot honor (``cache_for``, ``client``, ``stream``) are silently
    stripped, and ``files`` is stripped with a warning.

    Args:
        url (str): Target URL (sole positional arg, or ``url=`` keyword).
        method (str, optional): HTTP method. Defaults to "GET".
        raise_error (bool, optional): Raise WebError on failure instead of returning None.
        allow_redirects (bool, optional): Alias for ``follow_redirects``.
        request_target (str, optional): Override the HTTP request-line target.
        resolve_ip (str, optional): Connect TCP to this IP instead of DNS resolution.
        ignore_bbot_global_settings (bool, optional): Skip User-Agent/header/cookie merging.
        **kwargs: Remaining options, translated by ``_build_blasthttp_kwargs``.

    Raises:
        WebError: If ``raise_error`` is True and the request fails.

    Returns:
        BlasthttpResponse or None: None on failure when ``raise_error`` is False.
    """
    raise_error = kwargs.pop("raise_error", False)
    # legacy httpx-era options with no blasthttp equivalent
    for unsupported in ("cache_for", "client", "stream"):
        kwargs.pop(unsupported, None)
    if kwargs.pop("files", None) is not None:
        log.warning("blasthttp does not support multipart file uploads (files= kwarg)")

    # "allow_redirects" is an alias; an explicit follow_redirects wins
    redirect_alias = kwargs.pop("allow_redirects", None)
    if redirect_alias is not None and "follow_redirects" not in kwargs:
        kwargs["follow_redirects"] = redirect_alias

    # the URL may arrive as the sole positional argument
    if len(args) == 1:
        kwargs["url"] = args[0]
        args = ()

    target_url = kwargs.pop("url", "")
    if not target_url:
        if raise_error:
            raise WebError("No URL provided")
        return None

    kwargs.setdefault("method", "GET")

    # translate to blasthttp's kwarg format (may rewrite the URL for params)
    target_url, http_method, translated = self._build_blasthttp_kwargs(target_url, **kwargs)

    try:
        if self.http_debug:
            log.trace(f"blasthttp request: {http_method} {target_url}")

        # blasthttp returns a native coroutine via pyo3-async-runtimes
        raw = await self.client.request(target_url, **translated)
        wrapped = BlasthttpResponse(raw, request_url=target_url, method=http_method)

        if self.http_debug:
            log.trace(
                f"blasthttp response from {target_url}: {wrapped.status_code} "
                f"(Length: {len(wrapped.content)}) headers: {wrapped.headers}"
            )
        return wrapped

    except RuntimeError as e:
        message = str(e)
        if raise_error:
            raise WebError(message)
        # classify the failure to pick an appropriate log level
        folded = message.lower()
        if "timeout" in folded:
            log.verbose(f"HTTP timeout to URL: {target_url}")
        elif "connect" in folded or "connection" in folded:
            log.debug(f"HTTP connect failed to URL: {target_url}")
        else:
            log.trace(f"blasthttp error for {target_url}: {message}")
    except BaseException as e:
        # stay quiet for deliberate cancellations; trace-log anything else
        if not any(isinstance(exc, asyncio.exceptions.CancelledError) for exc in get_exception_chain(e)):
            log.trace(f"Unhandled exception with request to URL: {target_url}: {e}")
            log.trace(traceback.format_exc())
        raise
async def request_batch(self, urls, threads=10, **kwargs):
    """
    Request multiple URLs in parallel via blasthttp's native Rust batch engine.

    Applies the same header/cookie/proxy/timeout logic as ``request()`` -- each
    entry is translated into a ``blasthttp.BatchConfig`` and sent to Rust in one
    shot. Results are returned as a list (not streamed).

    Each entry in ``urls`` can be:
        - A plain URL string (uses shared ``**kwargs`` for all requests)
        - A ``(url, per_request_kwargs)`` tuple for per-request options
        - A ``(url, per_request_kwargs, tracker)`` tuple to attach arbitrary
          tracking data that is returned alongside the response

    Args:
        urls: URLs to visit -- strings or ``(url, kwargs[, tracker])`` tuples.
        threads (int): Concurrency passed to blasthttp. Defaults to 10.
        **kwargs: Default keyword arguments (same as ``request()``).
            Overridden by per-request kwargs when entries are tuples.

    Returns:
        When entries are plain strings: ``list[(url, response)]``
        When any entry includes a tracker: ``list[(url, response, tracker)]``

    Note:
        BUGFIX vs. previous revision: responses were always constructed with
        method="GET", even when a per-request kwargs dict specified another
        method. Each entry's resolved method is now tracked and used.
    """
    import blasthttp

    # Parse entries into uniform (url, req_kwargs, tracker) tuples
    entries = []
    has_tracker = False
    for entry in urls:
        if isinstance(entry, str):
            entries.append((entry, kwargs, None))
        elif isinstance(entry, tuple):
            url = entry[0]
            req_kwargs = entry[1] if len(entry) > 1 and isinstance(entry[1], dict) else kwargs
            tracker = entry[2] if len(entry) > 2 else None
            if tracker is not None:
                has_tracker = True
            entries.append((url, req_kwargs, tracker))
        else:
            entries.append((str(entry), kwargs, None))

    if not entries:
        return []

    # Build BatchConfig objects using the same translation logic as request()
    configs = []
    trackers = []
    methods = []  # resolved HTTP method per entry, for response construction
    for url, req_kwargs, tracker in entries:
        url, method, blast_kwargs = self._build_blasthttp_kwargs(url, **req_kwargs)
        configs.append(blasthttp.BatchConfig(url, **blast_kwargs))
        trackers.append(tracker)
        methods.append(method)

    # Send to Rust -- all I/O happens here
    batch_results = await self.client.request_batch(configs, concurrency=threads)

    # Convert to (url, response[, tracker]) tuples.
    # Results are returned in the same order as configs.
    results = []
    for i, br in enumerate(batch_results):
        if br.response is not None:
            response = BlasthttpResponse(br.response, request_url=br.url, method=methods[i])
        else:
            response = None
        if has_tracker:
            results.append((br.url, response, trackers[i]))
        else:
            results.append((br.url, response))
    return results
async def download(self, url, **kwargs):
    """
    Fetch a URL and write the response body to a local file.

    Args:
        url (str): The URL to download.
        filename (str, optional): Destination path; defaults to the scan's
            cache path for the URL.
        max_size (str or int, optional): Human-readable size cap (e.g. "2mb");
            the response is truncated to fit.
        cache_hrs (float, optional): Reuse a cached copy if one exists.
            A negative value disables caching. Defaults to -1.
        warn (bool, optional): Log failures at warning level (True) or
            verbose level (False). Defaults to True.
        raise_error (bool, optional): Re-raise failures as WebError. Defaults to False.
        method (str, optional): The HTTP method to use for the request, defaults to 'GET'.
        **kwargs: Additional keyword arguments to pass to request().

    Raises:
        WebError: If ``raise_error`` is True and the download fails.

    Returns:
        Path or None: The destination path on success, otherwise None.
    """
    warn = kwargs.pop("warn", True)
    raise_error = kwargs.get("raise_error", False)
    dest = kwargs.pop("filename", self.parent_helper.cache_filename(url))
    dest = truncate_filename(Path(dest).resolve())
    size_cap = kwargs.pop("max_size", None)
    if size_cap is not None:
        size_cap = self.parent_helper.human_to_bytes(size_cap)
    cache_hrs = float(kwargs.pop("cache_hrs", -1))

    # serve from cache when caching is enabled and a cached copy exists
    if cache_hrs > 0 and self.parent_helper.is_cached(url):
        log.debug(f"{url} is cached at {self.parent_helper.cache_filename(url)}")
        return dest

    try:
        kwargs["follow_redirects"] = kwargs.pop("follow_redirects", True)
        kwargs.setdefault("method", "GET")
        # always surface transport failures so we can classify them below
        kwargs["raise_error"] = True
        # downloads get a generous timeout (5 minutes) unless told otherwise
        kwargs.setdefault("timeout", 300)
        # and a raised transport-level body cap unless one was given
        if "max_body_size" not in kwargs:
            kwargs["max_body_size"] = size_cap if size_cap is not None else 500 * 1024 * 1024  # 500MB default

        response = await self.request(url, **kwargs)
        if response is None:
            raise BlasthttpHTTPError(f"No response from {url}")

        log.debug(f"Download result: HTTP {response.status_code}")
        response.raise_for_status()

        payload = response.content
        # enforce the caller's size cap even if the transport delivered more
        if size_cap is not None and len(payload) > size_cap:
            log.verbose(f"Size of response from {url} exceeds {bytes_to_human(size_cap)}, file will be truncated")
            payload = payload[:size_cap]

        with open(dest, "wb") as f:
            f.write(payload)
        return dest

    except (BlasthttpHTTPError, WebError, RuntimeError) as e:
        log_fn = log.warning if warn else log.verbose
        log_fn(f"Failed to download {url}: {e}")
        if raise_error:
            error = WebError(str(e))
            error.response = getattr(e, "response", None)
            raise error
- cookies (dict, optional): A dictionary of cookies to include in the request. - path_override (str, optional): Overrides the request-target to use in the HTTP request line. - head_mode (bool, optional): If True, includes '-I' to fetch headers only. Defaults to None. - raw_body (str, optional): Raw string to be sent in the body of the request. - **kwargs: Arbitrary keyword arguments that will be forwarded to the HTTP request function. - - Returns: - str: The output of the cURL command. - - Raises: - CurlError: If 'url' is not supplied. - - Examples: - >>> output = await curl(url="https://example.com", headers={"X-Header": "Wat"}) - >>> print(output) - """ - url = kwargs.get("url", "") - - if not url: - raise CurlError("No URL supplied to CURL helper") - - curl_command = ["curl", url, "-s"] - - raw_path = kwargs.get("raw_path", False) - if raw_path: - curl_command.append("--path-as-is") - - # respect global ssl verify settings - if self.ssl_verify is not True: - curl_command.append("-k") - - headers = kwargs.get("headers", {}) - cookies = kwargs.get("cookies", {}) - - ignore_bbot_global_settings = kwargs.get("ignore_bbot_global_settings", False) - - if ignore_bbot_global_settings: - http_timeout = 20 # setting 20 as a worse-case setting - log.debug("ignore_bbot_global_settings enabled. 
Global settings will not be applied") - else: - http_timeout = self.parent_helper.web_config.get("http_timeout", 20) - user_agent = f"{self.parent_helper.web_config.get('user_agent', 'BBOT')} {self.parent_helper.web_config.get('user_agent_suffix') or ''}".strip() - - if "User-Agent" not in headers: - headers["User-Agent"] = user_agent - - # only add custom headers / cookies if the URL is in-scope - if self.parent_helper.preset.in_scope(url): - for hk, hv in self.web_config.get("http_headers", {}).items(): - # Only add the header if it doesn't already exist in the headers dictionary - if hk not in headers: - headers[hk] = hv - - for ck, cv in self.web_config.get("http_cookies", {}).items(): - # don't clobber cookies - if ck not in cookies: - cookies[ck] = cv - - # add the timeout - if "timeout" not in kwargs: - timeout = http_timeout - - curl_command.append("-m") - curl_command.append(str(timeout)) - - for k, v in headers.items(): - if isinstance(v, list): - for x in v: - curl_command.append("-H") - curl_command.append(f"{k}: {x}") - - else: - curl_command.append("-H") - curl_command.append(f"{k}: {v}") - - post_data = kwargs.get("post_data", {}) - if len(post_data.items()) > 0: - curl_command.append("-d") - post_data_str = "" - for k, v in post_data.items(): - post_data_str += f"&{k}={v}" - curl_command.append(post_data_str.lstrip("&")) - - method = kwargs.get("method", "") - if method: - curl_command.append("-X") - curl_command.append(method) - - cookies = kwargs.get("cookies", "") - if cookies: - curl_command.append("-b") - cookies_str = "" - for k, v in cookies.items(): - cookies_str += f"{k}={v}; " - curl_command.append(f"{cookies_str.rstrip(' ')}") - - path_override = kwargs.get("path_override", None) - if path_override: - curl_command.append("--request-target") - curl_command.append(f"{path_override}") - - head_mode = kwargs.get("head_mode", None) - if head_mode: - curl_command.append("-I") - - raw_body = kwargs.get("raw_body", None) - if raw_body: - 
curl_command.append("-d") - curl_command.append(raw_body) - log.verbose(f"Running curl command: {curl_command}") - output = (await self.parent_helper.run(curl_command)).stdout - return output - def beautifulsoup( self, markup, @@ -478,6 +590,9 @@ def beautifulsoup( Searches the soup instance for all occurrences of the passed in argument """ try: + # If a response object is passed, extract the text + if hasattr(markup, "text") and not isinstance(markup, (str, bytes)): + markup = markup.text soup = BeautifulSoup( markup, features, builder, parse_only, from_encoding, exclude_encodings, element_classes, **kwargs ) @@ -488,7 +603,7 @@ def beautifulsoup( def response_to_json(self, response): """ - Convert web response to JSON object, similar to the output of `httpx -irr -json` + Convert web response to JSON object, to a JSON-serializable dict. """ if response is None: diff --git a/bbot/core/helpers/yara_helper.py b/bbot/core/helpers/yara_helper.py index 7f9428b55b..7ee37451c0 100644 --- a/bbot/core/helpers/yara_helper.py +++ b/bbot/core/helpers/yara_helper.py @@ -40,7 +40,7 @@ async def match(self, compiled_rules, text): Given a compiled YARA rule and a body of text, return a list of strings that match the rule """ matched_strings = [] - matches = await self.parent_helper.run_in_executor(compiled_rules.match, data=text) + matches = await self.parent_helper.run_in_executor_cpu(compiled_rules.match, data=text) if matches: for match in matches: for string_match in match.strings: diff --git a/bbot/core/modules.py b/bbot/core/modules.py index 1100919423..f89f114fe7 100644 --- a/bbot/core/modules.py +++ b/bbot/core/modules.py @@ -48,7 +48,7 @@ class ModuleLoader: module_dir_regex = re.compile(r"^[a-z][a-z0-9_]*$") # if a module consumes these event types, automatically assume these dependencies - default_module_deps = {"HTTP_RESPONSE": "httpx", "URL": "httpx", "SOCIAL": "social"} + default_module_deps = {"HTTP_RESPONSE": "http", "URL": "http", "SOCIAL": "social"} def 
__init__(self): self.core = CORE @@ -303,7 +303,7 @@ def preload_module(self, module_file): "hash": "d5a88dd3866c876b81939c920bf4959716e2a374", "deps": { "modules": [ - "httpx" + "http" ] "pip": [ "python-Wappalyzer~=0.3.1" diff --git a/bbot/core/shared_deps.py b/bbot/core/shared_deps.py index c3b7623654..49fa2d69cf 100644 --- a/bbot/core/shared_deps.py +++ b/bbot/core/shared_deps.py @@ -1,15 +1,3 @@ -DEP_FFUF = [ - { - "name": "Download ffuf", - "unarchive": { - "src": "https://github.com/ffuf/ffuf/releases/download/v#{BBOT_DEPS_FFUF_VERSION}/ffuf_#{BBOT_DEPS_FFUF_VERSION}_#{BBOT_OS}_#{BBOT_CPU_ARCH_GOLANG}.tar.gz", - "include": "ffuf", - "dest": "#{BBOT_TOOLS}", - "remote_src": True, - }, - } -] - DEP_DOCKER = [ { "name": "Check if Docker is already installed", @@ -244,7 +232,7 @@ }, ] -# shared module dependencies -- ffuf, massdns, chromium, etc. +# shared module dependencies -- massdns, chromium, etc. SHARED_DEPS = {} for var, val in list(locals().items()): if var.startswith("DEP_") and isinstance(val, list): diff --git a/bbot/defaults.yml b/bbot/defaults.yml index fa3727b1b4..3856a1f644 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -96,8 +96,8 @@ web: spider_links_per_page: 25 # HTTP timeout (for Python requests; API calls, etc.) http_timeout: 10 - # HTTP timeout (for httpx) - httpx_timeout: 5 + # HTTP timeout (for blasthttp) + blasthttp_timeout: 5 # Custom HTTP headers (e.g. cookies, etc.) 
# in the format { "Header-Key": "header_value" } # These are attached to all in-scope HTTP requests @@ -109,8 +109,8 @@ web: api_retries: 2 # HTTP retries - try again if the raw connection fails http_retries: 1 - # HTTP retries (for httpx) - httpx_retries: 1 + # HTTP retries (for blasthttp) + blasthttp_retries: 1 # Default sleep interval when rate limited by 429 (and retry-after isn't provided) 429_sleep_interval: 30 # Maximum sleep interval when rate limited by 429 (and an excessive retry-after is provided) @@ -121,6 +121,9 @@ web: http_max_redirects: 5 # Whether to verify SSL certificates ssl_verify: false + # Maximum HTTP requests per second (0 = unlimited) + # Applies globally across all blasthttp consumers (http probing, web brute, etc.) + http_rate_limit: 0 ### ENGINE ### @@ -129,8 +132,6 @@ engine: # Tool dependencies deps: - ffuf: - version: "2.1.0" # How to handle installation of module dependencies # Choices are: # - abort_on_failure (default) - if a module dependency fails to install, abort the scan diff --git a/bbot/modules/baddns.py b/bbot/modules/baddns.py index ace72c9c68..894ed72004 100644 --- a/bbot/modules/baddns.py +++ b/bbot/modules/baddns.py @@ -130,25 +130,10 @@ async def _run_module(self, module_instance): self.warning(f"Task for {module_instance} raised an error: {e}") return module_instance, None - def _new_http_client(self, *args, **kwargs): - """Create a non-cached HTTP client for baddns submodules. - - baddns submodules close their HTTP clients during cleanup, so we can't - use the caching ``web.AsyncClient`` factory — that would let one - submodule close a client that another submodule is still using. - - TODO: revisit this when we switch to blasthttp — the caching/lifecycle - model will be different and this workaround may no longer be needed. 
- """ - from bbot.core.helpers.web.client import BBOTAsyncClient - - return BBOTAsyncClient.from_config(self.scan.config, self.scan.target, *args, persist_cookies=False, **kwargs) - async def handle_event(self, event): coroutines = [] for ModuleClass in self.select_modules(): kwargs = { - "http_client_class": self._new_http_client, "dns_client": self.scan.helpers.dns.resolver, "custom_nameservers": self.custom_nameservers, "signatures": self.signatures, diff --git a/bbot/modules/baddns_direct.py b/bbot/modules/baddns_direct.py index f4093fa8cc..be2311bd38 100644 --- a/bbot/modules/baddns_direct.py +++ b/bbot/modules/baddns_direct.py @@ -27,7 +27,6 @@ def set_modules(self): async def handle_event(self, event): CNAME_direct_module = self.select_modules()[0] kwargs = { - "http_client_class": self.scan.helpers.web.AsyncClient, "dns_client": self.scan.helpers.dns.resolver, "custom_nameservers": self.custom_nameservers, "signatures": self.signatures, diff --git a/bbot/modules/badsecrets.py b/bbot/modules/badsecrets.py index a5f8049f9e..ec673cce12 100644 --- a/bbot/modules/badsecrets.py +++ b/bbot/modules/badsecrets.py @@ -34,7 +34,7 @@ async def setup(self): @property def _module_threads(self): - return max(1, multiprocessing.cpu_count() - 1) + return min(4, max(1, multiprocessing.cpu_count() - 1)) async def handle_event(self, event): resp_body = event.data.get("body", None) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index d7356bd809..a00c1966aa 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -73,7 +73,7 @@ class BaseModule: _stats_exclude (bool): Whether to exclude this module from scan statistics. Default is False. - _disable_auto_module_deps (bool): Whether to disable automatic module dependencies. This is useful e.g. if the module consumes URLs, but you don't want to automatically enable the httpx module. Default is False. + _disable_auto_module_deps (bool): Whether to disable automatic module dependencies. This is useful e.g. 
if the module consumes URLs, but you don't want to automatically enable the blasthttp module. Default is False. _qsize (int): Outgoing queue size (0 for infinite). Default is 0. @@ -793,8 +793,6 @@ async def _worker(self): finally: event._minimize() except asyncio.CancelledError: - # this trace was used for debugging leaked CancelledErrors from inside httpx - # self.log.trace("Worker cancelled") raise except RuntimeError as e: self.trace(f"RuntimeError in module {self.name}: {e}") @@ -1404,7 +1402,7 @@ async def api_page_iter(self, url, page_size=100, _json=True, next_key=None, ite **requests_kwargs: Arbitrary keyword arguments that will be forwarded to the HTTP request function. Yields: - dict or httpx.Response: If 'json' is True, yields a dictionary containing the parsed JSON data. Otherwise, yields the raw HTTP response. + dict or Response: If 'json' is True, yields a dictionary containing the parsed JSON data. Otherwise, yields the raw HTTP response. Note: The loop will continue indefinitely unless manually stopped. Make sure to break out of the loop once the last page has been received. 
@@ -1859,16 +1857,15 @@ async def _worker(self): continue acceptable = True - async with self._task_counter.count(f"event_precheck({event})"): - precheck_pass, reason = self._event_precheck(event) + precheck_pass, reason = self._event_precheck(event) if not precheck_pass: self.debug(f"Not intercepting {event} because precheck failed ({reason})") acceptable = False - async with self._task_counter.count(f"event_postcheck({event})"): + else: postcheck_pass, reason = await self._event_postcheck(event) - if not postcheck_pass: - self.debug(f"Not intercepting {event} because postcheck failed ({reason})") - acceptable = False + if not postcheck_pass: + self.debug(f"Not intercepting {event} because postcheck failed ({reason})") + acceptable = False # whether to pass the event on to the rest of the scan # defaults to true, unless handle_event returns False @@ -1895,8 +1892,6 @@ async def _worker(self): await self.forward_event(event, kwargs) except asyncio.CancelledError: - # this trace was used for debugging leaked CancelledErrors from inside httpx - # self.log.trace("Worker cancelled") raise except RuntimeError as e: self.trace(f"RuntimeError in intercept module {self.name}: {e}") diff --git a/bbot/modules/certspotter.py b/bbot/modules/certspotter.py index 69b108080c..1a2c995bb5 100644 --- a/bbot/modules/certspotter.py +++ b/bbot/modules/certspotter.py @@ -20,7 +20,7 @@ def request_url(self, query): async def parse_results(self, r, query): results = set() json = r.json() - if json: + if json and isinstance(json, list): for r in json: for dns_name in r.get("dns_names", []): results.add(dns_name.lstrip(".*").rstrip(".")) diff --git a/bbot/modules/dnsbrute_mutations.py b/bbot/modules/dnsbrute_mutations.py index aeb695fb6d..76b85d5055 100644 --- a/bbot/modules/dnsbrute_mutations.py +++ b/bbot/modules/dnsbrute_mutations.py @@ -44,7 +44,7 @@ async def handle_event(self, event): async def get_parent_event(self, subdomain): start = time.time() - parent_host = await 
self.helpers.run_in_executor(self.helpers.closest_match, subdomain, self.parent_events) + parent_host = await self.helpers.run_in_executor_cpu(self.helpers.closest_match, subdomain, self.parent_events) elapsed = time.time() - start self.trace(f"{subdomain}: got closest match among {len(self.parent_events):,} parent events in {elapsed:.2f}s") return self.parent_events[parent_host] diff --git a/bbot/modules/ffuf.py b/bbot/modules/ffuf.py deleted file mode 100644 index 4076b02024..0000000000 --- a/bbot/modules/ffuf.py +++ /dev/null @@ -1,358 +0,0 @@ -from bbot.modules.base import BaseModule - -import random -import string -import json -import base64 - - -class ffuf(BaseModule): - watched_events = ["URL"] - produced_events = ["URL_UNVERIFIED"] - flags = ["loud", "active"] - meta = {"description": "A fast web fuzzer written in Go", "created_date": "2022-04-10", "author": "@liquidsec"} - - options = { - "wordlist": "https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/Web-Content/raft-small-directories.txt", - "lines": 5000, - "max_depth": 0, - "extensions": "", - "ignore_case": False, - "rate": 0, - } - - options_desc = { - "wordlist": "Specify wordlist to use when finding directories", - "lines": "take only the first N lines from the wordlist when finding directories", - "max_depth": "the maximum directory depth to attempt to solve", - "extensions": "Optionally include a list of extensions to extend the keyword with (comma separated)", - "ignore_case": "Only put lowercase words into the wordlist", - "rate": "Rate of requests per second (default: 0)", - } - - deps_common = ["ffuf"] - - banned_characters = {" "} - blacklist = ["images", "css", "image"] - - in_scope_only = True - - async def setup_deps(self): - self.wordlist = await self.helpers.wordlist(self.config.get("wordlist")) - return True - - async def setup(self): - self.proxy = self.scan.web_config.get("http_proxy", "") - self.canary = "".join(random.choice(string.ascii_lowercase) for i in 
range(10)) - wordlist_url = self.config.get("wordlist", "") - self.debug(f"Using wordlist [{wordlist_url}]") - self.wordlist_lines = self.generate_wordlist(self.wordlist) - self.tempfile, tempfile_len = self.generate_templist() - self.rate = self.config.get("rate", 0) - self.verbose(f"Generated dynamic wordlist with length [{str(tempfile_len)}]") - try: - self.extensions = self.helpers.chain_lists(self.config.get("extensions", ""), validate=True) - self.debug(f"Using custom extensions: [{','.join(self.extensions)}]") - except ValueError as e: - self.warning(f"Error parsing extensions: {e}") - return False - return True - - async def handle_event(self, event): - if self.helpers.url_depth(event.url) > self.config.get("max_depth"): - self.debug("Exceeded max depth, aborting event") - return - - # only FFUF against a directory - if "." in event.parsed_url.path.split("/")[-1]: - self.debug("Aborting FFUF as period was detected in right-most path segment (likely a file)") - return - else: - # if we think its a directory, normalize it. 
- fixed_url = event.url.rstrip("/") + "/" - - exts = ["", "/"] - if self.extensions: - for ext in self.extensions: - exts.append(f".{ext}") - - filters = await self.baseline_ffuf(fixed_url, exts=exts) - async for r in self.execute_ffuf(self.tempfile, fixed_url, exts=exts, filters=filters): - await self.emit_event( - r["url"], - "URL_UNVERIFIED", - parent=event, - tags=[f"status-{r['status']}"], - context=f"{{module}} brute-forced {event.url} and found {{event.type}}: {{event.pretty_string}}", - ) - - async def filter_event(self, event): - if "endpoint" in event.tags: - self.debug(f"rejecting URL [{event.url}] because we don't ffuf endpoints") - return False - return True - - async def baseline_ffuf(self, url, exts=[""], prefix="", suffix="", mode="normal"): - filters = {} - for ext in exts: - self.debug(f"running baseline for URL [{url}] with ext [{ext}]") - # For each "extension", we will attempt to build a baseline using 4 requests - - canary_results = [] - - canary_length = 4 - canary_list = [] - for i in range(0, 4): - canary_list.append("".join(random.choice(string.ascii_lowercase) for i in range(canary_length))) - canary_length += 2 - - canary_temp_file = self.helpers.tempfile(canary_list, pipe=False) - async for canary_r in self.execute_ffuf( - canary_temp_file, - url, - prefix=prefix, - suffix=suffix, - mode=mode, - baseline=True, - apply_filters=False, - filters=filters, - ): - canary_results.append(canary_r) - - # First, lets check to make sure we got all 4 requests. If we didn't, there are likely serious connectivity issues. - # We should issue a warning in that case. - - if len(canary_results) != 4: - self.warning( - f"Could not attain baseline for URL [{url}] ext [{ext}] because baseline results are missing. Possible connectivity issues." 
- ) - filters[ext] = ["ABORT", "CONNECTIVITY_ISSUES"] - continue - - # if the codes are different, we should abort, this should also be a warning, as it is highly unusual behavior - if len({d["status"] for d in canary_results}) != 1: - self.warning("Got different codes for each baseline. This could indicate load balancing") - filters[ext] = ["ABORT", "BASELINE_CHANGED_CODES"] - continue - - # if the code we received was a 404, we are just going to look for cases where we get a different code - if canary_results[0]["status"] == 404: - self.debug("All baseline results were 404, we can just look for anything not 404") - filters[ext] = ["-fc", "404"] - continue - - # if we only got 403, we might already be blocked by a WAF. Issue a warning, but it's possible all 'not founds' are given 403 - if canary_results[0]["status"] == 403: - self.warning( - "All requests of the baseline received a 403 response. It is possible a WAF is actively blocking your traffic." - ) - - # if we only got 429, we are almost certainly getting blocked by a WAF or rate-limiting. Specifically with 429, we should respect them and abort the scan. - if canary_results[0]["status"] == 429: - self.warning( - f"Received code 429 (Too many requests) for URL [{url}]. A WAF or application is actively blocking requests, aborting." 
- ) - filters[ext] = ["ABORT", "RECEIVED_429"] - continue - - # we start by seeing if all of the baselines have the same character count - if len({d["length"] for d in canary_results}) == 1: - self.debug("All baseline results had the same char count, we can make a filter on that") - filters[ext] = [ - "-fc", - str(canary_results[0]["status"]), - "-fs", - str(canary_results[0]["length"]), - "-fmode", - "and", - ] - continue - - # if that doesn't work we can try words - if len({d["words"] for d in canary_results}) == 1: - self.debug("All baseline results had the same word count, we can make a filter on that") - filters[ext] = [ - "-fc", - str(canary_results[0]["status"]), - "-fw", - str(canary_results[0]["words"]), - "-fmode", - "and", - ] - continue - - # as a last resort we will try lines - if len({d["lines"] for d in canary_results}) == 1: - self.debug("All baseline results had the same word count, we can make a filter on that") - filters[ext] = [ - "-fc", - str(canary_results[0]["status"]), - "-fl", - str(canary_results[0]["lines"]), - "-fmode", - "and", - ] - continue - - # if even the line count isn't stable, we can only reliably count on the result if the code is different - filters[ext] = ["-fc", f"{str(canary_results[0]['status'])}"] - - return filters - - async def execute_ffuf( - self, - tempfile, - url, - prefix="", - suffix="", - exts=[""], - filters={}, - mode="normal", - apply_filters=True, - baseline=False, - ): - for ext in exts: - if mode == "normal": - self.debug("in mode [normal]") - - fuzz_url = f"{url}{prefix}FUZZ{suffix}" - - command = [ - "ffuf", - "-noninteractive", - "-s", - "-H", - f"User-Agent: {self.scan.useragent}", - "-json", - "-w", - tempfile, - "-u", - f"{fuzz_url}{ext}", - ] - - elif mode == "hostheader": - self.debug("in mode [hostheader]") - - command = [ - "ffuf", - "-noninteractive", - "-s", - "-H", - f"User-Agent: {self.scan.useragent}", - "-H", - f"Host: FUZZ{suffix}", - "-json", - "-w", - tempfile, - "-u", - f"{url}", - ] - 
else: - self.debug("invalid mode specified, aborting") - return - - if self.rate > 0: - command += ["-rate", f"{self.rate}"] - - if self.proxy: - command += ["-x", self.proxy] - - if apply_filters: - if ext in filters.keys(): - if filters[ext][0] == ("ABORT"): - self.warning(f"Exiting from FFUF run early, received an ABORT filter: [{filters[ext][1]}]") - continue - - elif filters[ext] is None: - pass - - else: - command += filters[ext] - else: - command.append("-mc") - command.append("all") - - for hk, hv in self.scan.custom_http_headers.items(): - command += ["-H", f"{hk}: {hv}"] - - async for found in self.run_process_live(command): - try: - found_json = json.loads(found) - input_json = found_json.get("input", {}) - if type(input_json) != dict: - self.debug("Error decoding JSON from ffuf") - continue - encoded_input = input_json.get("FUZZ", "") - input_val = base64.b64decode(encoded_input).decode() - if len(input_val.rstrip()) > 0: - if self.scan.stopping: - break - if input_val.rstrip() == self.canary: - self.debug("Found canary! aborting...") - return - else: - if mode == "normal": - # before emitting, we are going to send another baseline. This will immediately catch things like a WAF flipping blocking on us mid-scan - if baseline is False: - pre_emit_temp_canary = [ - f - async for f in self.execute_ffuf( - self.helpers.tempfile( - ["".join(random.choice(string.ascii_lowercase) for i in range(4))], - pipe=False, - ), - url, - prefix=prefix, - suffix=suffix, - mode=mode, - exts=[ext], - baseline=True, - filters=filters, - ) - ] - if len(pre_emit_temp_canary) == 0: - yield found_json - - else: - self.verbose( - f"Would have reported URL [{found_json['url']}], but baseline check failed. This could be due to a WAF turning on mid-scan, or an unusual web server configuration." 
- ) - self.verbose(f"Aborting the current run against [{url}]") - return - - yield found_json - - except json.decoder.JSONDecodeError: - self.debug("Received invalid JSON from FFUF") - - def generate_templist(self, prefix=None): - virtual_file = [] - if prefix: - prefix = prefix.strip().lower() - max_lines = self.config.get("lines") - - for line in self.wordlist_lines[:max_lines]: - # Check if it starts with the given prefix (if any) - if (not prefix) or line.lower().startswith(prefix): - virtual_file.append(line) - - virtual_file.append(self.canary) - return self.helpers.tempfile(virtual_file, pipe=False), len(virtual_file) - - def generate_wordlist(self, wordlist_file): - wordlist_set = set() # Use a set to avoid duplicates - ignore_case = self.config.get("ignore_case", False) # Get the ignore_case option - for line in self.helpers.read_file(wordlist_file): - line = line.strip() - if not line: - continue - if line in self.blacklist: - self.debug(f"Skipping adding [{line}] to wordlist because it was in the blacklist") - continue - if any(x in line for x in self.banned_characters): - self.debug(f"Skipping adding [{line}] to wordlist because it has a banned character") - continue - if ignore_case: - line = line.lower() # Convert to lowercase if ignore_case is enabled - wordlist_set.add(line) # Add to set to handle duplicates - return list(wordlist_set) # Convert set back to list before returning diff --git a/bbot/modules/generic_ssrf.py b/bbot/modules/generic_ssrf.py new file mode 100644 index 0000000000..773965e617 --- /dev/null +++ b/bbot/modules/generic_ssrf.py @@ -0,0 +1,264 @@ +from bbot.errors import InteractshError +from bbot.modules.base import BaseModule + + +ssrf_params = [ + "Dest", + "Redirect", + "URI", + "Path", + "Continue", + "URL", + "Window", + "Next", + "Data", + "Reference", + "Site", + "HTML", + "Val", + "Validate", + "Domain", + "Callback", + "Return", + "Page", + "Feed", + "Host", + "Port", + "To", + "Out", + "View", + "Dir", + "Show", + 
"Navigation", + "Open", +] + + +class BaseSubmodule: + technique_description = "base technique description" + severity = "INFO" + paths = [] + + def __init__(self, generic_ssrf): + self.generic_ssrf = generic_ssrf + self.test_paths = self.create_paths() + + def set_base_url(self, event): + return f"{event.parsed_url.scheme}://{event.parsed_url.netloc}" + + def create_paths(self): + return self.paths + + async def test(self, event): + base_url = self.set_base_url(event) + for test_path_result in self.test_paths: + for lower in [True, False]: + test_path = test_path_result[0] + if lower: + test_path = test_path.lower() + subdomain_tag = test_path_result[1] + test_url = f"{base_url}{test_path}" + self.generic_ssrf.debug(f"Sending request to URL: {test_url}") + r = await self.generic_ssrf.helpers.request(url=test_url) + if r: + self.process(event, r.text, subdomain_tag) + + def process(self, event, r, subdomain_tag): + response_token = self.generic_ssrf.interactsh_domain.split(".")[0][::-1] + if response_token in r: + echoed_response = True + else: + echoed_response = False + + self.generic_ssrf.interactsh_subdomain_tags[subdomain_tag] = ( + event, + self.technique_description, + self.severity, + echoed_response, + ) + + +class Generic_SSRF(BaseSubmodule): + technique_description = "Generic SSRF (GET)" + severity = "HIGH" + + def set_base_url(self, event): + return event.url + + def create_paths(self): + test_paths = [] + for param in ssrf_params: + query_string = "" + subdomain_tag = self.generic_ssrf.helpers.rand_string(4) + ssrf_canary = f"{subdomain_tag}.{self.generic_ssrf.interactsh_domain}" + self.generic_ssrf.parameter_subdomain_tags_map[subdomain_tag] = param + query_string += f"{param}=http://{ssrf_canary}&" + test_paths.append((f"?{query_string.rstrip('&')}", subdomain_tag)) + return test_paths + + +class Generic_SSRF_POST(BaseSubmodule): + technique_description = "Generic SSRF (POST)" + severity = "HIGH" + + def set_base_url(self, event): + return event.url 
+
+    async def test(self, event):
+        test_url = event.url
+
+        post_data = {}
+        for param in ssrf_params:
+            subdomain_tag = self.generic_ssrf.helpers.rand_string(4, digits=False)
+            self.generic_ssrf.parameter_subdomain_tags_map[subdomain_tag] = param
+            post_data[param] = f"http://{subdomain_tag}.{self.generic_ssrf.interactsh_domain}"
+
+        subdomain_tag_lower = self.generic_ssrf.helpers.rand_string(4, digits=False)
+        post_data_lower = {
+            k.lower(): f"http://{subdomain_tag_lower}.{self.generic_ssrf.interactsh_domain}"
+            for k, v in post_data.items()
+        }
+
+        post_data_list = [(subdomain_tag, post_data), (subdomain_tag_lower, post_data_lower)]
+
+        for tag, pd in post_data_list:
+            # Send raw body (not URL-encoded) so payload URLs like http://... reach the
+            # server literally — matching old curl -d behavior.
+            raw_body = "&".join(f"{k}={v}" for k, v in pd.items())
+            r = await self.generic_ssrf.helpers.request(url=test_url, method="POST", body=raw_body)
+            if r:
+                self.process(event, r.text, tag)
+
+
+class Generic_XXE(BaseSubmodule):
+    technique_description = "Generic XXE"
+    severity = "HIGH"
+    paths = None
+
+    async def test(self, event):
+        rand_entity = self.generic_ssrf.helpers.rand_string(4, digits=False)
+        subdomain_tag = self.generic_ssrf.helpers.rand_string(4, digits=False)
+
+        post_body = f"""<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE foo [
+<!ELEMENT foo ANY >
+<!ENTITY {rand_entity} SYSTEM "http://{subdomain_tag}.{self.generic_ssrf.interactsh_domain}" >]>
+<foo>&{rand_entity};</foo>"""
+        test_url = event.parsed_url.geturl()
+        r = await self.generic_ssrf.helpers.request(
+            url=test_url, method="POST", body=post_body, headers={"Content-type": "application/xml"}
+        )
+        if r:
+            self.process(event, r.text, subdomain_tag)
+
+
+class generic_ssrf(BaseModule):
+    watched_events = ["URL"]
+    produced_events = ["FINDING"]
+    flags = ["active", "invasive", "web-heavy"]
+    meta = {"description": "Check for generic SSRFs", "created_date": "2022-07-30", "author": "@liquidsec"}
+    options = {
+        "skip_dns_interaction": False,
+    }
+    options_desc = {
+        "skip_dns_interaction": "Do not report DNS interactions (only HTTP interaction)",
+    }
+    in_scope_only = 
True + + async def setup(self): + self.submodules = {} + self.interactsh_subdomain_tags = {} + self.parameter_subdomain_tags_map = {} + self.severity = None + self.skip_dns_interaction = self.config.get("skip_dns_interaction", False) + + if self.scan.config.get("interactsh_disable", False) is False: + try: + self.interactsh_instance = self.helpers.interactsh() + self.interactsh_domain = await self.interactsh_instance.register(callback=self.interactsh_callback) + except InteractshError as e: + self.warning(f"Interactsh failure: {e}") + return False + else: + self.warning( + "The generic_ssrf module is completely dependent on interactsh to function, but it is disabled globally. Aborting." + ) + return None + + # instantiate submodules + for m in BaseSubmodule.__subclasses__(): + if m.__name__.startswith("Generic_"): + self.verbose(f"Starting generic_ssrf submodule: {m.__name__}") + self.submodules[m.__name__] = m(self) + + return True + + async def handle_event(self, event): + for s in self.submodules.values(): + await s.test(event) + + async def interactsh_callback(self, r): + protocol = r.get("protocol").upper() + if protocol == "DNS" and self.skip_dns_interaction: + return + + full_id = r.get("full-id", None) + subdomain_tag = full_id.split(".")[0] + + if full_id: + if "." 
in full_id: + match = self.interactsh_subdomain_tags.get(subdomain_tag) + if not match: + return + matched_event = match[0] + matched_technique = match[1] + matched_severity = match[2] + matched_echoed_response = str(match[3]) + + triggering_param = self.parameter_subdomain_tags_map.get(subdomain_tag, None) + description = f"Out-of-band interaction: [{matched_technique}]" + if triggering_param: + self.debug(f"Found triggering parameter: {triggering_param}") + description += f" [Triggering Parameter: {triggering_param}]" + description += f" [{protocol}] Echoed Response: {matched_echoed_response}" + + self.debug(f"Emitting event with description: {description}") # Debug the final description + + event_data = { + "host": str(matched_event.host), + "url": matched_event.url, + "name": matched_technique, + "description": description, + "severity": matched_severity if protocol == "HTTP" else "LOW", + "confidence": "CONFIRMED" if protocol == "HTTP" else "MEDIUM", + } + + await self.emit_event( + event_data, + "FINDING", + matched_event, + context=f"{{module}} scanned {matched_event.url} and detected {{event.type}}: {matched_technique}", + ) + else: + # this is likely caused by something trying to resolve the base domain first and can be ignored + self.debug("skipping result because subdomain tag was missing") + + async def cleanup(self): + if self.scan.config.get("interactsh_disable", False) is False: + try: + await self.interactsh_instance.deregister() + self.debug( + f"successfully deregistered interactsh session with correlation_id {self.interactsh_instance.correlation_id}" + ) + except InteractshError as e: + self.warning(f"Interactsh failure: {e}") + + async def finish(self): + if self.scan.config.get("interactsh_disable", False) is False: + await self.helpers.sleep(5) + try: + for r in await self.interactsh_instance.poll(): + await self.interactsh_callback(r) + except InteractshError as e: + self.debug(f"Error in interact.sh: {e}") diff --git a/bbot/modules/git.py 
b/bbot/modules/git.py index 229adee35d..cb53124209 100644 --- a/bbot/modules/git.py +++ b/bbot/modules/git.py @@ -24,7 +24,7 @@ async def handle_event(self, event): self.helpers.urljoin(base_url, ".git/config"), self.helpers.urljoin(f"{base_url}/", ".git/config"), } - async for url, response in self.helpers.request_batch(urls): + for url, response in await self.helpers.request_batch(urls): text = getattr(response, "text", "") if not text: text = "" diff --git a/bbot/modules/github_codesearch.py b/bbot/modules/github_codesearch.py index 6901221a30..e5e941478b 100644 --- a/bbot/modules/github_codesearch.py +++ b/bbot/modules/github_codesearch.py @@ -32,7 +32,7 @@ async def handle_event(self, event): context=f'{{module}} searched github.com for "{query}" and found {{event.type}} with matching content at {repo_url}', ) for raw_url in raw_urls: - url_event = self.make_event(raw_url, "URL_UNVERIFIED", parent=repo_event, tags=["httpx-safe"]) + url_event = self.make_event(raw_url, "URL_UNVERIFIED", parent=repo_event, tags=["blasthttp-safe"]) if not url_event: continue await self.emit_event( diff --git a/bbot/modules/host_header.py b/bbot/modules/host_header.py index 8d49b70c19..02e9d60764 100644 --- a/bbot/modules/host_header.py +++ b/bbot/modules/host_header.py @@ -15,8 +15,6 @@ class host_header(BaseModule): in_scope_only = True per_hostport_only = True - deps_apt = ["curl"] - async def setup(self): self.subdomain_tags = {} if self.scan.config.get("interactsh_disable", False) is False: @@ -103,13 +101,13 @@ async def handle_event(self, event): self.debug(f"Performing {technique_description} case") subdomain_tag = self.rand_string(4, digits=False) self.subdomain_tags[subdomain_tag] = (event, technique_description) - output = await self.helpers.curl( + response = await self.helpers.request( url=url, headers={"Host": f"{subdomain_tag}.{self.domain}"}, ignore_bbot_global_settings=True, cookies=added_cookies, ) - if self.domain in output: + if response and self.domain in 
response.text: domain_reflections.append(technique_description) # absolute URL / Host header transposition @@ -117,28 +115,26 @@ async def handle_event(self, event): self.debug(f"Performing {technique_description} case") subdomain_tag = self.rand_string(4, digits=False) self.subdomain_tags[subdomain_tag] = (event, technique_description) - output = await self.helpers.curl( + response = await self.helpers.request( url=url, - path_override=url, + request_target=url, cookies=added_cookies, ) - - if self.domain in output: + if response and self.domain in response.text: domain_reflections.append(technique_description) # duplicate host header tolerance technique_description = "duplicate host header tolerance" - output = await self.helpers.curl( + response = await self.helpers.request( url=url, - # Sending a blank HOST first as a hack to trick curl. This makes it no longer an "internal header", thereby allowing for duplicates - # The fact that it's accepting two host headers is rare enough to note on its own, and not too noisy. Having the 3rd header be an interactsh would result in false negatives for the slightly less interesting cases. + method="HEAD", + # Sending duplicate Host headers to test server tolerance. + # The fact that it's accepting two host headers is rare enough to note on its own. 
headers={"Host": ["", str(event.host), str(event.host)]}, cookies=added_cookies, - head_mode=True, ) - split_output = output.split("\n") - if " 4" in split_output: + if response and response.status_code >= 400 and response.status_code < 500: description = "Duplicate Host Header Tolerated" await self.emit_event( { @@ -174,12 +170,12 @@ async def handle_event(self, event): for oh in override_headers_list: override_headers[oh] = f"{subdomain_tag}.{self.domain}" - output = await self.helpers.curl( + response = await self.helpers.request( url=url, headers=override_headers, cookies=added_cookies, ) - if self.domain in output: + if response and self.domain in response.text: domain_reflections.append(technique_description) # emit all the domain reflections we found diff --git a/bbot/modules/http.py b/bbot/modules/http.py new file mode 100644 index 0000000000..d84530d79d --- /dev/null +++ b/bbot/modules/http.py @@ -0,0 +1,322 @@ +import re +from http.cookies import SimpleCookie +from urllib.parse import urlparse + +import blasthttp + +from bbot.modules.base import BaseModule + + +class http(BaseModule): + watched_events = ["OPEN_TCP_PORT", "URL_UNVERIFIED", "URL"] + produced_events = ["URL", "HTTP_RESPONSE"] + flags = ["active", "safe", "web", "social-enum", "subdomain-enum", "cloud-enum"] + meta = { + "description": "Visit webpages using blasthttp (native Rust HTTP engine)", + "created_date": "2026-03-08", + "author": "@liquidsec", + } + + options = { + "threads": 50, + "in_scope_only": True, + "max_response_size": 5242880, + "store_responses": False, + } + options_desc = { + "threads": "Number of concurrent requests", + "in_scope_only": "Only visit web resources that are in scope.", + "max_response_size": "Max response size in bytes", + "store_responses": "Save raw HTTP responses to scan folder", + } + + scope_distance_modifier = 2 + _shuffle_incoming_queue = False + _batch_size = 500 + _priority = 2 + # accept Javascript URLs + accept_url_special = True + + async def 
setup(self): + self.threads = self.config.get("threads", 50) + self.max_response_size = self.config.get("max_response_size", 5242880) + self.store_responses = self.config.get("store_responses", False) + self.client = self.helpers.blasthttp + return True + + async def filter_event(self, event): + if "_wildcard" in str(event.host).split("."): + return False, "event is wildcard" + + if "unresolved" in event.tags: + return False, "event is unresolved" + + if event.module == self: + return False, "event is from self" + + if "spider-max" in event.tags: + return False, "event exceeds spidering limits" + + # scope filtering + in_scope_only = self.config.get("in_scope_only", True) + if "blasthttp-safe" in event.tags: + return True + max_scope_distance = 0 if in_scope_only else (self.scan.scope_search_distance + 1) + if event.scope_distance > max_scope_distance: + return False, "event is not in scope" + return True + + def make_url_metadata(self, event): + """Returns (urls, url_hash) where urls is a list (usually 1 item, but 2 for OPEN_TCP_PORT).""" + has_spider_max = "spider-max" in event.tags + url_hash = None + if event.type.startswith("URL"): + # we NEED the port, otherwise it will try HTTPS even for HTTP URLs + url = event.with_port().geturl() + if event.parsed_url.path == "/": + url_hash = hash((event.host, event.port, has_spider_max)) + urls = [url] + else: + # OPEN_TCP_PORT — probe both http and https + host = event.host + port = event.port + urls = [f"http://{host}:{port}/", f"https://{host}:{port}/"] + url_hash = hash((host, port, has_spider_max)) + if url_hash is None: + url_hash = hash((urls[0], has_spider_max)) + return urls, url_hash + + def _incoming_dedup_hash(self, event): + urls, url_hash = self.make_url_metadata(event) + return url_hash + + def _build_headers(self): + """Build list of (name, value) header tuples from scan config.""" + headers = [("User-Agent", self.scan.useragent)] + for hk, hv in self.scan.custom_http_headers.items(): + 
headers.append((hk, hv))
+        if self.scan.custom_http_cookies:
+            cookie = SimpleCookie()
+            for ck, cv in self.scan.custom_http_cookies.items():
+                cookie[ck] = cv
+            cookie_value = cookie.output(header="", sep="; ").strip()
+            headers.append(("Cookie", cookie_value))
+        return headers
+
+    def _response_to_json(self, url_input, response):
+        """Convert a blasthttp Response to a dict for HTTP_RESPONSE events."""
+        parsed = urlparse(response.url)
+        path = parsed.path or "/"
+
+        # Build raw_header string (required by HTTP_RESPONSE validation)
+        status_line = f"HTTP/1.1 {response.status} \r\n"
+        header_lines = "\r\n".join(f"{k}: {v}" for k, v in response.headers)
+        raw_header = f"{status_line}{header_lines}\r\n\r\n"
+
+        # Build header dict (lowercase keys, comma-joined for dupes)
+        header_dict = {}
+        for k, v in response.headers:
+            key = k.lower().replace("-", "_")
+            if key in header_dict:
+                header_dict[key] += f", {v}"
+            else:
+                header_dict[key] = v
+
+        content_type = header_dict.get("content_type", "")
+        content_length = int(header_dict.get("content_length", len(response.body_bytes)))
+
+        # Location header for redirects (excavate uses event.redirect_location)
+        location = header_dict.get("location", "")
+
+        # Extract title from HTML
+        title = ""
+        body = response.body
+        title_match = re.search(r"<title[^>]*>(.*?)</title>", body, re.IGNORECASE | re.DOTALL)
+        if title_match:
+            title = title_match.group(1).strip()
+
+        j = {
+            "url": response.url,
+            "input": url_input,
+            "status_code": response.status,
+            "method": "GET",
+            "path": path,
+            "host": parsed.hostname or "",
+            "raw_header": raw_header,
+            "header": header_dict,
+            "content_type": content_type,
+            "content_length": content_length,
+            "title": title,
+            "body": body,
+            "location": location,
+            "hash": {
+                "body_md5": response.hash.body_md5,
+                "body_mmh3": response.hash.body_mmh3,
+                "body_sha256": response.hash.body_sha256,
+                "header_md5": response.hash.header_md5,
+                "header_mmh3": response.hash.header_mmh3,
+                "header_sha256": 
response.hash.header_sha256, + }, + } + + # Include TLS certificate info when available (HTTPS responses) + ci = response.cert_info + if ci is not None: + j["cert_info"] = { + "common_name": ci.common_name, + "sans": ci.sans, + "emails": ci.emails, + "issuer": ci.issuer, + "not_before": ci.not_before, + "not_after": ci.not_after, + "fingerprint_sha256": ci.fingerprint_sha256, + } + + return j + + async def handle_batch(self, *events): + stdin = {} + # Track dual-scheme probes from OPEN_TCP_PORT: {(host, port): {"http": url, "https": url}} + port_probes = {} + + for event in events: + urls, url_hash = self.make_url_metadata(event) + for url in urls: + stdin[url] = event + if event.type == "OPEN_TCP_PORT": + key = (event.host, event.port) + if key not in port_probes: + port_probes[key] = {} + scheme = "https" if url.startswith("https://") else "http" + port_probes[key][scheme] = url + + if not stdin: + return + + headers = self._build_headers() + proxy = self.scan.http_proxy or None + timeout = self.scan.blasthttp_timeout + retries = self.scan.blasthttp_retries + + # Build batch configs + configs = [] + for url in stdin: + config = blasthttp.BatchConfig( + url, + headers=headers, + timeout=int(timeout), + retries=int(retries), + verify_certs=False, + follow_redirects=False, + proxy=proxy, + ) + configs.append(config) + + # blasthttp batch returns a native coroutine via pyo3-async-runtimes + results = await self.client.request_batch(configs, self.threads) + + # Index results by URL for the dedup check + results_by_url = {r.url: r for r in results} + + # For OPEN_TCP_PORT probes, suppress redundant https when http already succeeded. + # When probing an unknown port, we try both http:// and https://. If http works, + # the port definitely speaks HTTP — the https result may be a proxy artifact + # (intercepting proxies like Burp terminate TLS themselves, making any https:// + # URL "succeed" regardless of whether the target actually speaks TLS). 
+ # If http fails but https succeeds, the port genuinely speaks TLS. + # Explicit URLs (URL_UNVERIFIED/URL) are never suppressed — this only applies + # to speculative OPEN_TCP_PORT probes. + suppressed_urls = set() + for key, schemes in port_probes.items(): + http_url = schemes.get("http") + https_url = schemes.get("https") + if not (http_url and https_url): + continue + http_result = results_by_url.get(http_url) + if http_result and http_result.success and http_result.response.status != 0: + if https_url in results_by_url: + self.debug(f"Suppressing https probe {https_url} (http already succeeded: {http_url})") + suppressed_urls.add(https_url) + + for result in results: + if not result.success: + self.debug(f"blasthttp error for {result.url}: {result.error}") + continue + + response = result.response + status_code = response.status + if status_code == 0: + self.debug(f'No HTTP status code for "{result.url}"') + continue + + if result.url in suppressed_urls: + continue + + # Map back to parent event using the input URL + parent_event = stdin.get(result.url, None) + + if parent_event is None: + self.warning(f"Unable to correlate parent event for: {result.url}") + continue + + url = response.url + + # Build JSON dict for HTTP_RESPONSE event + # The "input" field represents the original scan target (host:port), + # not the full URL. Other modules and output consumers use this to + # correlate responses back to the target that produced them. 
+ input_parsed = urlparse(result.url) + url_input = input_parsed.netloc or result.url + j = self._response_to_json(url_input, response) + + # discard 404s from unverified URLs + path = j.get("path", "/") + if parent_event.type == "URL_UNVERIFIED" and status_code in (404,) and path != "/": + self.debug(f'Discarding 404 from "{url}"') + continue + + # main URL + tags = [f"status-{status_code}"] + + url_context = "{module} visited {event.parent.data} and got status code {event.http_status}" + if parent_event.type == "OPEN_TCP_PORT": + url_context += " at {event.data}" + + url_event = self.make_event( + url, + "URL", + parent_event, + tags=tags, + context=url_context, + ) + if url_event: + response_ip = j.get("host", "") + if response_ip: + url_event._resolved_hosts.add(response_ip) + title = j.get("title", "") + if title: + url_event.http_title = title + location = j.get("location", "") + if location: + url_event.redirect_location = location + if url_event != parent_event: + await self.emit_event(url_event) + # HTTP response + content_type = j.get("header", {}).get("content_type", "unspecified").split(";")[0] + content_length = j.get("content_length", 0) + content_length = self.helpers.bytes_to_human(content_length) + await self.emit_event( + j, + "HTTP_RESPONSE", + url_event, + tags=url_event.tags, + context=f"HTTP_RESPONSE was {content_length} with {content_type} content type", + ) + + # Store responses if configured + if self.store_responses: + response_dir = self.scan.home / "http_responses" + self.helpers.mkdir(response_dir) + filename = f"{j['host']}.{urlparse(url).port or 443}{path.replace('/', '[slash]')}.txt" + response_file = response_dir / filename + response_file.write_text(j.get("raw_header", "") + j.get("body", "")) diff --git a/bbot/modules/httpx.py b/bbot/modules/httpx.py deleted file mode 100644 index 860894015b..0000000000 --- a/bbot/modules/httpx.py +++ /dev/null @@ -1,232 +0,0 @@ -import re -import sys -import orjson -import tempfile -import 
subprocess -from pathlib import Path -from http.cookies import SimpleCookie - -from bbot.modules.base import BaseModule - - -class httpx(BaseModule): - httpx_tempdir_regex = re.compile(r"^httpx\d+$") - watched_events = ["OPEN_TCP_PORT", "URL_UNVERIFIED", "URL"] - produced_events = ["URL", "HTTP_RESPONSE"] - flags = ["safe", "active", "web", "social-enum", "subdomain-enum", "cloud-enum"] - meta = { - "description": "Visit webpages. Many other modules rely on httpx", - "created_date": "2022-07-08", - "author": "@TheTechromancer", - } - - options = { - "threads": 50, - "in_scope_only": True, - "version": "1.2.5", - "max_response_size": 5242880, - "store_responses": False, - "probe_all_ips": False, - } - options_desc = { - "threads": "Number of httpx threads to use", - "in_scope_only": "Only visit web reparents that are in scope.", - "version": "httpx version", - "max_response_size": "Max response size in bytes", - "store_responses": "Save raw HTTP responses to scan folder", - "probe_all_ips": "Probe all the ips associated with same host", - } - deps_ansible = [ - { - "name": "Download httpx", - "unarchive": { - "src": "https://github.com/projectdiscovery/httpx/releases/download/v#{BBOT_MODULES_HTTPX_VERSION}/httpx_#{BBOT_MODULES_HTTPX_VERSION}_#{BBOT_OS}_#{BBOT_CPU_ARCH_GOLANG}.zip", - "include": "httpx", - "dest": "#{BBOT_TOOLS}", - "remote_src": True, - }, - } - ] - - scope_distance_modifier = 2 - _shuffle_incoming_queue = False - _batch_size = 500 - _priority = 2 - # accept Javascript URLs - accept_url_special = True - - async def setup(self): - self.threads = self.config.get("threads", 50) - self.max_response_size = self.config.get("max_response_size", 5242880) - self.store_responses = self.config.get("store_responses", False) - self.probe_all_ips = self.config.get("probe_all_ips", False) - return True - - async def filter_event(self, event): - if "_wildcard" in str(event.host).split("."): - return False, "event is wildcard" - - if "unresolved" in event.tags: - 
return False, "event is unresolved" - - if event.module == self: - return False, "event is from self" - - if "spider-max" in event.tags: - return False, "event exceeds spidering limits" - - # scope filtering - in_scope_only = self.config.get("in_scope_only", True) - if "httpx-safe" in event.tags: - return True - max_scope_distance = 0 if in_scope_only else (self.scan.scope_search_distance + 1) - if event.scope_distance > max_scope_distance: - return False, "event is not in scope" - return True - - def make_url_metadata(self, event): - has_spider_max = "spider-max" in event.tags - url_hash = None - if event.type.startswith("URL"): - # we NEED the port, otherwise httpx will try HTTPS even for HTTP URLs - url = event.with_port().geturl() - if event.parsed_url.path == "/": - url_hash = hash((event.host, event.port, has_spider_max)) - else: - url = str(event.data) - url_hash = hash((event.host, event.port, has_spider_max)) - if url_hash is None: - url_hash = hash((url, has_spider_max)) - return url, url_hash - - def _incoming_dedup_hash(self, event): - url, url_hash = self.make_url_metadata(event) - return url_hash - - async def handle_batch(self, *events): - stdin = {} - - for event in events: - url, url_hash = self.make_url_metadata(event) - stdin[url] = event - - if not stdin: - return - - command = [ - "httpx", - "-silent", - "-json", - "-include-response", - "-threads", - self.threads, - "-timeout", - self.scan.httpx_timeout, - "-retries", - self.scan.httpx_retries, - "-header", - f"User-Agent: {self.scan.useragent}", - "-response-size-to-read", - f"{self.max_response_size}", - ] - - if self.store_responses: - response_dir = self.scan.home / "httpx" - self.helpers.mkdir(response_dir) - command += ["-srd", str(response_dir)] - - dns_resolvers = ",".join(self.helpers.system_resolvers) - if dns_resolvers: - command += ["-r", dns_resolvers] - - if self.probe_all_ips: - command += ["-probe-all-ips"] - - # Add custom HTTP headers - for hk, hv in 
self.scan.custom_http_headers.items(): - command += ["-header", f"{hk}: {hv}"] - - # Add custom HTTP cookies as a single header - if self.scan.custom_http_cookies: - cookie = SimpleCookie() - for ck, cv in self.scan.custom_http_cookies.items(): - cookie[ck] = cv - - # Build the cookie header - cookie_header = f"Cookie: {cookie.output(header='', sep='; ').strip()}" - command += ["-header", cookie_header] - - proxy = self.scan.http_proxy - if proxy: - command += ["-http-proxy", proxy] - async for line in self.run_process_live(command, text=False, input=list(stdin), stderr=subprocess.DEVNULL): - try: - j = await self.helpers.run_in_executor(orjson.loads, line) - except orjson.JSONDecodeError: - self.warning(f"httpx failed to decode line: {line}") - continue - - url = j.get("url", "") - status_code = int(j.get("status_code", 0)) - if status_code == 0: - self.debug(f'No HTTP status code for "{url}"') - continue - - parent_event = stdin.get(j.get("input", ""), None) - - if parent_event is None: - self.warning(f"Unable to correlate parent event from: {line}") - continue - - # discard 404s from unverified URLs - path = j.get("path", "/") - if parent_event.type == "URL_UNVERIFIED" and status_code in (404,) and path != "/": - self.debug(f'Discarding 404 from "{url}"') - continue - - # main URL - tags = [f"status-{status_code}"] - - url_context = "{module} visited {event.parent.pretty_string} and got status code {event.http_status}" - if parent_event.type == "OPEN_TCP_PORT": - url_context += " at {event.pretty_string}" - - url_event = self.make_event( - url, - "URL", - parent_event, - tags=tags, - context=url_context, - ) - if url_event: - httpx_ip = j.get("host", "") - if httpx_ip: - url_event._resolved_hosts.add(sys.intern(httpx_ip)) - url_event.data["status_code"] = status_code - title = j.get("title", "") - if title: - url_event.http_title = title - location = j.get("header", {}).get("location", "") - if location: - url_event.redirect_location = location - if url_event != 
parent_event: - await self.emit_event(url_event) - # HTTP response - content_type = j.get("header", {}).get("content_type", "unspecified").split(";")[0] - content_length = j.get("content_length", 0) - content_length = self.helpers.bytes_to_human(content_length) - await self.emit_event( - j, - "HTTP_RESPONSE", - url_event, - tags=url_event.tags, - context=f"HTTP_RESPONSE was {content_length} with {content_type} content type", - ) - - for tempdir in Path(tempfile.gettempdir()).iterdir(): - if tempdir.is_dir() and self.httpx_tempdir_regex.match(tempdir.name): - self.helpers.rm_rf(tempdir) - - async def cleanup(self): - resume_file = self.helpers.current_dir / "resume.cfg" - resume_file.unlink(missing_ok=True) diff --git a/bbot/modules/iis_shortnames.py b/bbot/modules/iis_shortnames.py index 5dabd7cb2f..5afd8831f2 100644 --- a/bbot/modules/iis_shortnames.py +++ b/bbot/modules/iis_shortnames.py @@ -30,7 +30,7 @@ class iis_shortnames(BaseModule): } in_scope_only = True - _module_threads = 8 + _module_threads = 4 async def detect(self, target): technique = None @@ -132,7 +132,7 @@ async def solve_valid_chars(self, method, target, affirmative_status_code): url = f"{target}{payload}{suffix}" urls_and_kwargs.append((url, kwargs, (c, file_part))) - async for url, kwargs, (c, file_part), response in self.helpers.request_custom_batch(urls_and_kwargs): + for url, response, (c, file_part) in await self.helpers.request_batch(urls_and_kwargs): if response is not None: if response.status_code == affirmative_status_code: if file_part == "stem": @@ -173,7 +173,7 @@ async def solve_shortname_recursive( kwargs = {"method": method} urls_and_kwargs.append((url, kwargs, c)) - async for url, kwargs, c, response in self.helpers.request_custom_batch(urls_and_kwargs): + for url, response, c in await self.helpers.request_batch(urls_and_kwargs): if response is not None: if response.status_code == affirmative_status_code: found_results = True diff --git a/bbot/modules/internal/excavate.py 
b/bbot/modules/internal/excavate.py index e2e450a8fb..9db511c90b 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -348,7 +348,7 @@ class excavateTestRule(ExcavateRule): scope_distance_modifier = None accept_dupes = False - _module_threads = 8 + _module_threads = 6 yara_rule_name_regex = re.compile(r"rule\s(\w+)\s{") yara_rule_regex = re.compile(r"(?s)((?:rule\s+\w+\s*{[^{}]*(?:{[^{}]*}[^{}]*)*[^{}]*(?:/\S*?}[^/]*?/)*)*})") @@ -1145,7 +1145,7 @@ async def search(self, data, event, content_type, discovery_context="HTTP respon for label, data_instance in data_items: # Your existing processing code - for result in self.yara_rules.match(data=f"{data_instance}"): + for result in await self.helpers.run_in_executor_cpu(self.yara_rules.match, data=f"{data_instance}"): rule_name = result.rule # Skip specific operations for 'parameter_extraction' rule on decoded_data diff --git a/bbot/modules/internal/speculate.py b/bbot/modules/internal/speculate.py index 0e31e9158b..fb680ab230 100644 --- a/bbot/modules/internal/speculate.py +++ b/bbot/modules/internal/speculate.py @@ -157,7 +157,7 @@ async def handle_event(self, event): # speculate URL_UNVERIFIED from URL or any event with "url" attribute event_is_url = event.type == "URL" event_has_url = not event.type.startswith("URL") and isinstance(event.data, dict) and "url" in event.data - event_tags = ["httpx-safe"] if event.type in ("CODE_REPOSITORY", "SOCIAL") else [] + event_tags = ["blasthttp-safe"] if event.type in ("CODE_REPOSITORY", "SOCIAL") else [] if event_is_url or event_has_url: url = event.url # only emit the url if it's not already in the event's history diff --git a/bbot/modules/newsletters.py b/bbot/modules/newsletters.py index e6d8626fd4..409c295689 100644 --- a/bbot/modules/newsletters.py +++ b/bbot/modules/newsletters.py @@ -39,7 +39,7 @@ async def handle_event(self, event): _event = event # Call find_type Function if Webpage return Status Code 200 && "body" is found in 
event.data - # Ex: 'bbot -m httpx newsletters -t https://apf-api.eng.vn.cloud.tesla.com' returns + # Ex: 'bbot -m blasthttp newsletters -t https://apf-api.eng.vn.cloud.tesla.com' returns # Status Code 200 but does NOT have event.data["body"] if _event.data["status_code"] == 200: if "body" in _event.data: diff --git a/bbot/modules/ntlm.py b/bbot/modules/ntlm.py index 164f26efd5..b46b938617 100644 --- a/bbot/modules/ntlm.py +++ b/bbot/modules/ntlm.py @@ -96,10 +96,10 @@ async def handle_event(self, event): urls.add(f"{event.parsed_url.scheme}://{event.parsed_url.netloc}/{endpoint}") num_urls = len(urls) - agen = self.helpers.request_batch( + results = await self.helpers.request_batch( urls, headers=NTLM_test_header, allow_redirects=False, timeout=self.http_timeout ) - async for url, response in agen: + for url, response in results: ntlm_resp = response.headers.get("WWW-Authenticate", "") if not ntlm_resp: continue @@ -109,7 +109,6 @@ async def handle_event(self, event): if not ntlm_resp_decoded: continue - await agen.aclose() self.found.add(found_hash) fqdn = ntlm_resp_decoded.get("FQDN", "") await self.emit_event( diff --git a/bbot/modules/nuclei.py b/bbot/modules/nuclei.py index 6602d03e51..27e272350b 100644 --- a/bbot/modules/nuclei.py +++ b/bbot/modules/nuclei.py @@ -107,7 +107,11 @@ async def setup(self): self.info( "Running nuclei in TECHNOLOGY mode. Scans will only be performed with the --automatic-scan flag set. This limits the templates used to those that match wappalyzer signatures" ) - self.tags = "" + # Don't clear user-specified tags — they act as additional filters + # alongside -as, narrowing the auto-selected template set. + # Only clear tags if the user didn't explicitly set them. 
+ if not self.tags: + self.tags = "" if self.mode == "severe": self.info( diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py index 064c00af7c..317eebc3a1 100644 --- a/bbot/modules/output/elastic.py +++ b/bbot/modules/output/elastic.py @@ -1,7 +1,7 @@ -from .http import HTTP +from .webhook import webhook -class Elastic(HTTP): +class Elastic(webhook): """ docker run -d -p 9200:9200 --name=bbot-elastic --v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0 """ diff --git a/bbot/modules/output/rabbitmq.py b/bbot/modules/output/rabbitmq.py index ba4205940d..3d8b470a37 100644 --- a/bbot/modules/output/rabbitmq.py +++ b/bbot/modules/output/rabbitmq.py @@ -25,14 +25,22 @@ async def setup(self): self.rabbitmq_url = self.config.get("url", "amqp://guest:guest@localhost/") self.queue_name = self.config.get("queue", "bbot_events") - # Connect to RabbitMQ - self.connection = await aio_pika.connect_robust(self.rabbitmq_url) - self.channel = await self.connection.channel() - - # Declare the queue - self.queue = await self.channel.declare_queue(self.queue_name, durable=True) - self.verbose("RabbitMQ connection and queue setup successfully") - return True + # Connect to RabbitMQ (retry in case the server is still starting) + max_retries = 30 + for attempt in range(max_retries): + try: + self.connection = await aio_pika.connect_robust(self.rabbitmq_url) + self.channel = await self.connection.channel() + self.queue = await self.channel.declare_queue(self.queue_name, durable=True) + self.verbose("RabbitMQ connection and queue setup successfully") + return True + except Exception as e: + if attempt < max_retries - 1: + self.verbose(f"RabbitMQ not ready (attempt {attempt + 1}/{max_retries}): {e}") + await self.helpers.sleep(1) + else: + self.error(f"Failed to connect to RabbitMQ after {max_retries} attempts: {e}") + return False async def handle_event(self, event): 
event_json = event.json() @@ -52,5 +60,6 @@ async def handle_event(self, event): async def cleanup(self): # Close the connection - await self.connection.close() - self.verbose("RabbitMQ connection closed successfully") + if hasattr(self, "connection"): + await self.connection.close() + self.verbose("RabbitMQ connection closed successfully") diff --git a/bbot/modules/output/web_report.py b/bbot/modules/output/web_report.py index 4cd2412046..a306e1bf70 100644 --- a/bbot/modules/output/web_report.py +++ b/bbot/modules/output/web_report.py @@ -4,7 +4,7 @@ class web_report(BaseOutputModule): - watched_events = ["URL", "TECHNOLOGY", "FINDING", "VHOST"] + watched_events = ["URL", "TECHNOLOGY", "FINDING"] meta = { "description": "Create a markdown report with web assets", "created_date": "2023-02-08", diff --git a/bbot/modules/output/http.py b/bbot/modules/output/webhook.py similarity index 98% rename from bbot/modules/output/http.py rename to bbot/modules/output/webhook.py index 28fa917fc7..0070d91cf4 100644 --- a/bbot/modules/output/http.py +++ b/bbot/modules/output/webhook.py @@ -4,10 +4,10 @@ from bbot.modules.output.base import BaseOutputModule -class HTTP(BaseOutputModule): +class webhook(BaseOutputModule): watched_events = ["*"] meta = { - "description": "Send every event to a custom URL via a web request", + "description": "Send every event to a custom URL via a webhook", "created_date": "2022-04-13", "author": "@TheTechromancer", } diff --git a/bbot/modules/paramminer_cookies.py b/bbot/modules/paramminer_cookies.py index 871238d803..3bd7f0705c 100644 --- a/bbot/modules/paramminer_cookies.py +++ b/bbot/modules/paramminer_cookies.py @@ -27,7 +27,7 @@ class paramminer_cookies(paramminer_headers): options_desc = {"wordlist": "Define the wordlist to be used to derive cookies"} scanned_hosts = [] boring_words = set() - _module_threads = 12 + _module_threads = 4 in_scope_only = True compare_mode = "cookie" default_wordlist = "paramminer_parameters.txt" @@ -36,11 +36,8 @@ 
async def check_batch(self, compare_helper, url, cookie_list): cookies = {p: self.rand_string(14) for p in cookie_list} return await compare_helper.compare(url, cookies=cookies, check_reflection=(len(cookie_list) == 1)) - def gen_count_args(self, url): - cookie_count = 40 - while 1: - if cookie_count < 0: - break - fake_cookies = {self.rand_string(14): self.rand_string(14) for _ in range(0, cookie_count)} - yield cookie_count, (url,), {"cookies": fake_cookies} - cookie_count -= 5 + max_count = 40 + + def build_count_test_request(self, url, count): + fake_cookies = {self.rand_string(14): self.rand_string(14) for _ in range(count)} + return (url,), {"cookies": fake_cookies} diff --git a/bbot/modules/paramminer_getparams.py b/bbot/modules/paramminer_getparams.py index 27a99f8ab4..27df773bda 100644 --- a/bbot/modules/paramminer_getparams.py +++ b/bbot/modules/paramminer_getparams.py @@ -36,11 +36,8 @@ async def check_batch(self, compare_helper, url, getparam_list): self.helpers.add_get_params(url, test_getparams).geturl(), check_reflection=(len(getparam_list) == 1) ) - def gen_count_args(self, url): - getparam_count = 40 - while 1: - if getparam_count < 0: - break - fake_getparams = {self.rand_string(14): self.rand_string(14) for _ in range(0, getparam_count)} - yield getparam_count, (self.helpers.add_get_params(url, fake_getparams).geturl(),), {} - getparam_count -= 5 + max_count = 40 + + def build_count_test_request(self, url, count): + fake_getparams = {self.rand_string(14): self.rand_string(14) for _ in range(count)} + return (self.helpers.add_get_params(url, fake_getparams).geturl(),), {} diff --git a/bbot/modules/paramminer_headers.py b/bbot/modules/paramminer_headers.py index ae573abadf..fba50251a2 100644 --- a/bbot/modules/paramminer_headers.py +++ b/bbot/modules/paramminer_headers.py @@ -75,7 +75,7 @@ class paramminer_headers(BaseModule): "zx-request-id", "zx-timer", } - _module_threads = 12 + _module_threads = 4 in_scope_only = True compare_mode = "header" 
default_wordlist = "paramminer_headers.txt" @@ -199,27 +199,35 @@ async def handle_event(self, event): self.debug(f"Encountered HttpCompareError: [{e}] for URL [{event.url}]") await self.process_results(event, results) + max_count = 95 + async def count_test(self, url): baseline = await self.helpers.request(url) if baseline is None: return if str(baseline.status_code)[0] in {"4", "5"}: return - for count, args, kwargs in self.gen_count_args(url): + + # Binary search for the maximum count the server accepts + lo, hi = 0, self.max_count + result = None + while lo <= hi: + mid = (lo + hi) // 2 + if mid == 0: + break + args, kwargs = self.build_count_test_request(url, mid) r = await self.helpers.request(*args, **kwargs) if r is not None and str(r.status_code)[0] not in {"4", "5"}: - return count + result = mid + lo = mid + 1 + else: + hi = mid - 1 + return result - def gen_count_args(self, url): - header_count = 95 - while 1: - if header_count < 0: - break - fake_headers = {} - for i in range(0, header_count): - fake_headers[self.rand_string(14)] = self.rand_string(14) - yield header_count, (url,), {"headers": fake_headers} - header_count -= 5 + def build_count_test_request(self, url, count): + """Build a test request with `count` fake parameters. 
Returns (args, kwargs) for helpers.request().""" + fake_headers = {self.rand_string(14): self.rand_string(14) for _ in range(count)} + return (url,), {"headers": fake_headers} async def binary_search(self, compare_helper, url, group, reasons=None, reflection=False): if reasons is None: diff --git a/bbot/modules/pgp.py b/bbot/modules/pgp.py index b12372dd1e..655e76e8a9 100644 --- a/bbot/modules/pgp.py +++ b/bbot/modules/pgp.py @@ -37,7 +37,7 @@ async def query(self, query): results = set() urls = self.config.get("search_urls", []) urls = [url.replace("", self.helpers.quote(query)) for url in urls] - async for url, response in self.helpers.request_batch(urls): + for url, response in await self.helpers.request_batch(urls): keyserver = self.helpers.urlparse(url).netloc if response is not None: for email in await self.helpers.re.extract_emails(response.text): diff --git a/bbot/modules/portfilter.py b/bbot/modules/portfilter.py index 21b0313194..0bd41fab3f 100644 --- a/bbot/modules/portfilter.py +++ b/bbot/modules/portfilter.py @@ -19,7 +19,7 @@ class portfilter(BaseInterceptModule): } _priority = 4 - # we consume URLs but we don't want to automatically enable httpx + # we consume URLs but we don't want to automatically enable blasthttp _disable_auto_module_deps = True async def setup(self): diff --git a/bbot/modules/sslcert.py b/bbot/modules/sslcert.py index ff6f6cf402..3f1f3c0e19 100644 --- a/bbot/modules/sslcert.py +++ b/bbot/modules/sslcert.py @@ -1,189 +1,94 @@ -import asyncio -from OpenSSL import crypto -from contextlib import suppress +from urllib.parse import urlparse from bbot.errors import ValidationError from bbot.modules.base import BaseModule -from bbot.core.helpers.async_helpers import NamedLock -from bbot.core.helpers.web.ssl_context import ssl_context_noverify class sslcert(BaseModule): - watched_events = ["OPEN_TCP_PORT"] + watched_events = ["HTTP_RESPONSE"] produced_events = ["DNS_NAME", "EMAIL_ADDRESS"] flags = ["safe", "affiliates", "subdomain-enum", 
"email-enum", "active", "web"] meta = { - "description": "Visit open ports and retrieve SSL certificates", + "description": "Extract hostnames and emails from TLS certificates in HTTP responses", "created_date": "2022-03-30", "author": "@TheTechromancer", } - options = {"timeout": 5.0, "skip_non_ssl": True} - options_desc = {"timeout": "Socket connect timeout in seconds", "skip_non_ssl": "Don't try common non-SSL ports"} - deps_apt = ["openssl"] - deps_pip = ["pyOpenSSL~=25.3.0"] - _module_threads = 25 scope_distance_modifier = 1 _priority = 2 async def setup(self): - self.timeout = self.config.get("timeout", 5.0) - self.skip_non_ssl = self.config.get("skip_non_ssl", True) - self.non_ssl_ports = (22, 53, 80) - # sometimes we run into a server with A LOT of SANs # these are usually stupid and useless, so we abort based on a different threshold # depending on whether the parent event is in scope self.in_scope_abort_threshold = 50 self.out_of_scope_abort_threshold = 10 - self.hosts_visited = set() - self.ip_lock = NamedLock() - return True - - async def filter_event(self, event): - if self.skip_non_ssl and event.port in self.non_ssl_ports: - return False, f"Port {event.port} doesn't typically use SSL" return True async def handle_event(self, event): - _host = event.host - if event.port: - port = event.port - else: - port = 443 + # Only process HTTPS responses with certificate info + cert_info = event.data.get("cert_info") + if not cert_info: + return + + # Deduplicate by (host, port) to avoid processing the same cert twice + url = event.data.get("url", "") + parsed = urlparse(url) + host = parsed.hostname or "" + port = parsed.port or 443 + host_hash = hash((host, port)) + if host_hash in self.hosts_visited: + return + self.hosts_visited.add(host_hash) - # turn hostnames into IP address(es) - if self.helpers.is_ip(_host): - hosts = [_host] - else: - hosts = list(await self.helpers.resolve(_host)) + # Extract DNS names and emails from cert_info + common_name = 
(cert_info.get("common_name") or "").lstrip("*.").lower() + sans = [s.lstrip("*.").lower() for s in cert_info.get("sans", [])] + emails = set(cert_info.get("emails", [])) + + # Build DNS names list: CN first, then SANs (excluding CN) + dns_names = [] + if common_name: + dns_names.append(common_name) + for san in sans: + if san and san not in dns_names: + dns_names.append(san) + # Apply threshold if event.scope_distance == 0: abort_threshold = self.in_scope_abort_threshold else: abort_threshold = self.out_of_scope_abort_threshold - coroutines = [self.visit_host(host, port) for host in hosts] - async for coroutine in self.helpers.as_completed(coroutines): - result = await coroutine - if not isinstance(result, tuple) or not len(result) == 3: - continue - dns_names, emails, (host, port) = result - if len(dns_names) > abort_threshold: - netloc = self.helpers.make_netloc(host, port) - self.verbose( - f"Skipping Subject Alternate Names (SANs) on {netloc} because number of hostnames ({len(dns_names):,}) exceeds threshold ({abort_threshold})" - ) - dns_names = dns_names[:1] + [n for n in dns_names[1:] if self.scan.in_scope(n)] - for event_type, results in (("DNS_NAME", set(dns_names)), ("EMAIL_ADDRESS", emails)): - for event_data in results: - if event_data is not None and event_data != event.data: - self.debug(f"Discovered new {event_type} via SSL certificate parsing: [{event_data}]") - try: - ssl_event = self.make_event(event_data, event_type, parent=event, raise_error=True) - parent_event = ssl_event.get_parent() - if parent_event.scope_distance == 0: - tags = ["affiliate"] - else: - tags = None - if ssl_event: - await self.emit_event( - ssl_event, - tags=tags, - context=f"{{module}} parsed SSL certificate at {event.pretty_string} and found {{event.type}}: {{event.pretty_string}}", - ) - except ValidationError as e: - self.hugeinfo(f'Malformed {event_type} "{event_data}" at {event.pretty_string}') - self.debug(f"Invalid data at {host}:{port}: {e}") + if len(dns_names) > 
abort_threshold: + self.verbose( + f"Skipping SANs on {url} because count ({len(dns_names):,}) exceeds threshold ({abort_threshold})" + ) + dns_names = dns_names[:1] + [n for n in dns_names[1:] if self.scan.in_scope(n)] + + # Emit events + for event_type, results in (("DNS_NAME", set(dns_names)), ("EMAIL_ADDRESS", emails)): + for event_data in results: + if event_data is not None and event_data != str(event.host): + self.debug(f"Discovered new {event_type} via TLS certificate: [{event_data}]") + try: + ssl_event = self.make_event(event_data, event_type, parent=event, raise_error=True) + parent_event = ssl_event.get_parent() + if parent_event.scope_distance == 0: + tags = ["affiliate"] + else: + tags = None + if ssl_event: + await self.emit_event( + ssl_event, + tags=tags, + context=f"{{module}} parsed TLS certificate at {url} and found {{event.type}}: {{event.data}}", + ) + except ValidationError as e: + self.hugeinfo(f'Malformed {event_type} "{event_data}" at {url}') + self.debug(f"Invalid data at {url}: {e}") def on_success_callback(self, event): parent_scope_distance = event.get_parent().scope_distance if parent_scope_distance == 0 and event.scope_distance > 0: event.add_tag("affiliate") - - async def visit_host(self, host, port): - host = self.helpers.make_ip_type(host) - netloc = self.helpers.make_netloc(host, port) - host_hash = hash((host, port)) - dns_names = [] - emails = set() - async with self.ip_lock.lock(host_hash): - if host_hash in self.hosts_visited: - self.debug(f"Already processed {host} on port {port}, skipping") - return [], [], (host, port) - else: - self.hosts_visited.add(host_hash) - - host = str(host) - - # Connect to the host - try: - transport, _ = await asyncio.wait_for( - self.helpers.loop.create_connection( - lambda: asyncio.Protocol(), host, port, ssl=ssl_context_noverify - ), - timeout=self.timeout, - ) - except asyncio.TimeoutError: - self.debug(f"Timed out after {self.timeout} seconds while connecting to {netloc}") - return [], [], 
(host, port) - except Exception as e: - log_fn = self.warning - if isinstance(e, OSError): - log_fn = self.debug - log_fn(f"Error connecting to {netloc}: {e}") - return [], [], (host, port) - finally: - with suppress(Exception): - transport.close() - - # Get the SSL object - try: - ssl_object = transport.get_extra_info("ssl_object") - except Exception as e: - self.verbose(f"Error getting ssl_object: {e}", trace=True) - return [], [], (host, port) - - # Get the certificate - try: - der = ssl_object.getpeercert(binary_form=True) - except Exception as e: - self.verbose(f"Error getting peer cert: {e}", trace=True) - return [], [], (host, port) - try: - cert = crypto.load_certificate(crypto.FILETYPE_ASN1, der) - except Exception as e: - self.verbose(f"Error loading certificate: {e}", trace=True) - return [], [], (host, port) - issuer = cert.get_issuer() - if issuer.emailAddress and self.helpers.regexes.email_regex.match(issuer.emailAddress): - emails.add(issuer.emailAddress) - subject = cert.get_subject() - if subject.emailAddress and self.helpers.regexes.email_regex.match(subject.emailAddress): - emails.add(subject.emailAddress) - common_name = str(subject.commonName).lstrip("*.").lower() - dns_names = set(self.get_cert_sans(cert)) - with suppress(KeyError): - dns_names.remove(common_name) - dns_names = [common_name] + list(dns_names) - return dns_names, list(emails), (host, port) - - @staticmethod - def get_cert_sans(cert): - sans = [] - raw_sans = None - ext_count = cert.get_extension_count() - for i in range(0, ext_count): - ext = cert.get_extension(i) - short_name = str(ext.get_short_name()) - if "subjectAltName" in short_name: - raw_sans = str(ext) - if raw_sans is not None: - for raw_san in raw_sans.split(","): - hostname = raw_san.split(":", 1)[-1].strip().lower() - # IPv6 addresses - if hostname.startswith("[") and hostname.endswith("]"): - hostname = hostname.strip("[]") - hostname = hostname.lstrip("*.") - sans.append(hostname) - return sans diff --git 
a/bbot/modules/telerik.py b/bbot/modules/telerik.py index 493117fc7d..39507c6d7e 100644 --- a/bbot/modules/telerik.py +++ b/bbot/modules/telerik.py @@ -156,7 +156,7 @@ class telerik(BaseModule): options = {"exploit_RAU_crypto": False, "include_subdirs": False} options_desc = { "exploit_RAU_crypto": "Attempt to confirm any RAU AXD detections are vulnerable", - "include_subdirs": "Include subdirectories in the scan (off by default)", # will create many finding events if used in conjunction with web spider or ffuf + "include_subdirs": "Include subdirectories in the scan (off by default)", # will create many finding events if used in conjunction with web spider or web_brute } in_scope_only = True @@ -299,9 +299,9 @@ async def handle_event(self, event): url = self.create_url(base_url, f"{dh}?dp=1") urls[url] = dh - gen = self.helpers.request_batch(list(urls)) + results = await self.helpers.request_batch(list(urls)) fail_count = 0 - async for url, response in gen: + for url, response in results: # cancel if we run into timeouts etc. 
if response is None: fail_count += 1 @@ -310,7 +310,7 @@ async def handle_event(self, event): if fail_count < 2: continue self.debug(f"Cancelling run against {base_url} due to failed request") - await gen.aclose() + break else: if "Cannot deserialize dialog parameters" in response.text: self.debug(f"Detected Telerik UI instance ({dh})") @@ -328,7 +328,7 @@ async def handle_event(self, event): event, ) # Once we have a match we need to stop, because the basic handler (Telerik.Web.UI.DialogHandler.aspx) usually works with a path wildcard - await gen.aclose() + break spellcheckhandler = "Telerik.Web.UI.SpellCheckHandler.axd" result, _ = await self.test_detector(base_url, spellcheckhandler) @@ -417,7 +417,7 @@ def create_url(self, baseurl, detector): async def test_detector(self, baseurl, detector): result = None url = self.create_url(baseurl, detector) - result = await self.helpers.request(url, timeout=self.scan.httpx_timeout) + result = await self.helpers.request(url, timeout=self.scan.http_timeout) return result, detector async def filter_event(self, event): diff --git a/bbot/modules/templates/bucket.py b/bbot/modules/templates/bucket.py index 2e3afcb4ac..50083906c9 100644 --- a/bbot/modules/templates/bucket.py +++ b/bbot/modules/templates/bucket.py @@ -131,9 +131,7 @@ async def brute_buckets(self, buckets, permutations=False, omit_base=False): for bucket_name in new_buckets: url, kwargs = self.build_bucket_request(bucket_name, base_domain, region) bucket_urls_kwargs.append((url, kwargs, (bucket_name, base_domain, region))) - async for url, kwargs, (bucket_name, base_domain, region), response in self.helpers.request_custom_batch( - bucket_urls_kwargs - ): + for url, response, (bucket_name, base_domain, region) in await self.helpers.request_batch(bucket_urls_kwargs): existent_bucket, tags = self._check_bucket_exists(bucket_name, response) if existent_bucket: yield bucket_name, url, tags, num_buckets diff --git a/bbot/modules/wafw00f.py b/bbot/modules/wafw00f.py index 
6063a3be33..59083b7d34 100644 --- a/bbot/modules/wafw00f.py +++ b/bbot/modules/wafw00f.py @@ -41,8 +41,8 @@ def _incoming_dedup_hash(self, event): async def handle_event(self, event): url = f"{event.parsed_url.scheme}://{event.parsed_url.netloc}/" - WW = await self.helpers.run_in_executor(wafw00f_main.WAFW00F, url, followredirect=False) - waf_detections, url = await self.helpers.run_in_executor(WW.identwaf) + WW = await self.helpers.run_in_executor_io(wafw00f_main.WAFW00F, url, followredirect=False) + waf_detections, url = await self.helpers.run_in_executor_io(WW.identwaf) if waf_detections: for waf in waf_detections: await self.emit_event( @@ -53,7 +53,7 @@ async def handle_event(self, event): ) else: if self.config.get("generic_detect") is True: - generic = await self.helpers.run_in_executor(WW.genericdetect) + generic = await self.helpers.run_in_executor_io(WW.genericdetect) if generic: waf = "generic detection" await self.emit_event( diff --git a/bbot/modules/web_brute.py b/bbot/modules/web_brute.py new file mode 100644 index 0000000000..d11f0984f7 --- /dev/null +++ b/bbot/modules/web_brute.py @@ -0,0 +1,421 @@ +import random +import string + +import blasthttp + +from bbot.modules.base import BaseModule + + +class web_brute(BaseModule): + watched_events = ["URL"] + produced_events = ["URL_UNVERIFIED"] + flags = ["active", "loud"] + meta = { + "description": "A fast web fuzzer powered by blasthttp", + "created_date": "2022-04-10", + "author": "@liquidsec", + } + + options = { + "wordlist": "https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/Web-Content/raft-small-directories.txt", + "lines": 5000, + "max_depth": 0, + "extensions": "", + "ignore_case": False, + "rate": 0, + "concurrency": 50, + } + + options_desc = { + "wordlist": "Specify wordlist to use when finding directories", + "lines": "take only the first N lines from the wordlist when finding directories", + "max_depth": "the maximum directory depth to attempt to solve", + 
"extensions": "Optionally include a list of extensions to extend the keyword with (comma separated)", + "ignore_case": "Only put lowercase words into the wordlist", + "rate": "Maximum requests per second (0 = unlimited)", + "concurrency": "Number of concurrent requests per URL being fuzzed", + } + + banned_characters = {" "} + blacklist = ["images", "css", "image"] + + in_scope_only = True + _module_threads = 4 + + async def setup_deps(self): + self.wordlist = await self.helpers.wordlist(self.config.get("wordlist")) + return True + + async def setup(self): + self.canary = "".join(random.choice(string.ascii_lowercase) for i in range(10)) + self.blast_client = self.helpers.blasthttp + wordlist_url = self.config.get("wordlist", "") + self.debug(f"Using wordlist [{wordlist_url}]") + self.wordlist_lines = self.generate_wordlist(self.wordlist) + self.words, words_len = self.generate_templist() + self.rate = self.config.get("rate", 0) or None + self.concurrency = self.config.get("concurrency", 50) + # warn if the module rate limit is less restrictive than the global setting + global_rate = self.scan.web_config.get("http_rate_limit", 0) + if self.rate and global_rate and global_rate < self.rate: + self.info( + f"Module rate limit ({self.rate} rps) is higher than global http_rate_limit ({global_rate} rps). " + f"The more restrictive global setting will be used." + ) + self.verbose(f"Generated dynamic wordlist with length [{str(words_len)}]") + try: + self.extensions = self.helpers.chain_lists(self.config.get("extensions", ""), validate=True) + self.debug(f"Using custom extensions: [{','.join(self.extensions)}]") + except ValueError as e: + self.warning(f"Error parsing extensions: {e}") + return False + return True + + async def handle_event(self, event): + if self.helpers.url_depth(event.url) > self.config.get("max_depth"): + self.debug("Exceeded max depth, aborting event") + return + + # only fuzz against a directory + if "." 
in event.parsed_url.path.split("/")[-1]: + self.debug("Aborting fuzz as period was detected in right-most path segment (likely a file)") + return + else: + # if we think its a directory, normalize it. + fixed_url = event.url.rstrip("/") + "/" + + exts = ["", "/"] + if self.extensions: + for ext in self.extensions: + exts.append(f".{ext}") + + filters = await self.baseline_fuzz(fixed_url, exts=exts) + async for r in self.execute_fuzz(self.words, fixed_url, exts=exts, filters=filters): + await self.emit_event( + r["url"], + "URL_UNVERIFIED", + parent=event, + tags=[f"status-{r['status']}"], + context=f"{{module}} brute-forced {event.url} and found {{event.type}}: {{event.data}}", + ) + + async def filter_event(self, event): + if "endpoint" in event.tags: + self.debug(f"rejecting URL [{event.url}] because we don't fuzz endpoints") + return False + return True + + def _build_batch_headers(self): + """Build header list for batch requests from scan config.""" + headers = [("User-Agent", self.scan.useragent)] + for hk, hv in self.scan.custom_http_headers.items(): + headers.append((hk, hv)) + return headers + + def _response_metrics(self, response): + """Extract metrics from a response for baseline comparison.""" + text = response.text or "" + return { + "status": response.status_code, + "length": len(response.content), + "words": len(text.split()), + "lines": text.count("\n") + 1, + } + + def _batch_response_metrics(self, response): + """Extract metrics from a raw blasthttp batch response.""" + body = response.body or "" + return { + "status": response.status, + "length": len(response.body_bytes), + "words": len(body.split()), + "lines": body.count("\n") + 1, + } + + def _is_baseline_match(self, metrics, baseline_filter): + """Return True if the response matches the baseline (i.e. 
should be filtered OUT).""" + if baseline_filter.get("abort"): + return True + filter_type = baseline_filter.get("type", "status") + if filter_type == "not_status": + # Filter anything matching this status (e.g. 404) + return metrics["status"] == baseline_filter["status"] + elif filter_type == "status_and_size": + return metrics["status"] == baseline_filter["status"] and metrics["length"] == baseline_filter["size"] + elif filter_type == "status_and_words": + return metrics["status"] == baseline_filter["status"] and metrics["words"] == baseline_filter["words"] + elif filter_type == "status_and_lines": + return metrics["status"] == baseline_filter["status"] and metrics["lines"] == baseline_filter["lines"] + elif filter_type == "status_only": + return metrics["status"] == baseline_filter["status"] + return False + + async def baseline_fuzz(self, url, exts=None, prefix="", suffix=""): + if exts is None: + exts = [""] + filters = {} + headers = self._build_batch_headers() + proxy = self.scan.http_proxy or None + + for ext in exts: + self.debug(f"running baseline for URL [{url}] with ext [{ext}]") + + # Generate 4 canary strings of increasing length and batch them + canary_configs = [] + canary_length = 4 + for _ in range(4): + canary_word = "".join(random.choice(string.ascii_lowercase) for _ in range(canary_length)) + canary_length += 2 + canary_url = f"{url}{prefix}{canary_word}{suffix}{ext}" + canary_configs.append( + blasthttp.BatchConfig( + canary_url, + headers=headers, + timeout=self.scan.http_timeout, + retries=0, + verify_certs=False, + follow_redirects=False, + proxy=proxy, + ) + ) + + canary_results = [] + results = await self.blast_client.request_batch(canary_configs, 4, rate_limit=self.rate) + for result in results: + if result.success: + canary_results.append(self._batch_response_metrics(result.response)) + + # Check we got all 4 responses + if len(canary_results) != 4: + self.warning( + f"Could not attain baseline for URL [{url}] ext [{ext}] — only got 
{len(canary_results)}/4 responses. Possible connectivity issues." + ) + filters[ext] = {"abort": True, "reason": "CONNECTIVITY_ISSUES"} + continue + + # If status codes differ across canaries, likely load balancing + statuses = {r["status"] for r in canary_results} + if len(statuses) != 1: + self.warning("Got different status codes for each baseline. This could indicate load balancing") + filters[ext] = {"abort": True, "reason": "BASELINE_CHANGED_CODES"} + continue + + baseline_status = canary_results[0]["status"] + + # All 404s — just look for anything not 404 + if baseline_status == 404: + self.debug("All baseline results were 404, filtering on status != 404") + filters[ext] = {"type": "not_status", "status": 404} + continue + + # All 403s — possible WAF + if baseline_status == 403: + self.warning("All baseline requests received 403. A WAF may be actively blocking traffic.") + + # All 429s — rate limiting, abort + if baseline_status == 429: + self.warning( + f"Received 429 (Too Many Requests) for URL [{url}]. A WAF or rate limiter is blocking requests, aborting." + ) + filters[ext] = {"abort": True, "reason": "RECEIVED_429"} + continue + + # Try to find a stable metric for AND filtering + # 1. Same body size across all canaries + if len({r["length"] for r in canary_results}) == 1: + self.debug("All baseline results had the same body size, filtering on status + size") + filters[ext] = { + "type": "status_and_size", + "status": baseline_status, + "size": canary_results[0]["length"], + } + continue + + # 2. Same word count + if len({r["words"] for r in canary_results}) == 1: + self.debug("All baseline results had the same word count, filtering on status + words") + filters[ext] = { + "type": "status_and_words", + "status": baseline_status, + "words": canary_results[0]["words"], + } + continue + + # 3. 
Same line count + if len({r["lines"] for r in canary_results}) == 1: + self.debug("All baseline results had the same line count, filtering on status + lines") + filters[ext] = { + "type": "status_and_lines", + "status": baseline_status, + "lines": canary_results[0]["lines"], + } + continue + + # Nothing stable — fall back to status-only + self.debug("No stable baseline metric found, filtering on status only") + filters[ext] = {"type": "status_only", "status": baseline_status} + + return filters + + async def execute_fuzz( + self, + words, + url, + prefix="", + suffix="", + exts=None, + filters=None, + baseline=False, + ): + if exts is None: + exts = [""] + if filters is None: + filters = {} + + headers = self._build_batch_headers() + proxy = self.scan.http_proxy or None + + for ext in exts: + # Check for abort filter; default to filtering 404s if no filter provided + ext_filter = filters.get(ext, {"type": "not_status", "status": 404}) + if ext_filter.get("abort"): + self.warning(f"Skipping fuzz for ext [{ext}]: {ext_filter.get('reason', 'ABORT')}") + continue + + # Build batch configs for this extension + configs = [] + for word in words: + fuzz_url = f"{url}{prefix}{word}{suffix}{ext}" + configs.append( + blasthttp.BatchConfig( + fuzz_url, + headers=headers, + timeout=self.scan.http_timeout, + retries=0, + verify_certs=False, + follow_redirects=False, + proxy=proxy, + ) + ) + + if not configs: + continue + + self.debug(f"Fuzzing {len(configs)} URLs for ext [{ext}]") + + # Fire all requests via native blasthttp batch (Rust concurrency) + results = await self.blast_client.request_batch(configs, self.concurrency, rate_limit=self.rate) + + # Index results by URL for ordered processing + results_by_url = {} + for result in results: + results_by_url[result.url] = result + + # Process in wordlist order so canary (appended last) is checked last + canary_found = False + hits = [] + for config in configs: + if self.scan.stopping: + return + result = 
results_by_url.get(config.url) + if result is None or not result.success: + continue + + response = result.response + metrics = self._batch_response_metrics(response) + + # Check if this matches the baseline (should be filtered out) + if ext_filter and self._is_baseline_match(metrics, ext_filter): + continue + + # Extract the word from the URL to check for canary + word = result.url[len(url) + len(prefix) :] + if suffix: + word = word[: -len(suffix + ext)] if (suffix + ext) else word + elif ext: + word = word[: -len(ext)] + word = word.rstrip("/") + + if word == self.canary: + canary_found = True + continue + + # Filter 3xx redirects to site root — these are soft 404s, + # not real findings (e.g. mod_userdir sending ~user to /) + if 300 <= response.status < 400: + location = "" + for hdr_name, hdr_val in response.headers: + if hdr_name.lower() == "location": + location = hdr_val + break + if location in ("/", url): + self.debug(f"Filtering redirect-to-root hit: {response.url} -> {location}") + continue + + hits.append({"url": response.url, "status": response.status}) + + # If canary was found in results, the server is returning everything — abort + if canary_found and hits: + self.debug("Found canary in results, all hits are likely false positives — aborting") + return + + # Mid-scan validation: one canary check per extension + if hits and not baseline and ext_filter: + canary_word = "".join(random.choice(string.ascii_lowercase) for _ in range(4)) + canary_url = f"{url}{prefix}{canary_word}{suffix}{ext}" + canary_configs = [ + blasthttp.BatchConfig( + canary_url, + headers=headers, + timeout=self.scan.http_timeout, + retries=0, + verify_certs=False, + follow_redirects=False, + proxy=proxy, + ) + ] + canary_batch = await self.blast_client.request_batch(canary_configs, 1, rate_limit=self.rate) + if canary_batch and canary_batch[0].success: + canary_metrics = self._batch_response_metrics(canary_batch[0].response) + if not self._is_baseline_match(canary_metrics, 
ext_filter): + self.verbose( + f"Would have reported {len(hits)} hit(s), but mid-scan baseline check failed. " + "This could be due to a WAF turning on mid-scan." + ) + self.verbose(f"Aborting the current run against [{url}]") + return + + for hit in hits: + yield hit + + def generate_templist(self, prefix=None): + """Generate word list from wordlist_lines, filtered by optional prefix.""" + words = [] + if prefix: + prefix = prefix.strip().lower() + max_lines = self.config.get("lines") + + for line in self.wordlist_lines[:max_lines]: + if (not prefix) or line.lower().startswith(prefix): + words.append(line) + + words.append(self.canary) + return words, len(words) + + def generate_wordlist(self, wordlist_file): + seen = {} + ignore_case = self.config.get("ignore_case", False) + for line in self.helpers.read_file(wordlist_file): + line = line.strip() + if not line: + continue + if line in self.blacklist: + self.debug(f"Skipping adding [{line}] to wordlist because it was in the blacklist") + continue + if any(x in line for x in self.banned_characters): + self.debug(f"Skipping adding [{line}] to wordlist because it has a banned character") + continue + if ignore_case: + line = line.lower() + seen[line] = None + return list(seen) diff --git a/bbot/modules/ffuf_shortnames.py b/bbot/modules/web_brute_shortnames.py similarity index 82% rename from bbot/modules/ffuf_shortnames.py rename to bbot/modules/web_brute_shortnames.py index f002b4e613..a9a01e641a 100644 --- a/bbot/modules/ffuf_shortnames.py +++ b/bbot/modules/web_brute_shortnames.py @@ -3,15 +3,15 @@ import random import string -from bbot.modules.ffuf import ffuf +from bbot.modules.web_brute import web_brute -class ffuf_shortnames(ffuf): +class web_brute_shortnames(web_brute): watched_events = ["URL_HINT"] produced_events = ["URL_UNVERIFIED"] flags = ["loud", "active", "iis-shortnames", "web-heavy"] meta = { - "description": "Use ffuf in combination IIS shortnames", + "description": "Brute-force IIS shortnames using 
ML-predicted wordlists", "created_date": "2022-07-05", "author": "@liquidsec", } @@ -19,9 +19,7 @@ class ffuf_shortnames(ffuf): options = { "wordlist_extensions": "", # default is defined within setup function "max_depth": 1, - "version": "2.0.0", "extensions": "", - "ignore_redirects": True, "find_common_prefixes": False, "find_delimiters": True, "find_subwords": False, @@ -32,32 +30,29 @@ class ffuf_shortnames(ffuf): options_desc = { "wordlist_extensions": "Specify wordlist to use when making extension lists", "max_depth": "the maximum directory depth to attempt to solve", - "version": "ffuf version", "extensions": "Optionally include a list of extensions to extend the keyword with (comma separated)", - "ignore_redirects": "Explicitly ignore redirects (301,302)", - "find_common_prefixes": "Attempt to automatically detect common prefixes and make additional ffuf runs against them", - "find_delimiters": "Attempt to detect common delimiters and make additional ffuf runs against them", - "find_subwords": "Attempt to detect subwords and make additional ffuf runs against them", + "find_common_prefixes": "Attempt to automatically detect common prefixes and make additional runs against them", + "find_delimiters": "Attempt to detect common delimiters and make additional runs against them", + "find_subwords": "Attempt to detect subwords and make additional runs against them", "max_predictions": "The maximum number of predictions to generate per shortname prefix", "rate": "Rate of requests per second (default: 0)", } deps_pip = ["numpy"] - deps_common = ["ffuf"] in_scope_only = True supplementary_words = ["html", "ajax", "xml", "json", "api"] - def generate_templist(self, hint, shortname_type): - virtual_file = set() # Use a set to avoid duplicates + async def generate_templist(self, hint, shortname_type): + words = await self.helpers.run_in_executor_cpu(self._generate_templist_sync, hint, shortname_type) + return words, len(words) + def _generate_templist_sync(self, hint, 
shortname_type): + words = set() for prediction, score in self.predict(hint, self.max_predictions, model=shortname_type): - prediction_lower = prediction.lower() # Convert to lowercase - self.debug(f"Got prediction: [{prediction_lower}] from prefix [{hint}] with score [{score}]") - virtual_file.add(prediction_lower) # Add to set to ensure uniqueness - - virtual_file.add(self.canary.lower()) # Ensure canary is also lowercase - return self.helpers.tempfile(list(virtual_file), pipe=False), len(virtual_file) + words.add(prediction.lower()) + words.add(self.canary.lower()) + return list(words) def predict(self, prefix, n=25, model="endpoint"): predictor_name = f"{model}_predictor" @@ -96,12 +91,12 @@ async def setup_deps(self): return True async def setup(self): - self.proxy = self.scan.web_config.get("http_proxy", "") self.canary = "".join(random.choice(string.ascii_lowercase) for i in range(10)) - self.ignore_redirects = self.config.get("ignore_redirects") + self.blast_client = self.helpers.blasthttp self.max_predictions = self.config.get("max_predictions") self.find_subwords = self.config.get("find_subwords") - self.rate = self.config.get("rate", 0) + self.rate = self.config.get("rate", 0) or None + self.concurrency = 50 class MinimalWordPredictor: def __init__(self): @@ -126,7 +121,7 @@ def find_class(self, module, name): return MinimalWordPredictor return super().find_class(module, name) - self.info("Loading ffuf_shortnames prediction models, could take a while if not cached") + self.info("Loading shortname prediction models, could take a while if not cached") endpoint_model = await self.helpers.wordlist( "https://raw.githubusercontent.com/blacklanternsecurity/wordpredictor/refs/heads/main/trained_models/endpoints.bin" ) @@ -146,7 +141,7 @@ def find_class(self, module, name): self.subword_list = [] if self.find_subwords: - self.debug("Acquiring ffuf_shortnames subword list") + self.debug("Acquiring shortname subword list") subwords = await self.helpers.wordlist( 
"https://raw.githubusercontent.com/nltk/nltk_data/refs/heads/gh-pages/packages/corpora/words.zip", zip=True, @@ -187,7 +182,7 @@ def find_delimiter(self, hint): async def filter_event(self, event): if "iis-magic-url" in event.tags: - return False, "iis-magic-url URL_HINTs are not solvable by ffuf_shortnames" + return False, "iis-magic-url URL_HINTs are not solvable by web_brute_shortnames" if event.parent.type != "URL": return False, "its parent event is not of type URL" return True @@ -208,13 +203,12 @@ async def handle_event(self, event): elif "shortname-directory" in event.tags: shortname_type = "directory" else: - self.error("ffuf_shortnames received URL_HINT without proper 'shortname-' tag") + self.error("web_brute_shortnames received URL_HINT without proper 'shortname-' tag") return host = f"{event.parent.parsed_url.scheme}://{event.parent.parsed_url.netloc}/" if host not in self.per_host_collection.keys(): self.per_host_collection[host] = [(filename_hint, event.parent.url)] - else: self.per_host_collection[host].append((filename_hint, event.parent.url)) @@ -227,19 +221,16 @@ async def handle_event(self, event): used_extensions = self.build_extension_list(event) if len(filename_hint) == 6: - tempfile, tempfile_len = self.generate_templist(filename_hint, shortname_type) - self.verbose( - f"generated temp word list of size [{str(tempfile_len)}] for filename hint: [{filename_hint}]" - ) - + words, words_len = await self.generate_templist(filename_hint, shortname_type) + self.verbose(f"generated word list of size [{str(words_len)}] for filename hint: [{filename_hint}]") else: - tempfile = self.helpers.tempfile([filename_hint], pipe=False) - tempfile_len = 1 + words = [filename_hint] + words_len = 1 - if tempfile_len > 0: + if words_len > 0: if shortname_type == "endpoint": for ext in used_extensions: - async for r in self.execute_ffuf(tempfile, root_url, suffix=f".{ext}"): + async for r in self.execute_fuzz(words, root_url, suffix=f".{ext}"): await 
self.emit_event( r["url"], "URL_UNVERIFIED", @@ -249,7 +240,7 @@ async def handle_event(self, event): ) elif shortname_type == "directory": - async for r in self.execute_ffuf(tempfile, root_url, exts=["/"]): + async for r in self.execute_fuzz(words, root_url, exts=["/"]): r_url = f"{r['url'].rstrip('/')}/" await self.emit_event( r_url, @@ -265,15 +256,15 @@ async def handle_event(self, event): if delimiter_r: delimiter, prefix, partial_hint = delimiter_r self.verbose(f"Detected delimiter [{delimiter}] in hint [{filename_hint}]") - tempfile, tempfile_len = self.generate_templist(partial_hint, "directory") - ffuf_prefix = f"{prefix}{delimiter}" - async for r in self.execute_ffuf(tempfile, root_url, prefix=ffuf_prefix, exts=["/"]): + words, words_len = await self.generate_templist(partial_hint, "directory") + fuzz_prefix = f"{prefix}{delimiter}" + async for r in self.execute_fuzz(words, root_url, prefix=fuzz_prefix, exts=["/"]): await self.emit_event( r["url"], "URL_UNVERIFIED", parent=event, tags=[f"status-{r['status']}"], - context=f'{{module}} brute-forced directories with detected prefix "{ffuf_prefix}" and found {{event.type}}: {{event.pretty_string}}', + context=f'{{module}} brute-forced directories with detected prefix "{fuzz_prefix}" and found {{event.type}}: {{event.pretty_string}}', ) elif "shortname-endpoint" in event.tags: @@ -282,23 +273,23 @@ async def handle_event(self, event): if delimiter_r: delimiter, prefix, partial_hint = delimiter_r self.verbose(f"Detected delimiter [{delimiter}] in hint [{filename_hint}]") - tempfile, tempfile_len = self.generate_templist(partial_hint, "endpoint") - ffuf_prefix = f"{prefix}{delimiter}" - async for r in self.execute_ffuf(tempfile, root_url, prefix=ffuf_prefix, suffix=f".{ext}"): + words, words_len = await self.generate_templist(partial_hint, "endpoint") + fuzz_prefix = f"{prefix}{delimiter}" + async for r in self.execute_fuzz(words, root_url, prefix=fuzz_prefix, suffix=f".{ext}"): await self.emit_event( r["url"], 
"URL_UNVERIFIED", parent=event, tags=[f"status-{r['status']}"], - context=f'{{module}} brute-forced {ext.upper()} files with detected prefix "{ffuf_prefix}" and found {{event.type}}: {{event.pretty_string}}', + context=f'{{module}} brute-forced {ext.upper()} files with detected prefix "{fuzz_prefix}" and found {{event.type}}: {{event.pretty_string}}', ) if self.config.get("find_subwords"): subword, suffix = self.find_subword(filename_hint) if subword: if "shortname-directory" in event.tags: - tempfile, tempfile_len = self.generate_templist(suffix, "directory") - async for r in self.execute_ffuf(tempfile, root_url, prefix=subword, exts=["/"]): + words, words_len = await self.generate_templist(suffix, "directory") + async for r in self.execute_fuzz(words, root_url, prefix=subword, exts=["/"]): await self.emit_event( r["url"], "URL_UNVERIFIED", @@ -308,8 +299,8 @@ async def handle_event(self, event): ) elif "shortname-endpoint" in event.tags: for ext in used_extensions: - tempfile, tempfile_len = self.generate_templist(suffix, "endpoint") - async for r in self.execute_ffuf(tempfile, root_url, prefix=subword, suffix=f".{ext}"): + words, words_len = await self.generate_templist(suffix, "endpoint") + async for r in self.execute_fuzz(words, root_url, prefix=subword, suffix=f".{ext}"): await self.emit_event( r["url"], "URL_UNVERIFIED", @@ -337,21 +328,21 @@ async def finish(self): elif "shortname-directory" in self.shortname_to_event[hint].tags: shortname_type = "directory" else: - self.error("ffuf_shortnames received URL_HINT without proper 'shortname-' tag") + self.error("web_brute_shortnames received URL_HINT without proper 'shortname-' tag") continue partial_hint = hint[len(prefix) :] # safeguard to prevent loading the entire wordlist if len(partial_hint) > 0: - tempfile, tempfile_len = self.generate_templist(partial_hint, shortname_type) + words, words_len = await self.generate_templist(partial_hint, shortname_type) if "shortname-directory" in 
self.shortname_to_event[hint].tags: self.verbose( f"Running common prefix check for URL_HINT: {hint} with prefix: {prefix} and partial_hint: {partial_hint}" ) - async for r in self.execute_ffuf(tempfile, url, prefix=prefix, exts=["/"]): + async for r in self.execute_fuzz(words, url, prefix=prefix, exts=["/"]): await self.emit_event( r["url"], "URL_UNVERIFIED", @@ -366,9 +357,7 @@ async def finish(self): self.verbose( f"Running common prefix check for URL_HINT: {hint} with prefix: {prefix}, extension: .{ext}, and partial_hint: {partial_hint}" ) - async for r in self.execute_ffuf( - tempfile, url, prefix=prefix, suffix=f".{ext}" - ): + async for r in self.execute_fuzz(words, url, prefix=prefix, suffix=f".{ext}"): await self.emit_event( r["url"], "URL_UNVERIFIED", diff --git a/bbot/modules/wpscan.py b/bbot/modules/wpscan.py index b7a703b90d..cb989a21a3 100644 --- a/bbot/modules/wpscan.py +++ b/bbot/modules/wpscan.py @@ -170,7 +170,7 @@ def parse_wp_misc(self, interesting_json, base_url, source_event): source_event, ) else: - url_event = self.make_event(url, "URL_UNVERIFIED", parent=source_event, tags=["httpx-safe"]) + url_event = self.make_event(url, "URL_UNVERIFIED", parent=source_event, tags=["blasthttp-safe"]) if url_event: yield url_event yield self.make_event( @@ -242,7 +242,7 @@ def parse_wp_plugins(self, plugins_json, base_url, source_event): for name, plugin in plugins_json.items(): url = plugin.get("location", base_url) if url != base_url: - url_event = self.make_event(url, "URL_UNVERIFIED", parent=source_event, tags=["httpx-safe"]) + url_event = self.make_event(url, "URL_UNVERIFIED", parent=source_event, tags=["blasthttp-safe"]) if url_event: yield url_event version = plugin.get("version", {}).get("number", "") diff --git a/bbot/presets/nuclei/nuclei-budget.yml b/bbot/presets/nuclei/nuclei-budget.yml index d4ac5c8163..a290a11215 100644 --- a/bbot/presets/nuclei/nuclei-budget.yml +++ b/bbot/presets/nuclei/nuclei-budget.yml @@ -1,7 +1,7 @@ description: Run 
nuclei scans against all discovered targets, using budget mode to look for low hanging fruit with greatly reduced number of requests modules: - - httpx + - http - nuclei - portfilter diff --git a/bbot/presets/nuclei/nuclei-heavy.yml b/bbot/presets/nuclei/nuclei-heavy.yml index 6468ba9c81..d185ecef55 100644 --- a/bbot/presets/nuclei/nuclei-heavy.yml +++ b/bbot/presets/nuclei/nuclei-heavy.yml @@ -1,7 +1,7 @@ description: Run nuclei scans against all discovered targets, allowing for spidering, against ALL URLs, and with additional discovery modules. modules: - - httpx + - http - nuclei - robots - urlscan diff --git a/bbot/presets/nuclei/nuclei-technology.yml b/bbot/presets/nuclei/nuclei-technology.yml index c2c4c8cf7a..fc63767f6d 100644 --- a/bbot/presets/nuclei/nuclei-technology.yml +++ b/bbot/presets/nuclei/nuclei-technology.yml @@ -1,7 +1,7 @@ description: Run nuclei scans against all discovered targets, running templates which match discovered technologies modules: - - httpx + - http - nuclei - portfilter diff --git a/bbot/presets/nuclei/nuclei.yml b/bbot/presets/nuclei/nuclei.yml index d0d77978eb..fc1e34a4b3 100644 --- a/bbot/presets/nuclei/nuclei.yml +++ b/bbot/presets/nuclei/nuclei.yml @@ -1,7 +1,7 @@ description: Run nuclei scans against all discovered targets modules: - - httpx + - http - nuclei - portfilter diff --git a/bbot/presets/spider.yml b/bbot/presets/spider.yml index 9e98ff4539..b08998a396 100644 --- a/bbot/presets/spider.yml +++ b/bbot/presets/spider.yml @@ -1,7 +1,7 @@ description: Recursive web spider modules: - - httpx + - http blacklist: # Prevent spider from invalidating sessions by logging out diff --git a/bbot/presets/web/dirbust-heavy.yml b/bbot/presets/web/dirbust-heavy.yml index effba2554a..ce18b4ccd2 100644 --- a/bbot/presets/web/dirbust-heavy.yml +++ b/bbot/presets/web/dirbust-heavy.yml @@ -7,16 +7,16 @@ flags: - iis-shortnames modules: - - ffuf + - web_brute - wayback config: modules: iis_shortnames: - # we exploit the shortnames 
vulnerability to produce URL_HINTs which are consumed by ffuf_shortnames + # we exploit the shortnames vulnerability to produce URL_HINTs which are consumed by web_brute_shortnames detect_only: False - ffuf: - depth: 3 + web_brute: + max_depth: 3 lines: 5000 extensions: - php diff --git a/bbot/presets/web/dirbust-light.yml b/bbot/presets/web/dirbust-light.yml index d088ee24ee..52b9ff40b4 100644 --- a/bbot/presets/web/dirbust-light.yml +++ b/bbot/presets/web/dirbust-light.yml @@ -4,10 +4,10 @@ include: - iis-shortnames modules: - - ffuf + - web_brute config: modules: - ffuf: + web_brute: # wordlist size = 1000 lines: 1000 diff --git a/bbot/presets/web/dotnet-audit.yml b/bbot/presets/web/dotnet-audit.yml index 581c26c7ab..642f89e8f7 100644 --- a/bbot/presets/web/dotnet-audit.yml +++ b/bbot/presets/web/dotnet-audit.yml @@ -5,10 +5,10 @@ include: - iis-shortnames modules: - - httpx + - http - badsecrets - - ffuf_shortnames - - ffuf + - web_brute_shortnames + - web_brute - telerik - ajaxpro - dotnetnuke @@ -16,10 +16,10 @@ modules: config: modules: - ffuf: + web_brute: extensions: asp,aspx,ashx,asmx,ascx - extensions_ignore_case: True - ffuf_shortnames: + ignore_case: True + web_brute_shortnames: find_subwords: True telerik: exploit_RAU_crypto: True diff --git a/bbot/presets/web/lightfuzz-light.yml b/bbot/presets/web/lightfuzz-light.yml index 052a774cd9..1c209e2942 100644 --- a/bbot/presets/web/lightfuzz-light.yml +++ b/bbot/presets/web/lightfuzz-light.yml @@ -1,7 +1,7 @@ description: "Minimal fuzzing: only path traversal, SQLi, and XSS submodules. No POST requests. No companion modules. Safest option for running alongside larger scans with minimal overhead." 
modules: - - httpx + - http - lightfuzz - portfilter diff --git a/bbot/presets/web/lightfuzz-xss.yml b/bbot/presets/web/lightfuzz-xss.yml index 554f1f2b60..c3bf35fcdb 100644 --- a/bbot/presets/web/lightfuzz-xss.yml +++ b/bbot/presets/web/lightfuzz-xss.yml @@ -1,7 +1,7 @@ description: "XSS-only: enables only the xss submodule with paramminer_getparams and reflected_parameters. POST disabled, no query string collapsing. Example of a focused single-submodule preset." modules: - - httpx + - http - lightfuzz - paramminer_getparams - reflected_parameters diff --git a/bbot/presets/web/paramminer.yml b/bbot/presets/web/paramminer.yml index 634e883742..5b6bd4dec3 100644 --- a/bbot/presets/web/paramminer.yml +++ b/bbot/presets/web/paramminer.yml @@ -4,7 +4,7 @@ flags: - web-paramminer modules: - - httpx + - http - reflected_parameters - hunt diff --git a/bbot/scanner/preset/environ.py b/bbot/scanner/preset/environ.py index a222dd1bb3..c021b1e248 100644 --- a/bbot/scanner/preset/environ.py +++ b/bbot/scanner/preset/environ.py @@ -77,7 +77,7 @@ def __init__(self, preset): def flatten_config(self, config, base="bbot"): """ Flatten a JSON-like config into a list of environment variables: - {"modules": [{"httpx": {"timeout": 5}}]} --> "BBOT_MODULES_HTTPX_TIMEOUT=5" + {"modules": [{"http": {"threads": 10}}]} --> "BBOT_MODULES_HTTP_THREADS=10" """ if type(config) == omegaconf.dictconfig.DictConfig: for k, v in config.items(): diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index 01e6ba681f..ad37eb047d 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -588,11 +588,34 @@ def apply_log_level(self, apply_core=False): @property def helpers(self): if self._helpers is None: + # Ensure we have at least a minimal target object before any helper (especially web helpers) is constructed.
+ + self._ensure_minimal_target() from bbot.core.helpers.helper import ConfigAwareHelper self._helpers = ConfigAwareHelper(preset=self) return self._helpers + def _ensure_minimal_target(self): + """ + Lazily construct a minimal BBOTTarget from the current seeds / whitelist / blacklist if one does not already exist. + + This is intentionally lighter-weight than the full async target + preparation performed in `bake()` (which also calls + `target.generate_children()`). + """ + if self._target is not None: + return + + from bbot.scanner.target import BBOTTarget + + self._target = BBOTTarget( + *list(self._seeds), + whitelist=self._whitelist, # modify this after scope rework branch is merged into dev + blacklist=self._blacklist, + strict_scope=self.strict_scope, + ) + @property def module_loader(self): self.environ diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index b2883cded6..49c4bb9b56 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -41,11 +41,11 @@ class Scanner: Examples: Create scan with multiple targets: - >>> my_scan = Scanner("evilcorp.com", "1.2.3.0/24", modules=["portscan", "sslcert", "httpx"]) + >>> my_scan = Scanner("evilcorp.com", "1.2.3.0/24", modules=["portscan", "sslcert", "http"]) Create scan with custom config: >>> config = {"http_proxy": "http://127.0.0.1:8080", "modules": {"portscan": {"top_ports": 2000}}} - >>> my_scan = Scanner("www.evilcorp.com", modules=["portscan", "httpx"], config=config) + >>> my_scan = Scanner("www.evilcorp.com", modules=["portscan", "http"], config=config) Start the scan, iterating over events as they're discovered (synchronous): >>> for event in my_scan.start(): @@ -233,21 +233,21 @@ def __init__( self.web_max_redirects = max(max_redirects, self.web_spider_distance) self.http_proxy = web_config.get("http_proxy", "") self.http_timeout = web_config.get("http_timeout", 10) - self.httpx_timeout = web_config.get("httpx_timeout", 5) + self.blasthttp_timeout = 
web_config.get("blasthttp_timeout", 5) self.http_retries = web_config.get("http_retries", 1) - self.httpx_retries = web_config.get("httpx_retries", 1) + self.blasthttp_retries = web_config.get("blasthttp_retries", 1) self.useragent = f"{web_config.get('user_agent', 'BBOT')} {web_config.get('user_agent_suffix') or ''}".strip() # custom HTTP headers warning self.custom_http_headers = web_config.get("http_headers", {}) if self.custom_http_headers: self.warning( - "You have enabled custom HTTP headers. These will be attached to all in-scope requests and all requests made by httpx." + "You have enabled custom HTTP headers. These will be attached to all in-scope requests and all requests made by blasthttp." ) # custom HTTP cookies warning self.custom_http_cookies = web_config.get("http_cookies", {}) if self.custom_http_cookies: self.warning( - "You have enabled custom HTTP cookies. These will be attached to all in-scope requests and all requests made by httpx." + "You have enabled custom HTTP cookies. These will be attached to all in-scope requests and all requests made by blasthttp." ) # url file extensions @@ -605,6 +605,15 @@ async def load_modules(self): After all modules are loaded, they are sorted by `_priority` and stored in the `modules` dictionary. """ if not self._modules_loaded: + # If the preset hasn't been baked yet but modules have been + # manually attached (e.g. in tests), skip the automatic loading + # pipeline and operate only on the existing modules. 
+ if self.preset is None: + if not self.modules: + self.warning("No modules to load") + self._modules_loaded = True + return + if not self.preset.modules: self.warning("No modules to load") self._modules_loaded = True @@ -862,13 +871,13 @@ def _drain_queues(self): """ self.debug("Draining queues") for module in self.modules.values(): - with contextlib.suppress(asyncio.queues.QueueEmpty): - while 1: - if module.incoming_event_queue not in (None, False): + if module.incoming_event_queue not in (None, False): + with contextlib.suppress(asyncio.queues.QueueEmpty): + while 1: module.incoming_event_queue.get_nowait() - with contextlib.suppress(asyncio.queues.QueueEmpty): - while 1: - if module.outgoing_event_queue not in (None, False): + if module.outgoing_event_queue not in (None, False): + with contextlib.suppress(asyncio.queues.QueueEmpty): + while 1: module.outgoing_event_queue.get_nowait() self.debug("Finished draining queues") @@ -944,9 +953,6 @@ async def _cleanup(self): # clean up dns engine if self.helpers._dns is not None: await self.helpers.dns.shutdown() - # clean up web engine - if self.helpers._web is not None: - await self.helpers.web.shutdown() # In some test paths, `_prep()` is never called, so `home` and # `temp_dir` may not exist. Treat those as best-effort cleanups. 
home = getattr(self, "home", None) @@ -1188,7 +1194,7 @@ async def dns_yara_rules(self): if self.dns_yara_rules_uncompiled is not None: import yara - self._dns_yara_rules = await self.helpers.run_in_executor( + self._dns_yara_rules = await self.helpers.run_in_executor_cpu( yara.compile, source="\n".join(self.dns_yara_rules_uncompiled.values()) ) return self._dns_yara_rules @@ -1204,7 +1210,7 @@ async def extract_in_scope_hostnames(self, s): matches = set() dns_yara_rules = await self.dns_yara_rules() if dns_yara_rules is not None: - for match in await self.helpers.run_in_executor(dns_yara_rules.match, data=s): + for match in await self.helpers.run_in_executor_cpu(dns_yara_rules.match, data=s): for string in match.strings: for instance in string.instances: matches.add(str(instance)) diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index ff5c0d4548..23a5e02876 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -168,19 +168,16 @@ def __bool__(self): return bool(len(self._rt)) or bool(self.event_seeds) def __getstate__(self): + """Serialize for pickling — RadixTarget (PyO3) can't be pickled directly.""" return { - "event_seeds": self.event_seeds, + "inputs": [str(e.input) for e in self.event_seeds], "strict_scope": self.strict_scope, - "acl_mode": self._rt._acl_mode, + "acl_mode": getattr(self._rt, "_acl_mode", False), } def __setstate__(self, state): - self.strict_scope = state["strict_scope"] - self._rt = RadixTarget(strict_scope=state["strict_scope"], acl_mode=state["acl_mode"]) - self.event_seeds = set() - for event_seed in state["event_seeds"]: - self.event_seeds.add(event_seed) - self._add(event_seed.host, data=event_seed) + """Reconstruct from pickled state.""" + self.__init__(*state["inputs"], strict_scope=state["strict_scope"], acl_mode=state.get("acl_mode", False)) def __eq__(self, other): return self.hash == getattr(other, "hash", None) @@ -272,8 +269,11 @@ def get(self, host, **kwargs): # first, check event's host against 
blacklist try: event_seed = self._make_event_seed(host, raise_error=raise_error) - host = event_seed.host - to_match = event_seed.data + if event_seed is None: + to_match = str(host) + else: + host = event_seed.host + to_match = event_seed.data except ValidationError: to_match = str(host) event_result = super().get(host) diff --git a/bbot/scripts/benchmark_report.py b/bbot/scripts/benchmark_report.py index 50ca6f3384..2c64e9c399 100644 --- a/bbot/scripts/benchmark_report.py +++ b/bbot/scripts/benchmark_report.py @@ -33,7 +33,10 @@ def get_current_branch() -> str: def checkout_branch(branch: str, repo_path: Path = None): - """Checkout a git branch, cleaning up generated files first.""" + """Checkout a git branch, cleaning up generated and modified files first.""" + # Reset modified tracked files (e.g. uv.lock changed by `uv sync`) + print("Resetting modified tracked files before checkout") + run_command(["git", "checkout", "--", "."], cwd=repo_path) # Remove untracked files before checkout. Without this, files generated # by one branch's toolchain (e.g. uv.lock from `uv run` on a Poetry # branch) block checkout to a branch that tracks those same files. 
diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 1deca2be08..5884ef5eb4 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -98,7 +98,7 @@ def helpers(scan): return scan.helpers -httpx_response = { +blasthttp_response = { "timestamp": "2022-11-14T12:14:27.377566416-05:00", "hash": { "body_md5": "84238dfc8092e5d9c0dac8ef93371a07", @@ -223,7 +223,9 @@ class bbot_events: parent=scan.root_event, module=dummy_module, ) - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event, module=dummy_module) + http_response = scan.make_event( + blasthttp_response, "HTTP_RESPONSE", parent=scan.root_event, module=dummy_module + ) storage_bucket = scan.make_event( {"name": "storage", "url": "https://storage.blob.core.windows.net"}, "STORAGE_BUCKET", diff --git a/bbot/test/conftest.py b/bbot/test/conftest.py index ac4ef7b8b2..dec15b4496 100644 --- a/bbot/test/conftest.py +++ b/bbot/test/conftest.py @@ -42,11 +42,6 @@ CORE.merge_default(test_config) -@pytest.fixture -def assert_all_responses_were_requested() -> bool: - return False - - @pytest.fixture(autouse=True) def silence_live_logging(): for handler in logging.getLogger().handlers: @@ -93,24 +88,104 @@ def bbot_httpserver_ssl(): server.clear() -def should_mock(request): - return ( - request.url.host - not in ["127.0.0.1", "localhost", "raw.githubusercontent.com", "asndb.api.bbot.io"] + interactsh_servers - ) +def _should_mock(host): + """Check if a request to this host should be mocked (True = mock, False = pass through).""" + return host not in ["127.0.0.1", "localhost", "raw.githubusercontent.com"] + interactsh_servers + + +@pytest.fixture +def blasthttp_mock(): + """ + Mock fixture for blasthttp engine requests. + Patches WebHelper.request() to intercept external requests and return + mock responses. Requests to localhost/127.0.0.1 pass through to real blasthttp.
+ """ + from bbot.core.helpers.web.web import WebHelper + from bbot.test.mock_blasthttp import BlasthttpMock + + mock = BlasthttpMock(should_mock_fn=_should_mock) + original_request = WebHelper.request + + async def patched_request(self, *args, **kwargs): + # Peek at URL without modifying kwargs + url = kwargs.get("url", "") + if not url and args: + url = str(args[0]) + # If resolve_ip points to localhost, pass through to real blasthttp + resolve_ip = kwargs.get("resolve_ip", "") + if resolve_ip and resolve_ip in ("127.0.0.1", "::1"): + return await original_request(self, *args, **kwargs) + if url and mock.should_intercept(url): + # Read raise_error before the mock pops it + raise_error = kwargs.get("raise_error", False) + result = await mock.handle_engine_request(self, *args, **kwargs) + # Convert engine-style error dicts to WebError exceptions + if isinstance(result, dict) and "_request_error" in result: + if raise_error: + from bbot.errors import WebError + + error = WebError(result["_request_error"]) + error.response = result.get("_response") + raise error + return None + return result + return await original_request(self, *args, **kwargs) + + original_request_batch = WebHelper.request_batch + + async def patched_request_batch(self, urls, threads=10, **kwargs): + import blasthttp + + # Run the real entry-parsing and config-building logic unmodified + entries = [] + has_tracker = False + for entry in urls: + if isinstance(entry, str): + entries.append((entry, kwargs, None)) + elif isinstance(entry, tuple): + url = entry[0] + req_kwargs = entry[1] if len(entry) > 1 and isinstance(entry[1], dict) else kwargs + tracker = entry[2] if len(entry) > 2 else None + if tracker is not None: + has_tracker = True + entries.append((url, req_kwargs, tracker)) + else: + entries.append((str(entry), kwargs, None)) + + if not entries: + return [] + + configs = [] + trackers = [] + for url, req_kwargs, tracker in entries: + url, method, blast_kwargs = 
self._build_blasthttp_kwargs(url, **req_kwargs) + config = blasthttp.BatchConfig(url, **blast_kwargs) + configs.append(config) + trackers.append(tracker) -def pytest_collection_modifyitems(config, items): - # make sure all tests have the httpx_mock marker - for item in items: - item.add_marker( - pytest.mark.httpx_mock( - should_mock=should_mock, - assert_all_requests_were_expected=False, - assert_all_responses_were_requested=False, - can_send_already_matched_responses=True, - ) - ) + # Route through mock's batch handler instead of Rust client directly + batch_results = await mock.handle_batch(self.client, configs, concurrency=threads) + + from bbot.core.helpers.web.blast_response import BlasthttpResponse + + results = [] + for i, br in enumerate(batch_results): + if br.response is not None: + response = BlasthttpResponse(br.response, request_url=br.url, method="GET") + else: + response = None + if has_tracker: + results.append((br.url, response, trackers[i])) + else: + results.append((br.url, response)) + return results + + WebHelper.request = patched_request + WebHelper.request_batch = patched_request_batch + yield mock + WebHelper.request = original_request + WebHelper.request_batch = original_request_batch @pytest.fixture diff --git a/bbot/test/fastapi_test.py b/bbot/test/fastapi_test.py index a4a1d57107..be327ed541 100644 --- a/bbot/test/fastapi_test.py +++ b/bbot/test/fastapi_test.py @@ -7,7 +7,7 @@ @app.get("/start") async def start(targets: List[str] = Query(...)): - scanner = Scanner(*targets, modules=["httpx"]) + scanner = Scanner(*targets, modules=["http"]) events = [e async for e in scanner.async_start()] return [e.json() for e in events] diff --git a/bbot/test/mock_blasthttp.py b/bbot/test/mock_blasthttp.py new file mode 100644 index 0000000000..a4c2819e15 --- /dev/null +++ b/bbot/test/mock_blasthttp.py @@ -0,0 +1,426 @@ +""" +Mock fixture for blasthttp web requests. + +Intercepts WebHelper.request() at the Python level. 
Requests to external +hosts are handled by registered mock responses/callbacks. Requests to +localhost/127.0.0.1 pass through to the real blasthttp client. + +Drop-in types (MockRequest, MockResponse, TimeoutException) replace their +equivalents in test callback signatures. +""" + +import re +import json as _json +import asyncio +from urllib.parse import urlparse, urlencode + +from bbot.core.helpers.web.blast_response import BlasthttpResponse + + +# ── Mock types for test callbacks ──── + + +class TimeoutException(RuntimeError): + """Mock timeout exception for tests.""" + + pass + + +class MockRequest: + """Mock request object for test callbacks.""" + + def __init__(self, url, method="GET", headers=None, content=b""): + self.url = url + self.method = method + self.headers = headers or {} + self.content = content if isinstance(content, bytes) else content.encode() + + +class MockResponse: + """Mock response object for test callbacks.""" + + def __init__(self, status_code=200, json=None, text=None, headers=None): + self.status_code = status_code + if text is not None: + self.text = text + elif json is not None: + self.text = _json.dumps(json) + else: + self.text = "" + self.headers = headers or {} + + +# ── Internal mock helpers ────────────────────────────────────────── + + +class _MockRawResponse: + """Mimics blasthttp PyO3 Response for BlasthttpResponse.__init__.""" + + def __init__(self, status=200, url="", body="", headers=None, elapsed_ms=0): + self.status = status + self.url = url + self.body = body + self.body_bytes = body.encode("utf-8", errors="surrogateescape") if isinstance(body, str) else body + self.headers = headers or [] + self.elapsed_ms = elapsed_ms + + +class _MockBatchResult: + """Mimics blasthttp BatchResult for request_batch mocking.""" + + def __init__(self, url, response=None, error=None): + self.url = url + self.response = response + self.error = error + + +# ── Main mock class ─────────────────────────────────────────────── + + +class 
BlasthttpMock: + """ + Mock fixture that intercepts HTTPEngine.request() calls. + + Supports the add_response() / add_callback() API for mocking HTTP requests in + tests. + """ + + def __init__(self, should_mock_fn=None): + self._handlers = [] # FIFO queue of handlers + self._recycled = [] # consumed handlers available for reuse + self._should_mock = should_mock_fn or (lambda host: True) + + def add_response( + self, + url=None, + method=None, + text=None, + json=None, + content=None, + status_code=200, + headers=None, + match_headers=None, + match_json=None, + ): + """Register a static mock response.""" + # Normalize response headers to list of tuples + if headers is None: + header_list = [] + elif isinstance(headers, dict): + header_list = [] + for k, v in headers.items(): + if isinstance(v, list): + # Expand list values into multiple tuples (e.g. multiple Set-Cookie) + for item in v: + header_list.append((k, item)) + else: + header_list.append((k, v)) + elif isinstance(headers, list): + header_list = list(headers) + else: + header_list = [] + + # Build body — content (bytes) takes lowest priority after text/json + if text is not None: + body = text + # Auto-add Content-Type for text + if not any(k.lower() == "content-type" for k, _ in header_list): + header_list.append(("Content-Type", "text/plain; charset=utf-8")) + elif json is not None: + body = _json.dumps(json) + if not any(k.lower() == "content-type" for k, _ in header_list): + header_list.append(("Content-Type", "application/json")) + elif content is not None: + # Raw bytes content — decode to str for body field, keep as-is for body_bytes + if isinstance(content, bytes): + body = content.decode("utf-8", errors="surrogateescape") + else: + body = str(content) + if not any(k.lower() == "content-type" for k, _ in header_list): + header_list.append(("Content-Type", "application/octet-stream")) + else: + body = "" + + self._handlers.append( + { + "type": "response", + "url": url, + "method": method, + 
"match_headers": match_headers, + "match_json": match_json, + "status_code": status_code, + "body": body, + "headers": header_list, + } + ) + + def add_callback(self, callback, url=None): + """Register a callback that receives MockRequest and returns MockResponse.""" + self._handlers.append( + { + "type": "callback", + "url": url, + "callback": callback, + } + ) + + def should_intercept(self, url): + """Check if this URL should be intercepted by the mock.""" + host = urlparse(url).hostname or "" + return self._should_mock(host) + + def _url_matches(self, pattern, url): + """Match URL against string or compiled regex.""" + if pattern is None: + return True + if isinstance(pattern, re.Pattern): + return pattern.search(url) is not None + return url == pattern + + def _matches(self, handler, url, method, headers, body_str): + """Check if a handler matches the request criteria.""" + if not self._url_matches(handler.get("url"), url): + return False + + handler_method = handler.get("method") + if handler_method and method.upper() != handler_method.upper(): + return False + + match_headers = handler.get("match_headers") + if match_headers: + for k, v in match_headers.items(): + if headers.get(k) != v: + return False + + match_json = handler.get("match_json") + if match_json is not None: + try: + req_json = _json.loads(body_str) if body_str else {} + for k, v in match_json.items(): + if req_json.get(k) != v: + return False + except (ValueError, TypeError): + return False + + return True + + def _make_blast_response(self, url, method, status_code, body, headers): + """Create a BlasthttpResponse from mock data.""" + raw = _MockRawResponse( + status=status_code, + url=url, + body=body, + headers=headers, + ) + return BlasthttpResponse(raw, request_url=url, method=method) + + async def _find_and_execute(self, url, method, headers, body_str): + """ + Find a matching handler and execute it. + + Handlers are consumed in FIFO order. 
+ Consumed handlers are recycled so they can be reused for subsequent + requests (matching can_send_already_matched_responses=True). + """ + # Try primary handlers first, then recycled ones + for handler_list in (self._handlers, self._recycled): + for i, handler in enumerate(handler_list): + if handler["type"] == "response": + if self._matches(handler, url, method, headers, body_str): + # Consume from primary queue, recycle + if handler_list is self._handlers: + self._handlers.pop(i) + self._recycled.append(handler) + return self._make_blast_response( + url, method, handler["status_code"], handler["body"], handler["headers"] + ) + + elif handler["type"] == "callback": + if handler.get("url") is not None and not self._url_matches(handler["url"], url): + continue + + # Consume from primary queue, recycle + if handler_list is self._handlers: + self._handlers.pop(i) + self._recycled.append(handler) + + callback = handler["callback"] + content = body_str.encode() if isinstance(body_str, str) else (body_str or b"") + mock_request = MockRequest(url=url, method=method, headers=headers, content=content) + + # Call callback — may be sync or async, may raise + if asyncio.iscoroutinefunction(callback): + result = await callback(mock_request) + else: + result = callback(mock_request) + + # Convert MockResponse to BlasthttpResponse + if isinstance(result, MockResponse): + if isinstance(result.headers, dict): + resp_headers = [] + for k, v in result.headers.items(): + if isinstance(v, list): + for item in v: + resp_headers.append((k, item)) + else: + resp_headers.append((k, v)) + else: + resp_headers = result.headers or [] + return self._make_blast_response(url, method, result.status_code, result.text, resp_headers) + + return result + + # No handler matched — raise error (simulates unreachable host) + raise RuntimeError(f"No mock response registered for {method} {url}") + + async def handle_engine_request(self, web_helper_self, *args, **kwargs): + """ + Process kwargs like 
WebHelper.request() and return a mock response. + + Called by the patched request method when should_intercept() is True. + """ + raise_error = kwargs.pop("raise_error", False) + kwargs.pop("cache_for", None) + kwargs.pop("client", None) + kwargs.pop("stream", None) + kwargs.pop("files", None) + + allow_redirects = kwargs.pop("allow_redirects", None) + if allow_redirects is not None and "follow_redirects" not in kwargs: + kwargs["follow_redirects"] = allow_redirects + + if len(args) == 1: + kwargs["url"] = args[0] + args = () + + url = kwargs.pop("url", "") + method = kwargs.pop("method", "GET") + headers = kwargs.pop("headers", None) or {} + body = kwargs.pop("body", None) + data = kwargs.pop("data", None) + json_body = kwargs.pop("json", None) + # Pop remaining kwargs so they don't cause issues + cookies = kwargs.pop("cookies", None) + auth = kwargs.pop("auth", None) + kwargs.pop("timeout", None) + follow_redirects = kwargs.pop("follow_redirects", None) + kwargs.pop("max_redirects", None) + kwargs.pop("proxy", None) + kwargs.pop("retries", None) + kwargs.pop("params", None) + kwargs.pop("max_body_size", None) + + # Synthesize Authorization header from auth tuple (mirrors engine.py) + if auth: + import base64 + + user, passwd = auth + cred = base64.b64encode(f"{user}:{passwd}".encode()).decode() + headers["Authorization"] = f"Basic {cred}" + + # Synthesize Cookie header from cookies dict (mirrors engine.py) + if cookies: + cookie_str = "; ".join(f"{ck}={cv}" for ck, cv in cookies.items()) + headers["Cookie"] = cookie_str + + # Determine body string for matching + body_str = "" + if json_body is not None: + body_str = _json.dumps(json_body) + elif data is not None: + if isinstance(data, dict): + body_str = urlencode(data) + else: + body_str = str(data) + elif body is not None: + body_str = str(body) + + try: + response = await self._find_and_execute(url, method, headers, body_str) + + # Follow redirects if requested (mirrors blasthttp behavior) + max_hops = 10 + 
while follow_redirects and response is not None and max_hops > 0: + if not hasattr(response, "status_code"): + break + if response.status_code not in (301, 302, 303, 307, 308): + break + location = response.headers.get("location", "") + if not location: + break + # Resolve relative redirect URLs + if location.startswith("/"): + parsed_url = urlparse(url) + location = f"{parsed_url.scheme}://{parsed_url.netloc}{location}" + url = location + max_hops -= 1 + response = await self._find_and_execute(url, method, headers, body_str) + + return response + except Exception as e: + import logging + + logging.getLogger("bbot.test.mock").debug(f"Mock exception for {method} {url}: {e}") + error_msg = str(e) + if raise_error: + return {"_request_error": error_msg, "_response": None} + return None + + async def handle_batch(self, real_client, configs, concurrency, rate_limit=None): + """ + Process a list of BatchConfig objects through the mock. + + For each config, if the URL should be intercepted, route it through the + mock handlers. Otherwise pass it through to the real Rust client. + The return value mimics blasthttp's request_batch: a list of BatchResult-like + objects with .url, .response, and .error attributes. 
+ """ + mock_configs = [] + passthrough_configs = [] + passthrough_indices = [] + + for i, config in enumerate(configs): + url = config.url if hasattr(config, "url") else str(config) + if self.should_intercept(url): + mock_configs.append((i, config)) + else: + passthrough_configs.append(config) + passthrough_indices.append(i) + + # Get real results for passthrough (localhost) URLs + passthrough_results = {} + if passthrough_configs: + real_results = await real_client.request_batch(passthrough_configs, concurrency=concurrency) + for idx, result in zip(passthrough_indices, real_results): + passthrough_results[idx] = result + + # Build results in original order + results = [None] * len(configs) + + for idx, config in mock_configs: + url = config.url if hasattr(config, "url") else str(config) + method = getattr(config, "method", "GET") or "GET" + headers_raw = getattr(config, "headers", None) or [] + headers = {k: v for k, v in headers_raw} if headers_raw else {} + body_str = getattr(config, "body", "") or "" + + try: + response = await self._find_and_execute(url, method, headers, body_str) + if response is not None: + # response is a BlasthttpResponse — extract the raw response for BatchResult + raw = _MockRawResponse( + status=response.status_code, + url=url, + body=response.text, + headers=[(k, v) for k, v in response.headers.items()], + ) + results[idx] = _MockBatchResult(url=url, response=raw) + else: + results[idx] = _MockBatchResult(url=url, error="mock returned None") + except Exception as e: + results[idx] = _MockBatchResult(url=url, error=str(e)) + + for idx, result in passthrough_results.items(): + results[idx] = result + + return results diff --git a/bbot/test/test_step_1/test_bbot_fastapi.py b/bbot/test/test_step_1/test_bbot_fastapi.py index 3dca8aeded..2c122f3191 100644 --- a/bbot/test/test_step_1/test_bbot_fastapi.py +++ b/bbot/test/test_step_1/test_bbot_fastapi.py @@ -1,9 +1,12 @@ import time -import httpx +import json import multiprocessing from 
pathlib import Path from subprocess import Popen from contextlib import suppress +from urllib.request import urlopen, Request +from urllib.error import URLError +from urllib.parse import urlencode cwd = Path(__file__).parent.parent.parent @@ -11,7 +14,7 @@ def run_bbot_multiprocess(queue): from bbot.scanner import Scanner - scan = Scanner("http://127.0.0.1:8888", "blacklanternsecurity.com", modules=["httpx"]) + scan = Scanner("http://127.0.0.1:8888", "blacklanternsecurity.com", modules=["http"]) events = [e.json() for e in scan.start()] queue.put(events) @@ -39,22 +42,20 @@ def test_bbot_fastapi(bbot_httpserver): start_time = time.time() while True: try: - response = httpx.get("http://127.0.0.1:8978/ping") - response.raise_for_status() + response = urlopen("http://127.0.0.1:8978/ping") + response.read() break - except httpx.HTTPError: + except (URLError, ConnectionError): if time.time() - start_time > 60: raise TimeoutError("Server did not start within 60 seconds.") time.sleep(0.1) continue # run a scan - response = httpx.get( - "http://127.0.0.1:8978/start", - params={"targets": ["http://127.0.0.1:8888", "blacklanternsecurity.com"]}, - timeout=100, - ) - events = response.json() + params = urlencode({"targets": ["http://127.0.0.1:8888", "blacklanternsecurity.com"]}, doseq=True) + req = Request(f"http://127.0.0.1:8978/start?{params}") + response = urlopen(req, timeout=100) + events = json.loads(response.read()) assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index 9824bd3fee..38a84fe90d 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -286,7 +286,7 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config): out, err = capsys.readouterr() assert result is None assert "| dnsbrute " in out - assert "| httpx " in out + assert "| http " in out assert "| robots " in out 
# list modules by flag @@ -295,7 +295,7 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config): out, err = capsys.readouterr() assert result is None assert "| dnsbrute " in out - assert "| httpx " in out + assert "| http " in out assert "| robots " not in out # list modules by flag + required flag @@ -304,7 +304,7 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config): out, err = capsys.readouterr() assert result is None assert "| chaos " in out - assert "| httpx " not in out + assert "| http " not in out # list modules by flag + excluded flag monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-ef", "active", "-l"]) @@ -312,7 +312,7 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config): out, err = capsys.readouterr() assert result is None assert "| chaos " in out - assert "| httpx " not in out + assert "| http " not in out # list modules by flag + excluded module monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-em", "dnsbrute", "-l"]) @@ -320,7 +320,7 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config): out, err = capsys.readouterr() assert result is None assert "| dnsbrute " not in out - assert "| httpx " in out + assert "| http " in out # output modules override caplog.clear() diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 5f1e458ce0..cac3d49fe5 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -621,7 +621,7 @@ async def test_events(events, helpers): assert hostless_event_json["data"] == "asdf" assert "host" not in hostless_event_json - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) + http_response = scan.make_event(blasthttp_response, "HTTP_RESPONSE", parent=scan.root_event) assert http_response.parent_id == scan.root_event.id assert http_response.data["input"] == "http://example.com:80" assert ( diff 
--git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index 68bb524341..44a83d8394 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -963,7 +963,7 @@ async def test_parameter_validation(helpers): async def test_rm_temp_dir_at_exit(helpers): from bbot.scanner import Scanner - scan = Scanner("127.0.0.1", modules=["httpx"]) + scan = Scanner("127.0.0.1", modules=["http"]) await scan._prep() temp_dir = scan.home / "temp" @@ -1134,3 +1134,15 @@ def test_simhash_similarity(helpers): # Most importantly, verify the ordering is correct assert identical_similarity > slight_similarity > moderate_similarity > very_similarity > complete_similarity + + +def test_clean_dns_record(): + from bbot.core.helpers.misc import clean_dns_record + + assert clean_dns_record("www.example.com.") == "www.example.com" + assert clean_dns_record("www.example.com") == "www.example.com" + # dnspython to_text() can produce quoted strings for certain record types + assert clean_dns_record('"d1jwhzvlef5tfb.example.com"') == "d1jwhzvlef5tfb.example.com" + assert clean_dns_record("'d1jwhzvlef5tfb.example.com'") == "d1jwhzvlef5tfb.example.com" + # quotes + trailing dot + assert clean_dns_record('"d1jwhzvlef5tfb.example.com."') == "d1jwhzvlef5tfb.example.com" diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index 161fad5958..eeb3b714b4 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -40,9 +40,10 @@ def bbot_other_httpservers(): server.clear() - @pytest.mark.asyncio -async def test_manager_scope_accuracy_correct(bbot_scanner, bbot_httpserver, bbot_other_httpservers, bbot_httpserver_ssl): +async def test_manager_scope_accuracy_correct( + bbot_scanner, bbot_httpserver, bbot_other_httpservers, bbot_httpserver_ssl +): """ This test ensures that BBOT correctly handles different scope 
distance settings. It performs these tests for normal modules, output modules, and their graph variants, @@ -59,7 +60,9 @@ async def test_manager_scope_accuracy_correct(bbot_scanner, bbot_httpserver, bbo server_77.expect_request(uri="/").respond_with_data(response_data="") server_88.expect_request(uri="/").respond_with_data(response_data="") server_99.expect_request(uri="/").respond_with_data(response_data="") - server_111.expect_request(uri="/").respond_with_data(response_data="") + server_111.expect_request(uri="/").respond_with_data( + response_data="" + ) server_222.expect_request(uri="/").respond_with_data(response_data="") server_33.expect_request(uri="/").respond_with_data(response_data="") @@ -112,12 +115,12 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) await dummy_module_nodupes.setup() await dummy_graph_output_module.setup() await dummy_graph_batch_output_module.setup() - + scan.modules["dummy_module"] = dummy_module scan.modules["dummy_module_nodupes"] = dummy_module_nodupes scan.modules["dummy_graph_output_module"] = dummy_graph_output_module scan.modules["dummy_graph_batch_output_module"] = dummy_graph_batch_output_module - + await scan.helpers.dns._mock_dns(_dns_mock) if scan_callback is not None: scan_callback(scan) @@ -149,7 +152,13 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) ) assert len(events) == 3 - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66"]) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notrealzies"]) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "www.test.notreal"]) @@ 
-157,14 +166,32 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) for _all_events in (all_events, all_events_nodups): assert len(_all_events) == 3 - assert 1 == len([e for e in _all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in _all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is True and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in _all_events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in _all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is True and e.scope_distance == 1 + ] + ) assert 0 == len([e for e in _all_events if e.type == "DNS_NAME" and e.data == "test.notrealzies"]) assert 0 == len([e for e in _all_events if e.type == "DNS_NAME" and e.data == "www.test.notreal"]) assert 0 == len([e for e in _all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77"]) assert len(graph_output_events) == 3 - assert 1 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in graph_output_events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66"]) assert 0 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "test.notrealzies"]) assert 0 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "www.test.notreal"]) @@ -178,38 +205,152 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) ) assert len(events) == 4 - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notreal" and 
e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66"]) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notrealzies"]) - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77"]) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test2.notrealzies"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert len(all_events) == 9 - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is True and e.scope_distance == 1]) - assert 2 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test2.notrealzies" and e.internal is True and e.scope_distance == 2]) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == "test.notreal" and 
e.internal is False and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == "test2.notrealzies" and e.internal is True and e.scope_distance == 2 + ] + ) assert 0 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert len(all_events_nodups) == 7 - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test2.notrealzies" and e.internal is True and e.scope_distance == 2]) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == 
"DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" and e.data == "test2.notrealzies" and e.internal is True and e.scope_distance == 2 + ] + ) assert 0 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 6 - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + 
for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" + and e.data == "test.notrealzies" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" + and e.data == "www.test.notreal" + and e.internal is False + and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77"]) assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test2.notrealzies"]) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) @@ -222,39 +363,177 @@ async def do_scan(*args, _config={}, _dns_mock={}, scan_callback=None, **kwargs) ) assert len(events) == 7 - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == 
len( + [ + e + for e in events + if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test2.notrealzies"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert len(all_events) == 8 - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1]) - assert 2 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test2.notrealzies" and e.internal is True and e.scope_distance == 2]) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == 
"test.notrealzies" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == "test2.notrealzies" and e.internal is True and e.scope_distance == 2 + ] + ) assert 0 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert len(all_events_nodups) == 7 - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test2.notrealzies" and e.internal is True and e.scope_distance == 2]) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + 
for e in all_events_nodups + if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" and e.data == "test2.notrealzies" and e.internal is True and e.scope_distance == 2 + ] + ) assert 0 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 7 - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "www.test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1 
+ ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" + and e.data == "test.notrealzies" + and e.internal is False + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" + and e.data == "www.test.notreal" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test2.notrealzies"]) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) @@ -277,7 +556,15 @@ async def filter_event(self, event): async def handle_event(self, event): await self.emit_event( - {"host": str(event.host), "description": "yep", "severity": "CRITICAL", "confidence": "CONFIRMED", "name": "Test Finding"}, "FINDING", parent=event + { + "host": str(event.host), + "description": "yep", + "severity": "CRITICAL", + "confidence": "CONFIRMED", + "name": "Test Finding", + }, + "FINDING", + parent=event, ) def custom_setup(scan): @@ -293,38 +580,152 @@ def custom_setup(scan): ) assert len(events) == 5 - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notrealzies"]) assert 0 == 
len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77"]) - assert 1 == len([e for e in events if e.type == "FINDING" and e.data["host"] == "127.0.0.77" and e.internal is False and e.scope_distance == 3]) + assert 1 == len( + [ + e + for e in events + if e.type == "FINDING" and e.data["host"] == "127.0.0.77" and e.internal is False and e.scope_distance == 3 + ] + ) assert len(all_events) == 8 - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1]) - assert 2 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 2]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 3]) - assert 1 == len([e for e in all_events if e.type == "FINDING" and e.data["host"] == "127.0.0.77" and e.internal is False and e.scope_distance == 3]) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 3 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "FINDING" and e.data["host"] == "127.0.0.77" and e.internal is False and e.scope_distance == 3 + ] + ) assert len(all_events_nodups) 
== 6 - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 3]) - assert 1 == len([e for e in all_events_nodups if e.type == "FINDING" and e.data["host"] == "127.0.0.77" and e.internal is False and e.scope_distance == 3]) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 3 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "FINDING" and e.data["host"] == "127.0.0.77" and e.internal is False and e.scope_distance == 3 + ] + ) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 7 - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == 
"127.0.0.66" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notrealzies" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 3]) - assert 1 == len([e for e in _graph_output_events if e.type == "FINDING" and e.data["host"] == "127.0.0.77" and e.internal is False and e.scope_distance == 3]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.66" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" + and e.data == "test.notrealzies" + and e.internal is True + and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is True and e.scope_distance == 3 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "FINDING" + and e.data["host"] == "127.0.0.77" + and e.internal is False + and e.scope_distance == 3 + ] + ) - # httpx/speculate IP_RANGE --> IP_ADDRESS --> OPEN_TCP_PORT --> URL, search distance = 0 + # http/speculate IP_RANGE --> IP_ADDRESS --> OPEN_TCP_PORT --> URL, search distance = 0 events, all_events, all_events_nodups, graph_output_events, graph_output_batch_events = await do_scan( "127.0.0.1/31", - modules=["httpx"], + modules=["http"], _config={ "dns": {"minimal": False, "search_distance": 2}, "scope": {"report_distance": 1, "search_distance": 0}, @@ -337,68 +738,313 @@ def custom_setup(scan): ) assert len(events) == 7 - assert 1 == len([e for e in events if e.type == 
"IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888"]) - assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in events if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/"]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e 
for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) assert len(all_events) == 14 - assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 and "spider-danger" in e.tags]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal is True and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + 
e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "HTTP_RESPONSE" + and e.data["input"] == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.1:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + and "spider-danger" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.77:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) assert len(all_events_nodups) == 12 - assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and 
e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 and "spider-danger" in e.tags]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal is True and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and 
e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "HTTP_RESPONSE" + and e.data["input"] == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.1:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + and "spider-danger" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.77:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 7 - assert 1 == len([e for e in _graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 
== len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888"]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/"]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "URL" + and e.url == "http://127.0.0.1:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/"] + ) + 
assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/"] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) - # httpx/speculate IP_RANGE --> IP_ADDRESS --> OPEN_TCP_PORT --> URL, search distance = 0, in_scope_only = False + # http/speculate IP_RANGE --> IP_ADDRESS --> OPEN_TCP_PORT --> URL, search distance = 0, in_scope_only = False events, all_events, all_events_nodups, graph_output_events, graph_output_batch_events = await do_scan( "127.0.0.1/31", - modules=["httpx"], + modules=["http"], _config={ "dns": {"minimal": False, "search_distance": 2}, "scope": {"search_distance": 0, "report_distance": 1}, "excavate": True, "speculate": True, - "modules": {"httpx": {"in_scope_only": False}, "speculate": {"ports": "8888"}}, + "modules": {"http": {"in_scope_only": False}, "speculate": {"ports": "8888"}}, "omit_event_types": ["HTTP_RESPONSE", "URL_UNVERIFIED"], }, ) @@ -406,173 +1052,900 @@ def custom_setup(scan): assert len(events) == 8 # 2024-08-01 # Removed OPEN_TCP_PORT("127.0.0.77:8888") - # before, this event was speculated off the URL_UNVERIFIED, and that's what was used by httpx to generate the URL. it was graph-important. - # now for whatever reason, httpx is visiting the url directly and the open port isn't being used + # before, this event was speculated off the URL_UNVERIFIED, and that's what was used by http module to generate the URL. it was graph-important. + # now for whatever reason, http module is visiting the url directly and the open port isn't being used # I don't know what changed exactly, but it doesn't matter, either way is equally valid and bbot is meant to be flexible this way. 
- assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888"]) - assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in events if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/"]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and 
e.scope_distance == 1 + ] + ) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) - assert 1 == len([e for e in events if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.77:8888"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/"]) assert len(all_events) == 18 - assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == 
len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2]) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL" and e.url == 
"http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "HTTP_RESPONSE" + and e.data["input"] == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.1:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.77:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.88:8888/" + and e.internal is True + and e.scope_distance == 2 + ] + ) assert len(all_events_nodups) == 16 - assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and 
e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 and "spider-danger" in e.tags]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal is True and e.scope_distance == 2]) - assert 1 
== len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2]) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "HTTP_RESPONSE" + and e.data["input"] == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.1:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + and "spider-danger" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + 
for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.77:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.88:8888/" + and e.internal is True + and e.scope_distance == 2 + ] + ) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 8 - assert 1 == len([e for e in _graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888"]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is 
False and e.scope_distance == 0]) - assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/" and "spider-danger" in e.tags]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "URL" + and e.url == "http://127.0.0.1:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/"] + ) + assert 0 == len( + [ + e + for e in _graph_output_events + if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/" and "spider-danger" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) - assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 0 == 
len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/"]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "URL" + and e.url == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + ] + ) + assert 0 == len( + [ + e + for e in _graph_output_events + if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.88:8888/"]) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.88:8888/"] + ) - # httpx/speculate IP_RANGE --> IP_ADDRESS --> OPEN_TCP_PORT --> URL, search distance = 1 + # http/speculate IP_RANGE --> IP_ADDRESS --> OPEN_TCP_PORT --> URL, search distance = 1 events, all_events, all_events_nodups, graph_output_events, graph_output_batch_events = await do_scan( "127.0.0.1/31", - modules=["httpx"], + modules=["http"], _config={ "dns": {"minimal": False, "search_distance": 2}, "scope": {"report_distance": 1, "search_distance": 1}, "excavate": True, "speculate": True, - "modules": {"httpx": {"in_scope_only": False}, "speculate": {"ports": "8888"}}, + "modules": {"http": {"in_scope_only": False}, "speculate": {"ports": "8888"}}, "omit_event_types": ["HTTP_RESPONSE", "URL_UNVERIFIED"], }, ) assert len(events) == 8 - assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and 
e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888"]) - assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in events if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/"]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) - assert 1 == len([e for e in events if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e 
for e in events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/"]) assert len(all_events) == 22 - assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 and "spider-danger" in e.tags]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal is True and 
e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.88:8888" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events if e.type == "URL" and e.url == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.99:8888/" and e.internal is True and e.scope_distance == 3]) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type 
== "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "HTTP_RESPONSE" + and e.data["input"] == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.1:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + and "spider-danger" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.77:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.88:8888/" + and e.internal is True + and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == 
"127.0.0.88:8888" + and e.internal is True + and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL" and e.url == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.88:8888/" + and e.internal is True + and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.99:8888/" + and e.internal is True + and e.scope_distance == 3 + ] + ) assert len(all_events_nodups) == 20 - assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == 
"URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 and "spider-danger" in e.tags]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.88:8888" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.url == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.99:8888/" and e.internal is True and e.scope_distance == 3]) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if 
e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "HTTP_RESPONSE" + and e.data["input"] == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.1:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + and "spider-danger" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.77:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if 
e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.88:8888/" + and e.internal is True + and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.88:8888" + and e.internal is True + and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL" and e.url == "http://127.0.0.88:8888/" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.88:8888/" + and e.internal is True + and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.99:8888/" + and e.internal is True + and e.scope_distance == 3 + ] + ) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 8 - assert 1 == len([e for e in _graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in _graph_output_events + 
if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:8888"]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.url == "http://127.0.0.1:8888/" and e.internal is False and e.scope_distance == 0]) - assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/"]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "URL" + and e.url == "http://127.0.0.1:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/"] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.77:8888/"] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == 
len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) - assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.url == "http://127.0.0.77:8888/" and e.internal is False and e.scope_distance == 1]) - assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/"]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "URL" + and e.url == "http://127.0.0.77:8888/" + and e.internal is False + and e.scope_distance == 1 + ] + ) + assert 0 == len( + [ + e + for e in _graph_output_events + if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.88:8888/"]) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.88:8888/"] + ) # 2 events from a single HTTP_RESPONSE events, all_events, all_events_nodups, graph_output_events, graph_output_batch_events = await do_scan( - "127.0.0.111/31", "127.0.0.222", "127.0.0.33", + "127.0.0.111/31", + "127.0.0.222", + "127.0.0.33", seeds=["127.0.0.111/31"], - modules=["httpx"], + modules=["http"], output_modules=["python"], _config={ "dns": {"minimal": False, "search_distance": 2}, @@ -585,25 +1958,82 @@ def custom_setup(scan): ) assert len(events) == 12 - assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.110"]) - assert 1 == len([e for e in events if e.type == 
"IP_ADDRESS" and e.data == "127.0.0.111" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.111" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.110:8888"]) - assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.111:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in events if e.type == "URL" and e.url == "http://127.0.0.111:8888/" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.111:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" + and e.url == "http://127.0.0.111:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.111:8888"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.111:8888/"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.222:8889/"]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.222" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.222" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.33:8889/"]) - assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.33" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.33" and e.internal is False and 
e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8888"]) assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8888"]) assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889"]) - assert 1 == len([e for e in events if e.type == "URL" and e.url == "http://127.0.0.222:8889/" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" + and e.url == "http://127.0.0.222:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.222:8889"]) - assert 1 == len([e for e in events if e.type == "URL" and e.url == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" and e.url == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.33:8889"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.44:8888/"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.44"]) @@ -613,88 +2043,599 @@ def custom_setup(scan): assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.55:8888"]) assert len(all_events) == 31 - assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.110" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" 
and e.data == "127.0.0.111" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.110:8888" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.111:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL" and e.url == "http://127.0.0.111:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.111:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.111:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.222:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.222" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.33" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8888" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type 
== "OPEN_TCP_PORT" and e.data == "127.0.0.33:8888" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL" and e.url == "http://127.0.0.222:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.222:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL" and e.url == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.44:8888/" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.44" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.55:8888/" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.55" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.44:8888" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.55:8888" and e.internal is True and e.scope_distance == 1]) + assert 1 == len( + [ + e + 
for e in all_events + if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.110" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.111" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.110:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.111:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL" + and e.url == "http://127.0.0.111:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "HTTP_RESPONSE" + and e.data["input"] == "127.0.0.111:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.111:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.222:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.222" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.33:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.33" and 
e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.222:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.222:8889" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.222:8889" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.33:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.33:8889" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.33:8889" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL" + and e.url == "http://127.0.0.222:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.222:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL" and e.url == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.33:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.44:8888/" + and e.internal is True + and 
e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.44" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.55:8888/" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.55" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.44:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.55:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) assert len(all_events_nodups) == 27 - assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.110" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.111" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.110:8888" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.111:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.url == "http://127.0.0.111:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.111:8888" and e.internal 
is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.111:8888/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.222:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.222" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.33" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8888" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8888" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.url == "http://127.0.0.222:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.222:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.url == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 
== len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.44:8888/" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.44" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.55:8888/" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.55" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.44:8888" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.55:8888" and e.internal is True and e.scope_distance == 1]) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.110" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.111" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.110:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.111:8888" + and e.internal is False + and e.scope_distance == 0 + ] + 
) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL" + and e.url == "http://127.0.0.111:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "HTTP_RESPONSE" + and e.data["input"] == "127.0.0.111:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.111:8888/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.222:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.222" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.33:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.33" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.222:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.222:8889" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.33:8888" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.33:8889" + and e.internal is True + and 
e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL" + and e.url == "http://127.0.0.222:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.222:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL" and e.url == "http://127.0.0.33:8889/" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "HTTP_RESPONSE" + and e.data["url"] == "http://127.0.0.33:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.44:8888/" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.44" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "URL_UNVERIFIED" + and e.url == "http://127.0.0.55:8888/" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.55" and e.internal is True and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.44:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.55:8888" + and e.internal is True + and e.scope_distance == 1 + ] + ) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 12 - assert 1 == len([e 
for e in _graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_RANGE" + and e.data == "127.0.0.110/31" + and e.internal is False + and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.110"]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.111" and e.internal is False and e.scope_distance == 0]) - assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.110:8888"]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.111:8888" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.url == "http://127.0.0.111:8888/" and e.internal is False and e.scope_distance == 0]) - assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.111:8888"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.111:8888/"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.222:8889/"]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.111" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.110:8888"] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.111:8888" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "URL" + and e.url == "http://127.0.0.111:8888/" + and 
e.internal is False + and e.scope_distance == 0 + ] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.111:8888"] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.111:8888/"] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.222:8889/"] + ) assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.222"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.33:8889/"]) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.33:8889/"] + ) assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.33"]) - assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8888"]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889"]) + assert 0 == len( + [e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8888"] + ) + assert 1 == len( + [e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889"] + ) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8888"]) assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889"]) - assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.url == "http://127.0.0.222:8889/" and e.internal is False and e.scope_distance == 0]) - assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.222:8889"]) - assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.url == "http://127.0.0.33:8889/" and 
e.internal is False and e.scope_distance == 0]) - assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.33:8889"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.44:8888/"]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "URL" + and e.url == "http://127.0.0.222:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.222:8889"] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "URL" + and e.url == "http://127.0.0.33:8889/" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.33:8889"] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.44:8888/"] + ) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.44"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.55:8888/"]) + assert 0 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.55:8888/"] + ) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.55"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.44:8888"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.55:8888"]) @@ -707,123 +2648,476 @@ def custom_setup(scan): _dns_mock={"www.bbottest.notreal": {"A": ["127.0.1.0"]}, "test.notreal": {"A": ["127.0.0.1"]}}, ) - assert len(events) == 7 - assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and 
e.internal is False and e.scope_distance == 0]) + # sslcert now watches HTTP_RESPONSE, which auto-enables the http module. + # This adds HTTP_RESPONSE, URL (from http), and URL_UNVERIFIED (from speculate) events. + assert len(events) == 10 + assert 1 == len( + [ + e + for e in events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999"]) assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999"]) - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal is False and e.scope_distance == 1 and str(e.module) == "sslcert" and "affiliate" in e.tags]) + assert 1 == len([e for e in events if e.type == "HTTP_RESPONSE" and str(e.module) == "http"]) + assert 1 == len([e for e in events if e.type == "URL" and str(e.module) == "http"]) + assert 1 == len([e for e in events if e.type == "URL_UNVERIFIED" and str(e.module) == "speculate"]) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" + and e.data == "test.notreal" + and e.internal is False + and e.scope_distance == 0 + and str(e.module) == "sslcert" + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" + and e.data == "www.bbottest.notreal" + and e.internal is False + and e.scope_distance == 1 + and str(e.module) == "sslcert" + and "affiliate" in e.tags + ] + ) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) assert 0 == len([e for e in events 
if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal"]) - assert len(all_events) == 13 - assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal is True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal is False and e.scope_distance == 1 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999" and e.internal is True and e.scope_distance == 1 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal" and e.internal is True and e.scope_distance == 2 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal is True and e.scope_distance == 0 and str(e.module) == "speculate"]) - - assert len(all_events_nodups) == 11 - assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in 
all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal is True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal is False and e.scope_distance == 1 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999" and e.internal is True and e.scope_distance == 1 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal" and e.internal is True and e.scope_distance == 2 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal is True and e.scope_distance == 0 and str(e.module) == "speculate"]) + assert len(all_events) == 16 + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and 
e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:9999" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 2 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:9999" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and str(e.module) == "http"]) + assert 1 == len([e for e in all_events if e.type == "URL" and str(e.module) == "http"]) + assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and str(e.module) == "speculate"]) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" + and e.data == "test.notreal" + and e.internal is False + and e.scope_distance == 0 + and str(e.module) == "sslcert" + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME" + and e.data == "www.bbottest.notreal" + and e.internal is False + and e.scope_distance == 1 + and str(e.module) == "sslcert" + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "www.bbottest.notreal:9999" + and e.internal is True + and e.scope_distance == 1 + and str(e.module) == "speculate" + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "DNS_NAME_UNRESOLVED" + and e.data == "bbottest.notreal" + and e.internal is True + and e.scope_distance == 2 + and str(e.module) == "speculate" + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "test.notreal:9999" + and e.internal is True + and e.scope_distance == 0 + and str(e.module) == "speculate" + ] + ) + + assert len(all_events_nodups) == 14 + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) + 
assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:9999" + and e.internal is True + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:9999" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and str(e.module) == "http"]) + assert 1 == len([e for e in all_events_nodups if e.type == "URL" and str(e.module) == "http"]) + assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and str(e.module) == "speculate"]) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" + and e.data == "test.notreal" + and e.internal is False + and e.scope_distance == 0 + and str(e.module) == "sslcert" + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME" + and e.data == "www.bbottest.notreal" + and e.internal is False + and e.scope_distance == 1 + and str(e.module) == "sslcert" + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "www.bbottest.notreal:9999" + and e.internal is True + and e.scope_distance == 1 + and str(e.module) == "speculate" + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "DNS_NAME_UNRESOLVED" + and e.data == "bbottest.notreal" + and e.internal is True + and e.scope_distance == 2 + and str(e.module) == "speculate" + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == 
"test.notreal:9999" + and e.internal is True + and e.scope_distance == 0 + and str(e.module) == "speculate" + ] + ) for _graph_output_events in (graph_output_events, graph_output_batch_events): - assert len(_graph_output_events) == 7 - assert 1 == len([e for e in _graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0]) + assert len(_graph_output_events) == 10 + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is False and e.scope_distance == 0 + ] + ) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999"]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal is False and e.scope_distance == 0]) - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal is False and e.scope_distance == 1 and str(e.module) == "sslcert"]) - assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999"]) - assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal"]) - assert 0 == len([e for e in _graph_output_events if e.type == 
"OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:9999" + and e.internal is False + and e.scope_distance == 0 + ] + ) + assert 1 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and str(e.module) == "http"]) + assert 1 == len([e for e in _graph_output_events if e.type == "URL" and str(e.module) == "http"]) + assert 1 == len( + [e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and str(e.module) == "speculate"] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" + and e.data == "test.notreal" + and e.internal is False + and e.scope_distance == 0 + and str(e.module) == "sslcert" + ] + ) + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "DNS_NAME" + and e.data == "www.bbottest.notreal" + and e.internal is False + and e.scope_distance == 1 + and str(e.module) == "sslcert" + ] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999"] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal"] + ) + assert 0 == len( + [e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"] + ) # sslcert with out-of-scope chain events, all_events, all_events_nodups, graph_output_events, graph_output_batch_events = await do_scan( "127.0.1.0", seeds=["127.0.0.0/31"], modules=["sslcert"], - _config={"scope": {"search_distance": 1, "report_distance": 0}, "speculate": True, "modules": {"speculate": {"ports": "9999"}}}, + _config={ + "scope": {"search_distance": 1, "report_distance": 0}, + "speculate": True, + "modules": {"speculate": {"ports": "9999"}}, + }, _dns_mock={"www.bbottest.notreal": {"A": ["127.0.0.1"]}, "test.notreal": {"A": ["127.0.1.0"]}}, ) - assert len(events) == 4 - assert 1 == len([e for e in 
events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 1]) + # sslcert now depends on http (via HTTP_RESPONSE), and http doesn't probe out-of-scope + # servers, so the entire sslcert chain doesn't fire in this scenario. + assert len(events) == 3 + assert 1 == len( + [ + e + for e in events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 1 + ] + ) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999"]) - assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 and str(e.module) == "sslcert"]) + assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notreal"]) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) - assert len(all_events) == 11 - assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 2]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal is True and e.scope_distance == 2]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == 
"127.0.0.1:9999" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal is True and e.scope_distance == 3 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal is True and e.scope_distance == 0 and str(e.module) == "speculate"]) - - assert len(all_events_nodups) == 9 - assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal is True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal is True and e.scope_distance == 3 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal is True and e.scope_distance == 0 and str(e.module) == "speculate"]) 
+ assert len(all_events) == 6 + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:9999" + and e.internal is True + and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:9999" + and e.internal is True + and e.scope_distance == 2 + ] + ) + + assert len(all_events_nodups) == 6 + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is True and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.0:9999" + and e.internal is True + and e.scope_distance == 2 + ] + ) + assert 1 == len( + [ + e + for e in all_events_nodups + if e.type == "OPEN_TCP_PORT" + and e.data == "127.0.0.1:9999" + and e.internal is True + and e.scope_distance == 2 + ] + ) for _graph_output_events in (graph_output_events, graph_output_batch_events): - assert len(_graph_output_events) == 6 - assert 1 == len([e for e in graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal 
is False and e.scope_distance == 1]) - assert 0 == len([e for e in graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal is True and e.scope_distance == 2]) - assert 0 == len([e for e in graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999"]) - assert 1 == len([e for e in graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal is True and e.scope_distance == 1]) - assert 1 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal is False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 0 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal"]) - assert 0 == len([e for e in graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) + assert len(_graph_output_events) == 3 + assert 1 == len( + [ + e + for e in _graph_output_events + if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal is False and e.scope_distance == 1 + ] + ) + assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) + assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1"]) + assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999"]) + assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999"]) + assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal"]) + assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal"]) + assert 0 == len( + [e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"] + ) @pytest.mark.asyncio async 
def test_manager_blacklist(bbot_scanner, bbot_httpserver, caplog): - bbot_httpserver.expect_request(uri="/").respond_with_data(response_data="") + bbot_httpserver.expect_request(uri="/").respond_with_data( + response_data="" + ) # dns search distance = 1, report distance = 0 scan = bbot_scanner( - "127.0.0.0/29", "test.notreal", + "127.0.0.0/29", + "test.notreal", seeds=["http://127.0.0.1:8888"], - modules=["httpx"], + modules=["http"], config={"excavate": True, "dns": {"minimal": False, "search_distance": 1}, "scope": {"report_distance": 0}}, blacklist=["127.0.0.64/29"], ) await scan._prep() - await scan.helpers.dns._mock_dns({ - "www-prod.test.notreal": {"A": ["127.0.0.66"]}, - "www-dev.test.notreal": {"A": ["127.0.0.22"]}, - }) + await scan.helpers.dns._mock_dns( + { + "www-prod.test.notreal": {"A": ["127.0.0.66"]}, + "www-dev.test.notreal": {"A": ["127.0.0.22"]}, + } + ) events = [e async for e in scan.async_start()] @@ -831,7 +3125,10 @@ async def test_manager_blacklist(bbot_scanner, bbot_httpserver, caplog): # the hostname is in-scope, but its IP is blacklisted, therefore we shouldn't see it assert not any(e for e in events if e.type == "URL_UNVERIFIED" and e.url == "http://www-prod.test.notreal:8888/") - assert 'Not forwarding DNS_NAME("www-prod.test.notreal", module=excavate' in caplog.text and 'because it has a blacklisted DNS record' in caplog.text + assert ( + 'Not forwarding DNS_NAME("www-prod.test.notreal", module=excavate' in caplog.text + and "because it has a blacklisted DNS record" in caplog.text + ) @pytest.mark.asyncio @@ -870,9 +3167,13 @@ async def test_scope_accuracy_with_special_urls(bbot_scanner, bbot_httpserver): was causing special URLs to be rejected by critical internal modules like `_scan_egress`, leading to the output of unwanted URLs. 
""" bbot_httpserver.expect_request(uri="/v2/users/spacex").respond_with_data(response_data="") - bbot_httpserver.expect_request(uri="/u/spacex").respond_with_data(response_data="") + bbot_httpserver.expect_request(uri="/u/spacex").respond_with_data( + response_data="" + ) - scan = bbot_scanner("ORG:spacex", modules=["httpx", "social", "dockerhub"], config={"speculate": True, "excavate": True}) + scan = bbot_scanner( + "ORG:spacex", modules=["http", "social", "dockerhub"], config={"speculate": True, "excavate": True} + ) await scan._prep() scan.modules["dockerhub"].site_url = "http://127.0.0.1:8888" @@ -887,7 +3188,7 @@ class DummyModule(BaseModule): accept_dupes = True accept_url_special = True events = [] - + async def handle_event(self, event): self.events.append(event) @@ -895,10 +3196,12 @@ async def handle_event(self, event): scan.modules["dummy_module"] = dummy_module events = [e async for e in scan.async_start()] - + # there are actually 2 URL events. They are both from the same URL, but one was extracted by the full URL regex, and the other by the src/href= regex. # however, they should be deduped by scan_ingress. 
- bad_url_events = [e for e in dummy_module.events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/asdf.js"] + bad_url_events = [ + e for e in dummy_module.events if e.type == "URL_UNVERIFIED" and e.url == "http://127.0.0.1:8888/asdf.js" + ] assert len(bad_url_events) == 1 # they should both be internal assert all(e.internal is True for e in bad_url_events) diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 8054e3c95d..ab0259a306 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -9,7 +9,7 @@ @pytest.mark.asyncio -async def test_modules_basic_checks(events, httpx_mock): +async def test_modules_basic_checks(events, blasthttp_mock): from bbot.scanner import Scanner scan = Scanner(config={"omit_event_types": ["URL_UNVERIFIED"]}) @@ -162,8 +162,7 @@ async def test_modules_basic_checks(events, httpx_mock): assert "subdomain-enum" in all_preloaded["dnsbrute"]["flags"] assert "wordlist" in all_preloaded["dnsbrute"]["config"] assert type(all_preloaded["dnsbrute"]["config"]["max_depth"]) == int - assert all_preloaded["sslcert"]["deps"]["pip"] - assert all_preloaded["sslcert"]["deps"]["apt"] + assert all_preloaded["baddns"]["deps"]["pip"] assert all_preloaded["dnsbrute"]["deps"]["common"] assert all_preloaded["gowitness"]["deps"]["ansible"] @@ -406,7 +405,7 @@ async def setup(self): @pytest.mark.asyncio -async def test_modules_basic_stats(helpers, events, bbot_scanner, httpx_mock, monkeypatch): +async def test_modules_basic_stats(helpers, events, bbot_scanner, blasthttp_mock, monkeypatch): from bbot.modules.base import BaseModule class dummy(BaseModule): diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index ff8e4d0214..4dfb0a7fc3 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -515,7 +515,7 @@ async def 
test_preset_module_resolution(clean_default_config): assert "active" in dotnetnuke_flags assert "subdomain-enum" in sslcert_flags assert "subdomain-enum" in wayback_flags - assert "httpx" in dotnetnuke_preloaded["deps"]["modules"] + assert "http" in dotnetnuke_preloaded["deps"]["modules"] # make sure we have the expected defaults assert not preset.scan_modules @@ -532,7 +532,7 @@ async def test_preset_module_resolution(clean_default_config): # make sure dependency resolution works as expected preset = Preset(modules=["dotnetnuke"]).bake() - assert set(preset.scan_modules) == {"dotnetnuke", "httpx"} + assert set(preset.scan_modules) == {"dotnetnuke", "http"} # make sure flags work as expected preset = Preset(flags=["subdomain-enum"]).bake() @@ -565,7 +565,7 @@ async def test_preset_module_resolution(clean_default_config): # normal module enableement preset = Preset(modules=["sslcert", "dotnetnuke", "wayback"]).bake() - assert set(preset.scan_modules) == {"sslcert", "dotnetnuke", "wayback", "httpx"} + assert set(preset.scan_modules) == {"sslcert", "dotnetnuke", "wayback", "http"} # modules + flag exclusions preset = Preset(exclude_flags=["active"], modules=["sslcert", "dotnetnuke", "wayback"]).bake() @@ -588,7 +588,7 @@ async def test_preset_module_resolution(clean_default_config): "excavate", "unarchive", "txt", - "httpx", + "http", "csv", "dotnetnuke", } @@ -1069,7 +1069,7 @@ async def test_preset_override(clean_default_config): assert preset.config["web"]["spider_distance"] == 1 assert preset.config["web"]["spider_depth"] == 2 assert preset.config["modules"]["asdf"]["option1"] == "fdsa" - assert set(preset.scan_modules) == {"httpx", "c99", "robots", "virustotal", "securitytrails"} + assert set(preset.scan_modules) == {"http", "c99", "robots", "virustotal", "securitytrails"} async def test_preset_require_exclude(clean_default_config): @@ -1101,7 +1101,7 @@ def get_module_flags(p): assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) assert 
"chaos" in [x[0] for x in module_flags] - assert "httpx" not in [x[0] for x in module_flags] + assert "http" not in [x[0] for x in module_flags] assert all("passive" in flags for module, flags in module_flags) assert not any("active" in flags for module, flags in module_flags) assert any("safe" in flags for module, flags in module_flags) @@ -1112,7 +1112,7 @@ def get_module_flags(p): assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) assert "chaos" in [x[0] for x in module_flags] - assert "httpx" not in [x[0] for x in module_flags] + assert "http" not in [x[0] for x in module_flags] assert all("passive" in flags for module, flags in module_flags) assert not any("active" in flags for module, flags in module_flags) assert any("safe" in flags for module, flags in module_flags) @@ -1123,7 +1123,7 @@ def get_module_flags(p): assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) assert "dnsbrute" not in [x[0] for x in module_flags] - assert "httpx" in [x[0] for x in module_flags] + assert "http" in [x[0] for x in module_flags] assert any("passive" in flags for module, flags in module_flags) assert any("active" in flags for module, flags in module_flags) assert any("safe" in flags for module, flags in module_flags) diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py index 0e0bb4c690..ec2520783b 100644 --- a/bbot/test/test_step_1/test_python_api.py +++ b/bbot/test/test_step_1/test_python_api.py @@ -45,7 +45,8 @@ async def test_python_api(clean_default_config): # make sure config loads properly bbot_home = "/tmp/.bbot_python_api_test" - Scanner("127.0.0.1", config={"home": bbot_home}) + scan4 = Scanner("127.0.0.1", config={"home": bbot_home}) + await scan4._prep() assert os.environ["BBOT_TOOLS"] == str(Path(bbot_home) / "tools") # output modules override @@ -84,7 +85,8 @@ async def test_python_api_sync(clean_default_config): assert list(scan2.helpers.read_file(out_file)) # make 
sure config loads properly bbot_home = "/tmp/.bbot_python_api_test" - Scanner("127.0.0.1", config={"home": bbot_home}) + scan3 = Scanner("127.0.0.1", config={"home": bbot_home}) + await scan3._prep() assert os.environ["BBOT_TOOLS"] == str(Path(bbot_home) / "tools") @@ -109,16 +111,18 @@ def test_python_api_sync_no_pending_tasks(): assert len(pending) == 0, f"Found {len(pending)} pending tasks after scan: {pending}" -def test_python_api_validation(): +async def test_python_api_validation(): from bbot.scanner import Scanner, Preset # invalid target with pytest.raises(ValidationError) as error: - Scanner("asdf:::asdf") + scan = Scanner("asdf:::asdf") + await scan._prep() assert str(error.value) == 'Unable to autodetect data type from "asdf:::asdf"' # invalid module with pytest.raises(ValidationError) as error: - Scanner(modules=["asdf"]) + scan = Scanner(modules=["asdf"]) + await scan._prep() assert str(error.value) == 'Could not find scan module "asdf". Did you mean "asn"?' # invalid output module with pytest.raises(ValidationError) as error: @@ -126,7 +130,8 @@ def test_python_api_validation(): assert str(error.value) == 'Could not find output module "asdf". Did you mean "nats"?' # invalid excluded module with pytest.raises(ValidationError) as error: - Scanner(exclude_modules=["asdf"]) + scan = Scanner(exclude_modules=["asdf"]) + await scan._prep() assert str(error.value) == 'Could not find module "asdf". Did you mean "asn"?' # invalid flag with pytest.raises(ValidationError) as error: @@ -142,7 +147,8 @@ def test_python_api_validation(): assert str(error.value) == 'Could not find flag "activ". Did you mean "active"?' # output module as normal module with pytest.raises(ValidationError) as error: - Scanner(modules=["json"]) + scan = Scanner(modules=["json"]) + await scan._prep() assert str(error.value) == 'Could not find scan module "json". Did you mean "asn"?' 
# normal module as output module with pytest.raises(ValidationError) as error: @@ -150,7 +156,8 @@ def test_python_api_validation(): assert str(error.value) == 'Could not find output module "robots". Did you mean "rabbitmq"?' # invalid preset type with pytest.raises(ValidationError) as error: - Scanner(preset="asdf") + scan = Scanner(preset="asdf") + await scan._prep() assert str(error.value) == 'Preset must be of type Preset, not "str"' # include nonexistent preset with pytest.raises(ValidationError) as error: diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index 81e063e053..4c6f1deb8e 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -225,41 +225,41 @@ def mock_event(type, module, parent=None): mock_scan = SimpleNamespace(status_frequency=60) stats = ScanStats(mock_scan) - httpx_mod = mock_module("httpx", ["URL", "HTTP_RESPONSE"]) + http_mod = mock_module("http", ["URL", "HTTP_RESPONSE"]) excavate_mod = mock_module("excavate", ["URL_UNVERIFIED", "WEB_PARAMETER"]) - ffuf_mod = mock_module("ffuf_shortnames", ["URL_UNVERIFIED"]) - ffuf2_mod = mock_module("ffuf", ["URL_UNVERIFIED"]) + web_brute_shortnames_mod = mock_module("web_brute_shortnames", ["URL_UNVERIFIED"]) + web_brute_mod = mock_module("web_brute", ["URL_UNVERIFIED"]) speculate_mod = mock_module("speculate", ["DNS_NAME", "OPEN_TCP_PORT", "IP_ADDRESS", "FINDING", "ORG_STUB"]) robots_mod = mock_module("robots", ["URL_UNVERIFIED"]) - # 1) excavate discovers URL_UNVERIFIED from HTTP_RESPONSE, httpx verifies → excavate gets credit + # 1) excavate discovers URL_UNVERIFIED from HTTP_RESPONSE, http verifies → excavate gets credit for _ in range(5): parent = mock_event("URL_UNVERIFIED", excavate_mod) - stats.event_produced(mock_event("URL", httpx_mod, parent=parent)) + stats.event_produced(mock_event("URL", http_mod, parent=parent)) - # 2) ffuf_shortnames discovers URL_UNVERIFIED, httpx verifies → ffuf_shortnames gets credit + # 2) 
web_brute_shortnames discovers URL_UNVERIFIED, http verifies → web_brute_shortnames gets credit for _ in range(3): - parent = mock_event("URL_UNVERIFIED", ffuf_mod) - stats.event_produced(mock_event("URL", httpx_mod, parent=parent)) + parent = mock_event("URL_UNVERIFIED", web_brute_shortnames_mod) + stats.event_produced(mock_event("URL", http_mod, parent=parent)) - # 3) ffuf discovers URL_UNVERIFIED, httpx verifies → ffuf gets credit - parent = mock_event("URL_UNVERIFIED", ffuf2_mod) - stats.event_produced(mock_event("URL", httpx_mod, parent=parent)) + # 3) web_brute discovers URL_UNVERIFIED, http verifies → web_brute gets credit + parent = mock_event("URL_UNVERIFIED", web_brute_mod) + stats.event_produced(mock_event("URL", http_mod, parent=parent)) - # 4) speculate (internal module) creates URL_UNVERIFIED, httpx verifies → httpx keeps credit + # 4) speculate (internal module) creates URL_UNVERIFIED, http verifies → http keeps credit for _ in range(4): parent = mock_event("URL_UNVERIFIED", speculate_mod) - stats.event_produced(mock_event("URL", httpx_mod, parent=parent)) + stats.event_produced(mock_event("URL", http_mod, parent=parent)) - # 5) robots discovers URL_UNVERIFIED, httpx verifies → robots gets credit + # 5) robots discovers URL_UNVERIFIED, http verifies → robots gets credit for _ in range(2): parent = mock_event("URL_UNVERIFIED", robots_mod) - stats.event_produced(mock_event("URL", httpx_mod, parent=parent)) + stats.event_produced(mock_event("URL", http_mod, parent=parent)) - # 6) httpx discovers URL directly from OPEN_TCP_PORT (no URL_UNVERIFIED parent) → httpx keeps credit + # 6) http discovers URL directly from OPEN_TCP_PORT (no URL_UNVERIFIED parent) → http keeps credit for _ in range(2): parent = mock_event("OPEN_TCP_PORT", mock_module("portscan")) - stats.event_produced(mock_event("URL", httpx_mod, parent=parent)) + stats.event_produced(mock_event("URL", http_mod, parent=parent)) # 7) non-URL event types are unaffected 
stats.event_produced(mock_event("DNS_NAME", mock_module("CNAME"))) @@ -267,11 +267,11 @@ def mock_event(type, module, parent=None): # verify per-module produced counts assert stats.module_stats["excavate"].produced == {"URL": 5} - assert stats.module_stats["ffuf_shortnames"].produced == {"URL": 3} - assert stats.module_stats["ffuf"].produced == {"URL": 1} + assert stats.module_stats["web_brute_shortnames"].produced == {"URL": 3} + assert stats.module_stats["web_brute"].produced == {"URL": 1} assert stats.module_stats["robots"].produced == {"URL": 2} - # httpx gets credit for speculate's 4 URLs + 2 from OPEN_TCP_PORT = 6 - assert stats.module_stats["httpx"].produced == {"URL": 6} + # http gets credit for speculate's 4 URLs + 2 from OPEN_TCP_PORT = 6 + assert stats.module_stats["http"].produced == {"URL": 6} assert "speculate" not in stats.module_stats assert stats.module_stats["CNAME"].produced == {"DNS_NAME": 1} assert stats.module_stats["cloudcheck"].produced == {"STORAGE_BUCKET": 1} @@ -284,11 +284,11 @@ def mock_event(type, module, parent=None): # build a dict of module_name -> produced_str from the table table_dict = {row[0]: row[1] for row in rows} - assert table_dict["httpx"] == "6 (6 URL)" + assert table_dict["http"] == "6 (6 URL)" assert table_dict["excavate"] == "5 (5 URL)" - assert table_dict["ffuf_shortnames"] == "3 (3 URL)" + assert table_dict["web_brute_shortnames"] == "3 (3 URL)" assert table_dict["robots"] == "2 (2 URL)" - assert table_dict["ffuf"] == "1 (1 URL)" + assert table_dict["web_brute"] == "1 (1 URL)" assert table_dict["CNAME"] == "1 (1 DNS_NAME)" assert table_dict["cloudcheck"] == "1 (1 STORAGE_BUCKET)" assert "speculate" not in table_dict diff --git a/bbot/test/test_step_1/test_scope.py b/bbot/test/test_step_1/test_scope.py index d89c2df07b..52ee4462a1 100644 --- a/bbot/test/test_step_1/test_scope.py +++ b/bbot/test/test_step_1/test_scope.py @@ -4,7 +4,7 @@ class TestScopeBaseline(ModuleTestBase): targets = ["http://127.0.0.1:8888"] - 
modules_overrides = ["httpx"] + modules_overrides = ["http"] config_overrides = {"omit_event_types": []} async def setup_after_prep(self, module_test): diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index 0039fce14b..6384277e78 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -654,6 +654,27 @@ async def test_blacklist_regex(bbot_scanner, bbot_httpserver): assert set(urls) == {"http://127.0.0.1:8888/"} +def test_blacklist_get_invalid_host(): + """Blacklist.get() should not crash when _make_event_seed returns None for an invalid host.""" + from bbot.scanner.target import ScanBlacklist + + blacklist = ScanBlacklist("bad.com") + # Inputs that fail EventSeed validation (e.g. wildcards, single chars) cause + # _make_event_seed() to return None. Previously this crashed with: + # AttributeError: 'NoneType' object has no attribute 'host' + for invalid in ["*", "*.example.com", "a", ""]: + result = blacklist.get(invalid) + assert result is None + # Multi-level subdomains should never crash Blacklist.get(), even if + # future changes cause EventSeed to reject them + for hostname in ["cdn.info.test.example.com", "a.b.c.d.example.com", "x.y.example.co.uk"]: + result = blacklist.get(hostname) + assert result is None + # Verify actual blacklisted hosts still work + result = blacklist.get("bad.com") + assert result is not None + + def test_no_double_parsing(): """Regression test: when seeds are auto-populated from target, EventSeed parsing should happen only once (via ScanTarget), not twice. 
BBOTTarget should pass diff --git a/bbot/test/test_step_1/test_web.py b/bbot/test/test_step_1/test_web.py index fbaf6f6f97..205db0f542 100644 --- a/bbot/test/test_step_1/test_web.py +++ b/bbot/test/test_step_1/test_web.py @@ -1,11 +1,12 @@ import re -import httpx + +from bbot.core.helpers.web.blast_response import BlasthttpHTTPError from ..bbot_fixtures import * @pytest.mark.asyncio -async def test_web_engine(bbot_scanner, bbot_httpserver, httpx_mock): +async def test_web(bbot_scanner, bbot_httpserver, blasthttp_mock): from werkzeug.wrappers import Response def server_handler(request): @@ -27,29 +28,18 @@ def server_handler(request): # request_batch urls = [f"{base_url}{i}" for i in range(num_urls)] - responses = [r async for r in scan.helpers.request_batch(urls)] + responses = await scan.helpers.request_batch(urls) assert len(responses) == 100 assert all(r[1].status_code == 200 and r[1].text.startswith(f"{r[0]}: ") for r in responses) - # request_batch w/ cancellation - agen = scan.helpers.request_batch(urls) - async for url, response in agen: - assert response.text.startswith(base_url) - await agen.aclose() - break - - # request_custom_batch + # request_batch with tracker urls_and_kwargs = [(urls[i], {"headers": {f"h{i}": f"v{i}"}}, i) for i in range(num_urls)] - results = [r async for r in scan.helpers.request_custom_batch(urls_and_kwargs)] - assert len(responses) == 100 + results = await scan.helpers.request_batch(urls_and_kwargs) + assert len(results) == 100 for result in results: - url, kwargs, custom_tracker, response = result - assert "headers" in kwargs - assert f"h{custom_tracker}" in kwargs["headers"] - assert kwargs["headers"][f"h{custom_tracker}"] == f"v{custom_tracker}" + url, response, custom_tracker = result assert response.status_code == 200 assert response.text.startswith(f"{url}: ") - assert f"H{custom_tracker}: v{custom_tracker}" in response.text # request with raise_error=True with pytest.raises(WebError): @@ -59,13 +49,13 @@ def 
server_handler(request): except WebError as e: assert hasattr(e, "response") assert e.response is None - with pytest.raises(httpx.HTTPStatusError): + with pytest.raises(BlasthttpHTTPError): response = await scan.helpers.request(bbot_httpserver.url_for("/nope"), raise_error=True) response.raise_for_status() try: response = await scan.helpers.request(bbot_httpserver.url_for("/nope"), raise_error=True) response.raise_for_status() - except httpx.HTTPStatusError as e: + except BlasthttpHTTPError as e: assert hasattr(e, "response") assert e.response.status_code == 500 @@ -95,51 +85,12 @@ def server_handler(request): @pytest.mark.asyncio -async def test_request_batch_cancellation(bbot_scanner, bbot_httpserver, httpx_mock): - import time - from werkzeug.wrappers import Response - - urls_requested = [] - - def server_handler(request): - time.sleep(0.75) - urls_requested.append(request.url.split("/")[-1]) - return Response(f"{request.url}: {request.headers}") - - base_url = bbot_httpserver.url_for("/test/") - bbot_httpserver.expect_request(uri=re.compile(r"/test/\d+")).respond_with_handler(server_handler) - - scan = bbot_scanner() - await scan._prep() - - urls = [f"{base_url}{i}" for i in range(100)] - - # request_batch w/ cancellation - agen = scan.helpers.request_batch(urls) - got_urls = [] - start = time.time() - async for url, response in agen: - assert response.text.startswith(base_url) - got_urls.append(url) - if time.time() > start + 1: - await agen.aclose() - break - - assert 5 < len(got_urls) < 15 - - await scan._cleanup() - - # TODO: enforce qsize limits on zmq to help prevent runaway generators - # assert 10 <= len(urls_requested) <= 20 - - -@pytest.mark.asyncio -async def test_web_helpers(bbot_scanner, bbot_httpserver, httpx_mock): +async def test_web_helpers(bbot_scanner, bbot_httpserver, blasthttp_mock): # json conversion scan = bbot_scanner("evilcorp.com") await scan._prep() url = "http://www.evilcorp.com/json_test?a=b" - httpx_mock.add_response(url=url, 
text="hello\nworld") + blasthttp_mock.add_response(url=url, text="hello\nworld") response = await scan.helpers.web.request(url) j = scan.helpers.response_to_json(response) assert j["status_code"] == 200 @@ -347,47 +298,59 @@ def sync_callback(data): @pytest.mark.asyncio -async def test_web_curl(bbot_scanner, bbot_httpserver): +async def test_web_request_target(bbot_scanner, bbot_httpserver): + """Test request() with request_target, ignore_bbot_global_settings, and other advanced kwargs.""" scan = bbot_scanner("127.0.0.1") await scan._prep() helpers = scan.helpers - url = bbot_httpserver.url_for("/curl") - bbot_httpserver.expect_request(uri="/curl").respond_with_data("curl_yep") - bbot_httpserver.expect_request(uri="/index.html").respond_with_data("curl_yep_index") - assert await helpers.curl(url=url) == "curl_yep" - assert await helpers.curl(url=url, ignore_bbot_global_settings=True) == "curl_yep" - assert (await helpers.curl(url=url, head_mode=True)).startswith("HTTP/") - assert await helpers.curl(url=url, raw_body="body") == "curl_yep" - assert ( - await helpers.curl( - url=url, - raw_path=True, - headers={"test": "test", "test2": ["test2"]}, - ignore_bbot_global_settings=False, - post_data={"test": "test"}, - method="POST", - cookies={"test": "test"}, - path_override="/index.html", - ) - == "curl_yep_index" + url = bbot_httpserver.url_for("/test-advanced") + bbot_httpserver.expect_request(uri="/test-advanced").respond_with_data("advanced_yep") + bbot_httpserver.expect_request(uri="/index.html").respond_with_data("index_yep") + + # basic request + r = await helpers.request(url=url) + assert r.text == "advanced_yep" + + # ignore_bbot_global_settings + r = await helpers.request(url=url, ignore_bbot_global_settings=True) + assert r.text == "advanced_yep" + + # HEAD method + r = await helpers.request(url=url, method="HEAD") + assert r.status_code == 200 + + # body kwarg + r = await helpers.request(url=url, body="body") + assert r.text == "advanced_yep" + + # 
request_target overrides the HTTP request-line path + r = await helpers.request( + url=url, + headers={"test": "test", "test2": ["test2"]}, + data={"test": "test"}, + method="POST", + cookies={"test": "test"}, + request_target="/index.html", ) - # test custom headers - bbot_httpserver.expect_request("/test-custom-http-headers-curl", headers={"test": "header"}).respond_with_data( - "curl_yep_headers" + assert r.text == "index_yep" + + # test custom headers from scan config + bbot_httpserver.expect_request("/test-custom-http-headers-advanced", headers={"test": "header"}).respond_with_data( + "headers_yep" ) - headers_url = bbot_httpserver.url_for("/test-custom-http-headers-curl") - curl_result = await helpers.curl(url=headers_url) - assert curl_result == "curl_yep_headers" + headers_url = bbot_httpserver.url_for("/test-custom-http-headers-advanced") + r = await helpers.request(url=headers_url) + assert r.text == "headers_yep" await scan._cleanup() @pytest.mark.asyncio -async def test_web_http_compare(httpx_mock, bbot_scanner): +async def test_web_http_compare(blasthttp_mock, bbot_scanner): scan = bbot_scanner() await scan._prep() helpers = scan.helpers - httpx_mock.add_response(url=re.compile(r"http://www\.example\.com.*"), text="wat") + blasthttp_mock.add_response(url=re.compile(r"http://www\.example\.com.*"), text="wat") compare_helper = helpers.http_compare("http://www.example.com") await compare_helper.compare("http://www.example.com", headers={"asdf": "asdf"}) await compare_helper.compare("http://www.example.com", cookies={"asdf": "asdf"}) @@ -448,46 +411,45 @@ async def test_http_ssl(bbot_scanner, bbot_httpserver_ssl): @pytest.mark.asyncio -async def test_web_cookies(bbot_scanner, httpx_mock): - import httpx - from bbot.core.helpers.web.client import BBOTAsyncClient +async def test_web_cookies(bbot_scanner, bbot_httpserver): + from werkzeug.wrappers import Response - # make sure cookies work when enabled - 
httpx_mock.add_response(url="http://www.evilcorp.com/cookies", headers=[("set-cookie", "wat=asdf; path=/")]) - scan = bbot_scanner() - await scan._prep() + def set_cookie_handler(request): + resp = Response("ok") + resp.set_cookie("wat", "asdf", path="/") + return resp - client = BBOTAsyncClient(persist_cookies=True, _config=scan.config, _target=scan.target) - r = await client.get(url="http://www.evilcorp.com/cookies") - assert r.cookies["wat"] == "asdf" - httpx_mock.add_response(url="http://www.evilcorp.com/cookies/test", match_headers={"Cookie": "wat=asdf"}) - r = await client.get(url="http://www.evilcorp.com/cookies/test") - # make sure we can manually send cookies - httpx_mock.add_response(url="http://www.evilcorp.com/cookies/test2", match_headers={"Cookie": "asdf=wat"}) - r = await scan.helpers.request(url="http://www.evilcorp.com/cookies/test2", cookies={"asdf": "wat"}) - assert client.cookies["wat"] == "asdf" + def echo_cookies_handler(request): + cookies = request.cookies + cookie_str = "; ".join([f"{key}={value}" for key, value in cookies.items()]) + return Response(f"Cookies: {cookie_str}") - await scan._cleanup() + bbot_httpserver.expect_request(uri="/setcookie").respond_with_handler(set_cookie_handler) + bbot_httpserver.expect_request(uri="/echocookie").respond_with_handler(echo_cookies_handler) - # make sure they don't when they're not - httpx_mock.add_response(url="http://www2.evilcorp.com/cookies", headers=[("set-cookie", "wats=fdsa; path=/")]) - scan = bbot_scanner() + scan = bbot_scanner("127.0.0.1") await scan._prep() - client2 = BBOTAsyncClient(persist_cookies=False, _config=scan.config, _target=scan.target) - r = await client2.get(url="http://www2.evilcorp.com/cookies") - # make sure we can access the cookies - assert "wats" in r.cookies - httpx_mock.add_response(url="http://www2.evilcorp.com/cookies/test", match_headers={"Cookie": "wats=fdsa"}) - # but that they're not sent in the response - with pytest.raises(httpx.TimeoutException): - r = 
await client2.get(url="http://www2.evilcorp.com/cookies/test") - # make sure cookies are sent - r = await client2.get(url="http://www2.evilcorp.com/cookies/test", cookies={"wats": "fdsa"}) - assert r.status_code == 200 - # make sure we can manually send cookies - httpx_mock.add_response(url="http://www2.evilcorp.com/cookies/test2", match_headers={"Cookie": "fdsa=wats"}) - r = await client2.get(url="http://www2.evilcorp.com/cookies/test2", cookies={"fdsa": "wats"}) - assert not client2.cookies + + # make sure Set-Cookie headers are parsed in the response + r = await scan.helpers.request(bbot_httpserver.url_for("/setcookie")) + assert r is not None + assert r.cookies.get("wat") == "asdf" + + # blasthttp does NOT persist cookies across requests (stateless by design) + r2 = await scan.helpers.request(bbot_httpserver.url_for("/echocookie")) + assert r2 is not None + assert "wat=asdf" not in r2.text + + # but manually sending cookies should work + r3 = await scan.helpers.request(bbot_httpserver.url_for("/echocookie"), cookies={"wat": "asdf"}) + assert r3 is not None + assert "wat=asdf" in r3.text + + # make sure multiple cookies are sent + r4 = await scan.helpers.request(bbot_httpserver.url_for("/echocookie"), cookies={"foo": "bar", "baz": "qux"}) + assert r4 is not None + assert "foo=bar" in r4.text + assert "baz=qux" in r4.text await scan._cleanup() @@ -541,6 +503,7 @@ def handler(request): bbot_httpserver.expect_request(uri=endpoint).respond_with_handler(handler) scan = bbot_scanner("127.0.0.1") + await scan._prep() module = BaseModule(scan) module.api_key = ["k1", "k2"] diff --git a/bbot/test/test_step_1/test_web_rate_limit.py b/bbot/test/test_step_1/test_web_rate_limit.py new file mode 100644 index 0000000000..fc52290f23 --- /dev/null +++ b/bbot/test/test_step_1/test_web_rate_limit.py @@ -0,0 +1,90 @@ +import time + +from ..bbot_fixtures import * + + +@pytest.mark.asyncio +async def test_web_rate_limit(bbot_scanner, bbot_httpserver): + """Verify that http_rate_limit 
throttles requests to the configured RPS.""" + bbot_httpserver.expect_request(uri="/rate_limit_test").respond_with_data("ok") + + # 10 requests per second = ~100ms between requests + rps = 10 + num_requests = 20 + scan = bbot_scanner("127.0.0.1", config={"web": {"http_rate_limit": rps}}) + await scan._prep() + + # verify the rate limit was applied to the blasthttp client + assert scan.helpers.web.client is scan.helpers.blasthttp + + url = bbot_httpserver.url_for("/rate_limit_test") + request_times = [] + for _ in range(num_requests): + r = await scan.helpers.request(url) + request_times.append(time.monotonic()) + assert r.status_code == 200 + + elapsed = request_times[-1] - request_times[0] + # at 10 rps, 20 requests should take at least ~1.9 seconds + # (19 intervals at 100ms each) + # use a conservative lower bound to avoid flakiness + min_expected = (num_requests - 1) / rps * 0.7 + assert elapsed >= min_expected, ( + f"Rate limiting not working: {num_requests} requests completed in {elapsed:.2f}s " + f"(expected >= {min_expected:.2f}s at {rps} rps)" + ) + + await scan._cleanup() + + +@pytest.mark.asyncio +async def test_web_no_rate_limit(bbot_scanner, bbot_httpserver): + """Verify that with no rate limit (default), requests are not throttled.""" + bbot_httpserver.expect_request(uri="/no_rate_limit_test").respond_with_data("ok") + + num_requests = 20 + scan = bbot_scanner("127.0.0.1") + await scan._prep() + + url = bbot_httpserver.url_for("/no_rate_limit_test") + request_times = [] + for _ in range(num_requests): + r = await scan.helpers.request(url) + request_times.append(time.monotonic()) + assert r.status_code == 200 + + elapsed = request_times[-1] - request_times[0] + # without rate limiting, 20 requests to localhost should complete well under 2 seconds + assert elapsed < 2.0, f"Requests unexpectedly slow without rate limiting: {elapsed:.2f}s" + + await scan._cleanup() + + +@pytest.mark.asyncio +async def test_batch_rate_limit_min_wins(bbot_scanner): + 
"""When both a global and per-call rate limit are set, the more restrictive one should win. + + This tests blasthttp's min(global, per_call) behavior for request_batch. + """ + import blasthttp + import time + + # Set a lenient global rate (100 rps) on the client + client = blasthttp.BlastHTTP() + client.set_rate_limit(100.0) + + # Build 5 dummy configs (they'll fail to connect, but we only care about dispatch timing) + configs = [blasthttp.BatchConfig(f"http://127.0.0.1:1/{i}", timeout=1, retries=0) for i in range(5)] + + # Call with a more restrictive per-call rate (10 rps) + # If min() works, dispatch should be paced at 10 rps (~400ms for 5 requests) + # If global wins, dispatch would be at 100 rps (~40ms for 5 requests) + start = time.monotonic() + await client.request_batch(configs, concurrency=50, rate_limit=10.0) + elapsed = time.monotonic() - start + + # 5 requests at 10 rps = 4 intervals × 100ms = ~400ms minimum + assert elapsed >= 0.3, ( + f"Per-call rate limit (10 rps) should win over global (100 rps), " + f"but batch completed in {elapsed:.3f}s (expected >= 0.3s)" + ) diff --git a/bbot/test/test_step_2/module_tests/base.py b/bbot/test/test_step_2/module_tests/base.py index ffb4ae0d46..534e9f66df 100644 --- a/bbot/test/test_step_2/module_tests/base.py +++ b/bbot/test/test_step_2/module_tests/base.py @@ -25,7 +25,7 @@ class ModuleTestBase: class ModuleTest: def __init__( - self, module_test_base, httpx_mock, httpserver, httpserver_ssl, monkeypatch, request, caplog, capsys + self, module_test_base, blasthttp_mock, httpserver, httpserver_ssl, monkeypatch, request, caplog, capsys ): self.name = module_test_base.name self.config = OmegaConf.merge(CORE.config, OmegaConf.create(module_test_base.config_overrides)) @@ -33,7 +33,7 @@ def __init__( self.caplog = caplog self.capsys = capsys - self.httpx_mock = httpx_mock + self.blasthttp_mock = blasthttp_mock self.httpserver = httpserver self.httpserver_ssl = httpserver_ssl self.monkeypatch = monkeypatch @@ -92,7 
+92,7 @@ def module(self): @pytest_asyncio.fixture async def module_test( - self, httpx_mock, bbot_httpserver, bbot_httpserver_ssl, monkeypatch, request, caplog, capsys + self, blasthttp_mock, bbot_httpserver, bbot_httpserver_ssl, monkeypatch, request, caplog, capsys ): # If a test uses docker, we can't run it in the distro tests if os.getenv("BBOT_DISTRO_TESTS") and self.skip_distro_tests: @@ -100,7 +100,7 @@ async def module_test( self.log.info(f"Starting {self.name} module test") module_test = self.ModuleTest( - self, httpx_mock, bbot_httpserver, bbot_httpserver_ssl, monkeypatch, request, caplog, capsys + self, blasthttp_mock, bbot_httpserver, bbot_httpserver_ssl, monkeypatch, request, caplog, capsys ) self.log.debug("Executing setup_before_prep()") await self.setup_before_prep(module_test) diff --git a/bbot/test/test_step_2/module_tests/test_module_ajaxpro.py b/bbot/test/test_step_2/module_tests/test_module_ajaxpro.py index 7cbbbb783c..21c44f8519 100644 --- a/bbot/test/test_step_2/module_tests/test_module_ajaxpro.py +++ b/bbot/test/test_step_2/module_tests/test_module_ajaxpro.py @@ -3,7 +3,7 @@ class TestAjaxpro(ModuleTestBase): targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "ajaxpro"] + modules_overrides = ["http", "ajaxpro"] exploit_headers = {"X-Ajaxpro-Method": "AddItem", "Content-Type": "text/json; charset=UTF-8"} exploit_response = """ null; r.error = {"Message":"Constructor on type 'AjaxPro.Services.ICartService' not found.","Type":"System.MissingMethodException"};/* diff --git a/bbot/test/test_step_2/module_tests/test_module_anubisdb.py b/bbot/test/test_step_2/module_tests/test_module_anubisdb.py index 7b1bc6659d..aef3f0c73e 100644 --- a/bbot/test/test_step_2/module_tests/test_module_anubisdb.py +++ b/bbot/test/test_step_2/module_tests/test_module_anubisdb.py @@ -4,7 +4,7 @@ class TestAnubisdb(ModuleTestBase): async def setup_after_prep(self, module_test): module_test.module.abort_if = lambda e: False - 
module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://jldc.me/anubis/subdomains/blacklanternsecurity.com", json=["asdf.blacklanternsecurity.com", "zzzz.blacklanternsecurity.com"], ) diff --git a/bbot/test/test_step_2/module_tests/test_module_apkpure.py b/bbot/test/test_step_2/module_tests/test_module_apkpure.py index 236eb943df..adc687a7b0 100644 --- a/bbot/test/test_step_2/module_tests/test_module_apkpure.py +++ b/bbot/test/test_step_2/module_tests/test_module_apkpure.py @@ -10,7 +10,7 @@ class TestAPKPure(ModuleTestBase): async def setup_after_prep(self, module_test): await module_test.mock_dns({"blacklanternsecurity.com": {"A": ["127.0.0.99"]}}) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://play.google.com/store/search?q=blacklanternsecurity&c=apps", text=""" @@ -22,7 +22,7 @@ async def setup_after_prep(self, module_test): """, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://play.google.com/store/apps/details?id=com.bbot.test", text=""" @@ -36,7 +36,7 @@ async def setup_after_prep(self, module_test): """, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://d.apkpure.com/b/XAPK/com.bbot.test?version=latest", content=self.apk_file, headers={ diff --git a/bbot/test/test_step_2/module_tests/test_module_aspnet_bin_exposure.py b/bbot/test/test_step_2/module_tests/test_module_aspnet_bin_exposure.py index f86578ebac..1ce91d45fc 100644 --- a/bbot/test/test_step_2/module_tests/test_module_aspnet_bin_exposure.py +++ b/bbot/test/test_step_2/module_tests/test_module_aspnet_bin_exposure.py @@ -4,7 +4,7 @@ class TestAspnetBinExposure(ModuleTestBase): targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "aspnet_bin_exposure"] + modules_overrides = ["http", "aspnet_bin_exposure"] config_overrides = { "modules": { "aspnet_bin_exposure": { diff --git 
a/bbot/test/test_step_2/module_tests/test_module_azure_tenant.py b/bbot/test/test_step_2/module_tests/test_module_azure_tenant.py index 787ecbdba1..419f15aa3a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_azure_tenant.py +++ b/bbot/test/test_step_2/module_tests/test_module_azure_tenant.py @@ -23,7 +23,7 @@ async def setup_after_prep(self, module_test): email_domains.append("blacklanternsecurity.onmicrosoft.com") # Mock azmap.dev response - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://azmap.dev/api/tenant?domain=blacklanternsecurity.com&extract=true", json={ "tenant_id": "test-tenant-id", @@ -33,7 +33,7 @@ async def setup_after_prep(self, module_test): ) # Mock ODC endpoint - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://odc.officeapps.live.com/odc/v2.1/federationprovider?domain=blacklanternsecurity.com", json={}, ) @@ -53,7 +53,7 @@ async def setup_after_prep(self, module_test): "cloud_instance_name": "login.microsoftonline.us", } - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://login.microsoftonline.com/blacklanternsecurity.com/.well-known/openid-configuration", json=openid_config, ) @@ -67,26 +67,26 @@ async def setup_after_prep(self, module_test): if self.federation_url: getcred_response["Credentials"]["FederationRedirectUrl"] = self.federation_url - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://login.microsoftonline.com/common/GetCredentialType", method="POST", json=getcred_response, ) # Mock UserRealm v2.0 - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://login.microsoftonline.com/common/userrealm/test@blacklanternsecurity.com?api-version=2.0", json={}, ) # Mock MTA-STS if self.exchange_online: - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( 
url="https://mta-sts.blacklanternsecurity.com/.well-known/mta-sts.txt", text="version: STSv1\nmode: enforce\nmx: blacklanternsecurity-com.mail.protection.outlook.com\nmax_age: 604800", ) else: - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://mta-sts.blacklanternsecurity.com/.well-known/mta-sts.txt", status_code=404, ) @@ -94,7 +94,7 @@ async def setup_after_prep(self, module_test): # Mock Directory Sync check if needed if self.include_onmicrosoft: sync_result = 0 if self.directory_sync_enabled else 1 - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://login.microsoftonline.com/common/GetCredentialType", method="POST", json={"IfExistsResult": sync_result}, @@ -120,17 +120,17 @@ class TestAzure_Tenant(AzureTenantTestBase): async def setup_after_prep(self, module_test): # Use custom response for this test - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://azmap.dev/api/tenant?domain=blacklanternsecurity.com&extract=true", json=self.tenant_response, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://odc.officeapps.live.com/odc/v2.1/federationprovider?domain=blacklanternsecurity.com", json={"TenantId": "cc74fc12-4142-400e-a653-f98bdeadbeef"}, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://login.microsoftonline.com/blacklanternsecurity.com/.well-known/openid-configuration", json={ "tenant_region_scope": "NA", @@ -138,23 +138,23 @@ async def setup_after_prep(self, module_test): }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://login.microsoftonline.com/common/GetCredentialType", json={"EstsProperties": {}, "Credentials": {}}, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( 
url="https://login.microsoftonline.com/common/userrealm/test@blacklanternsecurity.com?api-version=2.0", json={"NameSpaceType": "Managed"}, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://mta-sts.blacklanternsecurity.com/.well-known/mta-sts.txt", status_code=404, ) # Directory sync check - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://login.microsoftonline.com/common/GetCredentialType", json={"IfExistsResult": 1}, ) diff --git a/bbot/test/test_step_2/module_tests/test_module_badsecrets.py b/bbot/test/test_step_2/module_tests/test_module_badsecrets.py index e5143767d2..2e42521e8d 100644 --- a/bbot/test/test_step_2/module_tests/test_module_badsecrets.py +++ b/bbot/test/test_step_2/module_tests/test_module_badsecrets.py @@ -30,7 +30,7 @@ class TestBadSecrets(ModuleTestBase):

""" - modules_overrides = ["badsecrets", "httpx"] + modules_overrides = ["badsecrets", "http"] async def setup_after_prep(self, module_test): expect_args = {"uri": "/test.aspx"} @@ -141,7 +141,7 @@ class TestBadSecrets_JWTIdentifyOnly(ModuleTestBase): "http://127.0.0.1:8888/vuln_jwt.aspx", "http://127.0.0.1:8888/safe_jwt.aspx", ] - modules_overrides = ["badsecrets", "httpx"] + modules_overrides = ["badsecrets", "http"] # JWT signed with a secret NOT in badsecrets' wordlists (will produce IdentifyOnly) safe_jwt = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0In0.BHvEIdlrTFS4VXvT9nUOycVzokhfIYSxJa7DXNz_h0o" diff --git a/bbot/test/test_step_2/module_tests/test_module_bevigil.py b/bbot/test/test_step_2/module_tests/test_module_bevigil.py index 7f1cb5d79d..c27a2276e7 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bevigil.py +++ b/bbot/test/test_step_2/module_tests/test_module_bevigil.py @@ -8,7 +8,7 @@ class TestBeVigil(ModuleTestBase): config_overrides = {"modules": {"bevigil": {"api_key": "asdf", "urls": True}}} async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://osint.bevigil.com/api/blacklanternsecurity.com/subdomains/", match_headers={"X-Access-Token": "asdf"}, json={ @@ -18,7 +18,7 @@ async def setup_after_prep(self, module_test): ], }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://osint.bevigil.com/api/blacklanternsecurity.com/urls/", json={"domain": "blacklanternsecurity.com", "urls": ["https://asdf.blacklanternsecurity.com"]}, ) @@ -34,7 +34,7 @@ class TestBeVigilMultiKey(TestBeVigil): config_overrides = {"modules": {"bevigil": {"api_key": api_keys, "urls": True}}} async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://osint.bevigil.com/api/blacklanternsecurity.com/subdomains/", match_headers={"X-Access-Token": 
"fdsa"}, json={ @@ -44,7 +44,7 @@ async def setup_after_prep(self, module_test): ], }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( match_headers={"X-Access-Token": "asdf"}, url="https://osint.bevigil.com/api/blacklanternsecurity.com/urls/", json={"domain": "blacklanternsecurity.com", "urls": ["https://asdf.blacklanternsecurity.com"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py index 55220da36b..94e6140df2 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py @@ -36,7 +36,7 @@ def module_name(self): @property def modules_overrides(self): - return ["excavate", "speculate", "httpx", self.module_name, "cloudcheck"] + return ["excavate", "speculate", "http", self.module_name, "cloudcheck"] def url_setup(self): self.url_1 = f"https://{self.random_bucket_1}/" @@ -60,16 +60,16 @@ async def setup_after_prep(self, module_test): expect_args={"method": "GET", "uri": "/"}, respond_args={"response_data": self.website_body} ) if module_test.module.supports_open_check: - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url=self.url_2, text=self.open_bucket_body, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url=self.url_3, text="", ) if self.nonexistent_is_404: - module_test.httpx_mock.add_response(url=re.compile(".*"), text="", status_code=404) + module_test.blasthttp_mock.add_response(url=re.compile(".*"), text="", status_code=404) def check(self, module_test, events): storage_buckets = [e for e in events if e.type == "STORAGE_BUCKET"] diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py index da939222ed..5fae2d25bd 100644 --- 
a/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py @@ -4,7 +4,7 @@ class TestBucket_File_Enum(ModuleTestBase): targets = ["http://127.0.0.1:8888"] - modules_overrides = ["bucket_file_enum", "filedownload", "httpx", "excavate", "cloudcheck"] + modules_overrides = ["bucket_file_enum", "filedownload", "http", "excavate", "cloudcheck"] download_dir = bbot_test_dir / "test_bucket_file_enum" config_overrides = { @@ -23,16 +23,16 @@ class TestBucket_File_Enum(ModuleTestBase): async def setup_before_prep(self, module_test): module_test.httpserver.expect_request("/").respond_with_data(f'') - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url=self.open_bucket_url, text=self.open_bucket_body, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url=f"{self.open_bucket_url}test.pdf", text=self.pdf_data, headers={"Content-Type": "application/pdf"}, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url=f"{self.open_bucket_url}test.css", text="", ) diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_google.py b/bbot/test/test_step_2/module_tests/test_module_bucket_google.py index 71100a9476..65d241b681 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_google.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_google.py @@ -23,5 +23,5 @@ def bucket_setup(self): def url_setup(self): self.url_1 = f"https://{random_bucket_name_1}.storage.googleapis.com" - self.url_2 = 
f"https://www.googleapis.com/storage/v1/b/{random_bucket_name_2}/iam/testPermissions?&permissions=storage.buckets.get&permissions=storage.buckets.list&permissions=storage.buckets.create&permissions=storage.buckets.delete&permissions=storage.buckets.setIamPolicy&permissions=storage.objects.get&permissions=storage.objects.list&permissions=storage.objects.create&permissions=storage.objects.delete&permissions=storage.objects.setIamPolicy" + self.url_2 = f"https://www.googleapis.com/storage/v1/b/{random_bucket_name_2}/iam/testPermissions?permissions=storage.buckets.get&permissions=storage.buckets.list&permissions=storage.buckets.create&permissions=storage.buckets.delete&permissions=storage.buckets.setIamPolicy&permissions=storage.objects.get&permissions=storage.objects.list&permissions=storage.objects.create&permissions=storage.objects.delete&permissions=storage.objects.setIamPolicy" self.url_3 = f"https://www.googleapis.com/storage/v1/b/{random_bucket_name_3}" diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_microsoft.py b/bbot/test/test_step_2/module_tests/test_module_bucket_microsoft.py index 87ea18a440..fbcfa78050 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_microsoft.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_microsoft.py @@ -21,8 +21,8 @@ class TestBucket_Microsoft_NoDup(ModuleTestBase): module_name = "bucket_microsoft" config_overrides = {"cloudcheck": True} - async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( + async def setup_before_prep(self, module_test): + module_test.blasthttp_mock.add_response( url="https://tesla.blob.core.windows.net/tesla?restype=container", text="", ) diff --git a/bbot/test/test_step_2/module_tests/test_module_bufferoverrun.py b/bbot/test/test_step_2/module_tests/test_module_bufferoverrun.py index e77127bc38..8f99b5873a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bufferoverrun.py +++ 
b/bbot/test/test_step_2/module_tests/test_module_bufferoverrun.py @@ -6,7 +6,7 @@ class TestBufferOverrun(ModuleTestBase): async def setup_before_prep(self, module_test): # Mock response for non-commercial API - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://tls.bufferover.run/dns?q=.blacklanternsecurity.com", match_headers={"x-api-key": "asdf"}, json={"Results": ["1.2.3.4,example.com,*,*,sub.blacklanternsecurity.com"]}, @@ -23,7 +23,7 @@ class TestBufferOverrunCommercial(ModuleTestBase): async def setup_before_prep(self, module_test): # Mock response for commercial API - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://bufferover-run-tls.p.rapidapi.com/ipv4/dns?q=.blacklanternsecurity.com", match_headers={"x-rapidapi-host": "bufferover-run-tls.p.rapidapi.com", "x-rapidapi-key": "asdf"}, json={"Results": ["5.6.7.8,blacklanternsecurity.com,*,*,sub.blacklanternsecurity.com"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_builtwith.py b/bbot/test/test_step_2/module_tests/test_module_builtwith.py index d11c8940d2..23f16a0fc8 100644 --- a/bbot/test/test_step_2/module_tests/test_module_builtwith.py +++ b/bbot/test/test_step_2/module_tests/test_module_builtwith.py @@ -5,7 +5,7 @@ class TestBuiltWith(ModuleTestBase): config_overrides = {"modules": {"builtwith": {"api_key": "asdf"}}} async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.builtwith.com/v20/api.json?KEY=asdf&LOOKUP=blacklanternsecurity.com&NOMETA=yes&NOATTR=yes&HIDETEXT=yes&HIDEDL=yes", json={ "Results": [ @@ -90,7 +90,7 @@ async def setup_after_prep(self, module_test): "Trust": None, }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.builtwith.com/redirect1/api.json?KEY=asdf&LOOKUP=blacklanternsecurity.com", json={ "Lookup": "blacklanternsecurity.com", diff --git 
a/bbot/test/test_step_2/module_tests/test_module_bypass403.py b/bbot/test/test_step_2/module_tests/test_module_bypass403.py index 57c1a8bedf..a78fec4c40 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bypass403.py +++ b/bbot/test/test_step_2/module_tests/test_module_bypass403.py @@ -4,7 +4,7 @@ class TestBypass403(ModuleTestBase): targets = ["http://127.0.0.1:8888/test"] - modules_overrides = ["bypass403", "httpx"] + modules_overrides = ["bypass403", "http"] async def setup_after_prep(self, module_test): expect_args = {"method": "GET", "uri": "/test..;/"} @@ -21,7 +21,7 @@ def check(self, module_test, events): class TestBypass403_collapsethreshold(ModuleTestBase): targets = ["http://127.0.0.1:8888/test"] - modules_overrides = ["bypass403", "httpx"] + modules_overrides = ["bypass403", "http"] async def setup_after_prep(self, module_test): respond_args = {"response_data": "alive"} @@ -72,7 +72,7 @@ def check(self, module_test, events): class TestBypass403_aspnetcookieless(ModuleTestBase): targets = ["http://127.0.0.1:8888/admin.aspx"] - modules_overrides = ["bypass403", "httpx"] + modules_overrides = ["bypass403", "http"] async def setup_after_prep(self, module_test): expect_args = {"method": "GET", "uri": re.compile(r"\/\([sS]\(\w+\)\)\/.+\.aspx")} @@ -88,7 +88,7 @@ def check(self, module_test, events): class TestBypass403_waf(ModuleTestBase): targets = ["http://127.0.0.1:8888/test"] - modules_overrides = ["bypass403", "httpx"] + modules_overrides = ["bypass403", "http"] async def setup_after_prep(self, module_test): expect_args = {"method": "GET", "uri": "/test..;/"} diff --git a/bbot/test/test_step_2/module_tests/test_module_c99.py b/bbot/test/test_step_2/module_tests/test_module_c99.py index 5721776483..846a5e07dc 100644 --- a/bbot/test/test_step_2/module_tests/test_module_c99.py +++ b/bbot/test/test_step_2/module_tests/test_module_c99.py @@ -1,4 +1,4 @@ -import httpx +from bbot.test.mock_blasthttp import TimeoutException from .base import 
ModuleTestBase @@ -8,11 +8,11 @@ class TestC99(ModuleTestBase): config_overrides = {"modules": {"c99": {"api_key": "asdf"}}} async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.c99.nl/randomnumber?key=asdf&between=1,100&json", json={"success": True, "output": 65}, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.c99.nl/subdomainfinder?key=asdf&domain=blacklanternsecurity.com&json", json={ "success": True, @@ -32,7 +32,7 @@ class TestC99AbortThreshold1(TestC99): config_overrides = {"modules": {"c99": {"api_key": ["6789", "fdsa", "1234", "4321"]}}} async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.c99.nl/randomnumber?key=fdsa&between=1,100&json", json={"success": True, "output": 65}, ) @@ -45,9 +45,9 @@ async def custom_callback(request): self.url_count[url] += 1 except KeyError: self.url_count[url] = 1 - raise httpx.TimeoutException("timeout") + raise TimeoutException("timeout") - module_test.httpx_mock.add_callback(custom_callback) + module_test.blasthttp_mock.add_callback(custom_callback) def check(self, module_test, events): assert module_test.module.api_failure_abort_threshold == 13 diff --git a/bbot/test/test_step_2/module_tests/test_module_censys_dns.py b/bbot/test/test_step_2/module_tests/test_module_censys_dns.py index bab6f0060a..f009e8c59b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_censys_dns.py +++ b/bbot/test/test_step_2/module_tests/test_module_censys_dns.py @@ -5,7 +5,7 @@ class TestCensys_DNS(ModuleTestBase): config_overrides = {"modules": {"censys_dns": {"api_key": "api_id:api_secret"}}} async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://search.censys.io/api/v1/account", match_headers={"Authorization": 
"Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, json={ @@ -16,7 +16,7 @@ async def setup_before_prep(self, module_test): "quota": {"used": 26, "allowance": 250, "resets_at": "1919-06-03 16:30:32"}, }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://search.censys.io/api/v2/certificates/search", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, method="POST", @@ -46,7 +46,7 @@ async def setup_before_prep(self, module_test): }, }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://search.censys.io/api/v2/certificates/search", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, method="POST", diff --git a/bbot/test/test_step_2/module_tests/test_module_censys_ip.py b/bbot/test/test_step_2/module_tests/test_module_censys_ip.py index a0a879d5ae..6c94a2de73 100644 --- a/bbot/test/test_step_2/module_tests/test_module_censys_ip.py +++ b/bbot/test/test_step_2/module_tests/test_module_censys_ip.py @@ -6,7 +6,26 @@ class TestCensys_IP(ModuleTestBase): config_overrides = {"modules": {"censys_ip": {"api_key": "api_id:api_secret"}}} async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + await module_test.mock_dns( + { + "wildcard.evilcorp.com": { + "A": ["1.2.3.4"], + }, + "certname.evilcorp.com": { + "A": ["1.2.3.4"], + }, + "certsubject.evilcorp.com": { + "A": ["1.2.3.4"], + }, + "reversedns.evilcorp.com": { + "A": ["1.2.3.4"], + }, + "ptr.evilcorp.com": { + "A": ["1.2.3.4"], + }, + } + ) + module_test.blasthttp_mock.add_response( url="https://search.censys.io/api/v1/account", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, json={ @@ -17,7 +36,7 @@ async def setup_before_prep(self, module_test): "quota": {"used": 26, "allowance": 250, "resets_at": "1919-06-03 16:30:32"}, }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://search.censys.io/api/v2/hosts/1.2.3.4", 
match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, json={ @@ -228,7 +247,8 @@ class TestCensys_IP_InScopeOnly(ModuleTestBase): config_overrides = {"modules": {"censys_ip": {"api_key": "api_id:api_secret", "in_scope_only": True}}} async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + await module_test.mock_dns({"evilcorp.com": {"A": ["1.1.1.1"]}}) + module_test.blasthttp_mock.add_response( url="https://search.censys.io/api/v1/account", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, json={ @@ -236,7 +256,7 @@ async def setup_before_prep(self, module_test): }, ) # This should NOT be called because in_scope_only=True - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://search.censys.io/api/v2/hosts/1.1.1.1", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, json={ @@ -271,7 +291,8 @@ class TestCensys_IP_OutOfScope(ModuleTestBase): } async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + await module_test.mock_dns({"evilcorp.com": {"A": ["1.1.1.1"]}}) + module_test.blasthttp_mock.add_response( url="https://search.censys.io/api/v1/account", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, json={ @@ -279,7 +300,7 @@ async def setup_before_prep(self, module_test): }, ) # This SHOULD be called because in_scope_only=False - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://search.censys.io/api/v2/hosts/1.1.1.1", match_headers={"Authorization": "Basic YXBpX2lkOmFwaV9zZWNyZXQ="}, json={ diff --git a/bbot/test/test_step_2/module_tests/test_module_certspotter.py b/bbot/test/test_step_2/module_tests/test_module_certspotter.py index a9ab7eb36b..aac235af45 100644 --- a/bbot/test/test_step_2/module_tests/test_module_certspotter.py +++ b/bbot/test/test_step_2/module_tests/test_module_certspotter.py @@ -5,10 +5,30 @@ class TestCertspotter(ModuleTestBase): async 
def setup_after_prep(self, module_test): module_test.module.abort_if = lambda e: False for t in self.targets: - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.certspotter.com/v1/issuances?domain=blacklanternsecurity.com&include_subdomains=true&expand=dns_names", json=[{"dns_names": ["*.asdf.blacklanternsecurity.com"]}], ) def check(self, module_test, events): assert any(e.data == "asdf.blacklanternsecurity.com" for e in events), "Failed to detect subdomain" + + +class TestCertspotterRateLimited(ModuleTestBase): + module_name = "certspotter" + modules_overrides = ["certspotter"] + + async def setup_after_prep(self, module_test): + module_test.module.abort_if = lambda e: False + module_test.blasthttp_mock.add_response( + url="https://api.certspotter.com/v1/issuances?domain=blacklanternsecurity.com&include_subdomains=true&expand=dns_names", + json={ + "code": "rate_limited", + "message": "You have exceeded the domain search rate limit for the SSLMate CT Search API.", + }, + ) + + def check(self, module_test, events): + assert not any(e.type == "DNS_NAME" and e.data != "blacklanternsecurity.com" for e in events), ( + "Should not produce subdomains from a rate-limited response" + ) diff --git a/bbot/test/test_step_2/module_tests/test_module_chaos.py b/bbot/test/test_step_2/module_tests/test_module_chaos.py index 193bded584..cdf5791456 100644 --- a/bbot/test/test_step_2/module_tests/test_module_chaos.py +++ b/bbot/test/test_step_2/module_tests/test_module_chaos.py @@ -5,12 +5,12 @@ class TestChaos(ModuleTestBase): config_overrides = {"modules": {"chaos": {"api_key": "asdf"}}} async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://dns.projectdiscovery.io/dns/example.com", match_headers={"Authorization": "asdf"}, json={"domain": "example.com", "subdomains": 65}, ) - module_test.httpx_mock.add_response( + 
module_test.blasthttp_mock.add_response( url="https://dns.projectdiscovery.io/dns/blacklanternsecurity.com/subdomains", match_headers={"Authorization": "asdf"}, json={ diff --git a/bbot/test/test_step_2/module_tests/test_module_cloudcheck.py b/bbot/test/test_step_2/module_tests/test_module_cloudcheck.py index 92815c2dcc..a1672f76b5 100644 --- a/bbot/test/test_step_2/module_tests/test_module_cloudcheck.py +++ b/bbot/test/test_step_2/module_tests/test_module_cloudcheck.py @@ -5,7 +5,7 @@ class TestCloudCheck(ModuleTestBase): targets = ["http://127.0.0.1:8888", "asdf2.storage.googleapis.com"] - modules_overrides = ["httpx", "excavate", "cloudcheck"] + modules_overrides = ["http", "excavate", "cloudcheck"] async def setup_after_prep(self, module_test): module_test.set_expect_requests({"uri": "/"}, {"response_data": ""}) diff --git a/bbot/test/test_step_2/module_tests/test_module_code_repository.py b/bbot/test/test_step_2/module_tests/test_module_code_repository.py index bfb01ef03f..20df2dfb28 100644 --- a/bbot/test/test_step_2/module_tests/test_module_code_repository.py +++ b/bbot/test/test_step_2/module_tests/test_module_code_repository.py @@ -3,7 +3,7 @@ class TestCodeRepository(ModuleTestBase): targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "excavate", "code_repository"] + modules_overrides = ["http", "excavate", "code_repository"] async def setup_after_prep(self, module_test): expect_args = {"method": "GET", "uri": "/"} diff --git a/bbot/test/test_step_2/module_tests/test_module_credshed.py b/bbot/test/test_step_2/module_tests/test_module_credshed.py index a6b1e65c51..17093ecb82 100644 --- a/bbot/test/test_step_2/module_tests/test_module_credshed.py +++ b/bbot/test/test_step_2/module_tests/test_module_credshed.py @@ -57,12 +57,12 @@ class TestCredshed(ModuleTestBase): } async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://credshed.com/api/auth", 
json=credshed_auth_response, method="POST", ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://credshed.com/api/search", json=credshed_response, method="POST", diff --git a/bbot/test/test_step_2/module_tests/test_module_crt.py b/bbot/test/test_step_2/module_tests/test_module_crt.py index 5ee8ae4d31..34bfd7b1a1 100644 --- a/bbot/test/test_step_2/module_tests/test_module_crt.py +++ b/bbot/test/test_step_2/module_tests/test_module_crt.py @@ -5,7 +5,7 @@ class TestCRT(ModuleTestBase): async def setup_after_prep(self, module_test): module_test.module.abort_if = lambda e: False for t in self.targets: - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://crt.sh?q=%25.blacklanternsecurity.com&output=json", json=[{"id": 1, "name_value": "asdf.blacklanternsecurity.com\nzzzz.blacklanternsecurity.com"}], ) diff --git a/bbot/test/test_step_2/module_tests/test_module_dehashed.py b/bbot/test/test_step_2/module_tests/test_module_dehashed.py index 4821fc5458..5a174e7fac 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dehashed.py +++ b/bbot/test/test_step_2/module_tests/test_module_dehashed.py @@ -8,8 +8,8 @@ class TestDehashed(ModuleTestBase): "modules": {"dehashed": {"api_key": "deadbeef"}}, } - async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( + async def setup_before_prep(self, module_test): + module_test.blasthttp_mock.add_response( url="https://api.dehashed.com/v2/search", method="POST", json={ @@ -40,6 +40,8 @@ async def setup_after_prep(self, module_test): "total": 2, }, ) + + async def setup_after_prep(self, module_test): await module_test.mock_dns( { "bob.com": {"A": ["127.0.0.1"]}, @@ -96,7 +98,7 @@ def check(self, module_test, events): class TestDehashedBadEmail(TestDehashed): async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( 
url="https://api.dehashed.com/v2/search", method="POST", json={ @@ -119,8 +121,8 @@ def check(self, module_test, events): class TestDehashedHTTPError(TestDehashed): - async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( + async def setup_before_prep(self, module_test): + module_test.blasthttp_mock.add_response( url="https://api.dehashed.com/v2/search", method="POST", json={"error": "issue with request body"}, @@ -130,14 +132,14 @@ async def setup_after_prep(self, module_test): def check(self, module_test, events): scan_log_content = open(module_test.scan.home / "scan.log").read() assert ( - 'Error retrieving results from dehashed.com (status code 400): {"error":"issue with request body"}' + 'Error retrieving results from dehashed.com (status code 400): {"error": "issue with request body"}' in scan_log_content ) class TestDehashedTooManyResults(TestDehashed): async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.dehashed.com/v2/search", method="POST", json={ diff --git a/bbot/test/test_step_2/module_tests/test_module_digitorus.py b/bbot/test/test_step_2/module_tests/test_module_digitorus.py index a683a17d8f..efb221429d 100644 --- a/bbot/test/test_step_2/module_tests/test_module_digitorus.py +++ b/bbot/test/test_step_2/module_tests/test_module_digitorus.py @@ -10,7 +10,7 @@ class TestDigitorus(ModuleTestBase): """ async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://certificatedetails.com/blacklanternsecurity.com", text=self.web_response, ) diff --git a/bbot/test/test_step_2/module_tests/test_module_discord.py b/bbot/test/test_step_2/module_tests/test_module_discord.py index 96f2d769d9..06cfa6d8f7 100644 --- a/bbot/test/test_step_2/module_tests/test_module_discord.py +++ b/bbot/test/test_step_2/module_tests/test_module_discord.py @@ -1,11 +1,11 @@ -import 
httpx +from bbot.test.mock_blasthttp import MockResponse from .base import ModuleTestBase class TestDiscord(ModuleTestBase): targets = ["http://127.0.0.1:8888/cookie.aspx", "http://127.0.0.1:8888/cookie2.aspx", "foo.bar"] - modules_overrides = ["discord", "excavate", "badsecrets", "httpx"] + modules_overrides = ["discord", "excavate", "badsecrets", "http"] webhook_url = "https://discord.com/api/webhooks/1234/deadbeef-P-uF-asdf" config_overrides = {"modules": {"discord": {"webhook_url": webhook_url, "min_severity": "INFO"}}} @@ -24,14 +24,14 @@ def custom_setup(self, module_test): async def setup_after_prep(self, module_test): self.custom_setup(module_test) - def custom_response(request: httpx.Request): + def custom_response(request): module_test.request_count += 1 if module_test.request_count == 2: - return httpx.Response(status_code=429, json={"retry_after": 0.01}) + return MockResponse(status_code=429, json={"retry_after": 0.01}) else: - return httpx.Response(status_code=200) + return MockResponse(status_code=200) - module_test.httpx_mock.add_callback(custom_response, url=self.webhook_url) + module_test.blasthttp_mock.add_callback(custom_response, url=self.webhook_url) def check(self, module_test, events): findings = [e for e in events if e.type == "FINDING"] diff --git a/bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py b/bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py index 8f5f99e838..5de39cb0b6 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py +++ b/bbot/test/test_step_2/module_tests/test_module_dnsdumpster.py @@ -3,11 +3,11 @@ class TestDNSDumpster(ModuleTestBase): async def setup_after_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://dnsdumpster.com", content=b"""

""", ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://api.dnsdumpster.com/htmld/", content=b"asdf.blacklanternsecurity.com", ) diff --git a/bbot/test/test_step_2/module_tests/test_module_docker_pull.py b/bbot/test/test_step_2/module_tests/test_module_docker_pull.py index df9d76d50c..a94031f097 100644 --- a/bbot/test/test_step_2/module_tests/test_module_docker_pull.py +++ b/bbot/test/test_step_2/module_tests/test_module_docker_pull.py @@ -11,7 +11,7 @@ class TestDockerPull(ModuleTestBase): config_overrides = {"modules": {"docker_pull": {"output_folder": str(bbot_test_dir / "test_docker_files")}}} async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://hub.docker.com/v2/users/blacklanternsecurity", json={ "id": "f90895d9cf484d9182c6dbbef2632329", @@ -27,7 +27,7 @@ async def setup_before_prep(self, module_test): "type": "User", }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://hub.docker.com/v2/repositories/blacklanternsecurity?page_size=25&page=1", json={ "count": 2, @@ -71,7 +71,7 @@ async def setup_before_prep(self, module_test): ], }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://registry-1.docker.io/v2/blacklanternsecurity/helloworld/tags/list", json={ "errors": [ @@ -94,7 +94,7 @@ async def setup_before_prep(self, module_test): }, status_code=401, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://registry-1.docker.io/v2/blacklanternsecurity/testimage/tags/list", json={ "errors": [ @@ -117,19 +117,19 @@ async def setup_before_prep(self, module_test): }, status_code=401, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://auth.docker.io/token?service=registry.docker.io&scope=blacklanternsecurity/helloworld:pull", json={ "token": 
"QWERTYUIOPASDFGHJKLZXCBNM", }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://auth.docker.io/token?service=registry.docker.io&scope=blacklanternsecurity/testimage:pull", json={ "token": "QWERTYUIOPASDFGHJKLZXCBNM", }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://registry-1.docker.io/v2/blacklanternsecurity/helloworld/tags/list", json={ "name": "blacklanternsecurity/helloworld", @@ -139,7 +139,7 @@ async def setup_before_prep(self, module_test): ], }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://registry-1.docker.io/v2/blacklanternsecurity/testimage/tags/list", json={ "name": "blacklanternsecurity/testimage", @@ -149,7 +149,7 @@ async def setup_before_prep(self, module_test): ], }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://registry-1.docker.io/v2/blacklanternsecurity/helloworld/manifests/latest", json={ "schemaVersion": 2, @@ -168,7 +168,7 @@ async def setup_before_prep(self, module_test): ], }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://registry-1.docker.io/v2/blacklanternsecurity/testimage/manifests/latest", json={ "mediaType": "application/vnd.docker.distribution.manifest.list.v2+json", @@ -201,7 +201,7 @@ async def setup_before_prep(self, module_test): ], }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://registry-1.docker.io/v2/blacklanternsecurity/helloworld/blobs/sha256:a9910947b74a4f0606cfc8669ae8808d2c328beaee9e79f489dc17df14cd50b1", json={ "architecture": "amd64", @@ -376,7 +376,7 @@ async def setup_before_prep(self, module_test): }, }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( 
url="https://registry-1.docker.io/v2/blacklanternsecurity/testimage/manifests/sha256:7c75331408141f1e3ef37eac7c45938fbfb0d421a86201ad45d2ab8b70ddd527", json={ "name": "testimage", @@ -422,11 +422,11 @@ async def setup_before_prep(self, module_test): tar.addfile(file_info, file_io) with open(tar_path, "rb") as file: layer_file = file.read() - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://registry-1.docker.io/v2/blacklanternsecurity/helloworld/blobs/sha256:8a1e25ce7c4f75e372e9884f8f7b1bedcfe4a7a7d452eb4b0a1c7477c9a90345", content=layer_file, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://registry-1.docker.io/v2/blacklanternsecurity/testimage/blobs/sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef", content=layer_file, ) diff --git a/bbot/test/test_step_2/module_tests/test_module_dockerhub.py b/bbot/test/test_step_2/module_tests/test_module_dockerhub.py index 67b75ac6e3..dbc12d6a12 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dockerhub.py +++ b/bbot/test/test_step_2/module_tests/test_module_dockerhub.py @@ -5,7 +5,7 @@ class TestDockerhub(ModuleTestBase): modules_overrides = ["dockerhub", "speculate"] async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://hub.docker.com/v2/users/blacklanternsecurity", json={ "id": "f90895d9cf484d9182c6dbbef2632329", @@ -21,7 +21,7 @@ async def setup_before_prep(self, module_test): "type": "User", }, ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://hub.docker.com/v2/repositories/blacklanternsecurity?page_size=25&page=1", json={ "count": 2, diff --git a/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py b/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py index 65835e9492..aee30f5ca7 100644 --- 
a/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py +++ b/bbot/test/test_step_2/module_tests/test_module_dotnetnuke.py @@ -18,7 +18,7 @@ class TestDotnetnuke(ModuleTestBase): targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "dotnetnuke"] + modules_overrides = ["http", "dotnetnuke"] config_overrides = {"interactsh_disable": "True"} exploit_probe = { @@ -133,7 +133,7 @@ def extract_subdomain_tag(data): class TestDotnetnuke_blindssrf(ModuleTestBase): targets = ["http://127.0.0.1:8888"] module_name = "dotnetnuke" - modules_overrides = ["httpx", "dotnetnuke"] + modules_overrides = ["http", "dotnetnuke"] config_overrides = { "interactsh_disable": False, } diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 9b09890cff..a3bc55b7a2 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -1,9 +1,28 @@ -import httpx +import json +import time import asyncio +import ssl +from urllib.request import urlopen, Request +from urllib.error import URLError +from base64 import b64encode from .base import ModuleTestBase +def _elastic_request(method, url, body=None): + """Make a request to Elasticsearch with basic auth, ignoring SSL verification.""" + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + creds = b64encode(b"elastic:bbotislife").decode() + headers = {"Authorization": f"Basic {creds}"} + if body is not None: + headers["Content-Type"] = "application/json" + body = json.dumps(body).encode() if isinstance(body, dict) else body.encode() + req = Request(url, data=body, headers=headers, method=method) + return urlopen(req, context=ctx) + + class TestElastic(ModuleTestBase): config_overrides = { "modules": { @@ -39,19 +58,20 @@ async def setup_before_prep(self, module_test): ) # Connect to Elasticsearch with retry logic - async with 
httpx.AsyncClient(verify=False) as client: - while True: - try: - # Attempt a simple operation to confirm the connection - response = await client.get("https://localhost:9200/_cat/health", auth=("elastic", "bbotislife")) - response.raise_for_status() - break - except Exception as e: - self.log.verbose(f"Connection failed: {e}. Retrying...") - await asyncio.sleep(0.5) - - # Ensure the index is empty - await client.delete("https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) + while True: + try: + response = _elastic_request("GET", "https://localhost:9200/_cat/health") + response.read() + break + except (URLError, ConnectionError, OSError) as e: + self.log.verbose(f"Connection failed: {e}. Retrying...") + time.sleep(0.5) + + # Ensure the index is empty + try: + _elastic_request("DELETE", "https://localhost:9200/bbot_test_events") + except URLError: + pass # Index might not exist yet async def check(self, module_test, events): try: @@ -60,63 +80,55 @@ async def check(self, module_test, events): events_json = [e.json() for e in events] events_json.sort(key=lambda x: x["timestamp"]) - # Connect to Elasticsearch - async with httpx.AsyncClient(verify=False) as client: - # Fetch all events from the index - response = await client.get( - "https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") - ) - response_json = response.json() - db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] - - # make sure we have the same number of events - assert len(events_json) == len(db_events) - - for db_event in db_events: - assert isinstance(db_event["timestamp"], float) - assert isinstance(db_event["inserted_at"], float) - - # Convert to Pydantic objects and dump them - db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] - db_events_pydantic.sort(key=lambda x: x["timestamp"]) - - # Find the main event with type DNS_NAME and data blacklanternsecurity.com - main_event = next( - ( - e - 
for e in db_events_pydantic - if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" - ), - None, - ) - assert main_event is not None, ( - "Main event with type DNS_NAME and data blacklanternsecurity.com not found" - ) - - # Ensure it has the reverse_host attribute - expected_reverse_host = "blacklanternsecurity.com"[::-1] - assert main_event.get("reverse_host") == expected_reverse_host, ( - f"reverse_host attribute is not correct, expected {expected_reverse_host}" - ) - - # Events don't match exactly because the elastic ones have reverse_host and inserted_at - assert events_json != db_events_pydantic - for db_event in db_events_pydantic: - db_event.pop("reverse_host", None) - db_event.pop("inserted_at", None) - db_event.pop("archived", None) - # They should match after removing reverse_host - assert events_json == db_events_pydantic, "Events do not match" + # Fetch all events from the index + response = _elastic_request("GET", "https://localhost:9200/bbot_test_events/_search?size=100") + response_json = json.loads(response.read()) + db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] + + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert main_event is not None, "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = 
"blacklanternsecurity.com"[::-1] + assert main_event.get("reverse_host") == expected_reverse_host, ( + f"reverse_host attribute is not correct, expected {expected_reverse_host}" + ) + + # Events don't match exactly because the elastic ones have reverse_host and inserted_at + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host", None) + db_event.pop("inserted_at", None) + db_event.pop("archived", None) + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" finally: # Clean up: Delete all documents in the index - async with httpx.AsyncClient(verify=False) as client: - response = await client.delete( - "https://localhost:9200/bbot_test_events", - auth=("elastic", "bbotislife"), - params={"ignore": "400,404"}, - ) - self.log.verbose("Deleted documents from index") + try: + _elastic_request("DELETE", "https://localhost:9200/bbot_test_events?ignore=400,404") + except URLError: + pass + self.log.verbose("Deleted documents from index") process = await asyncio.create_subprocess_exec( "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) diff --git a/bbot/test/test_step_2/module_tests/test_module_emailformat.py b/bbot/test/test_step_2/module_tests/test_module_emailformat.py index 4f3189f516..66828991d6 100644 --- a/bbot/test/test_step_2/module_tests/test_module_emailformat.py +++ b/bbot/test/test_step_2/module_tests/test_module_emailformat.py @@ -3,7 +3,7 @@ class TestEmailFormat(ModuleTestBase): async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://www.email-format.com/d/blacklanternsecurity.com/", text="""[email protected]""", ) diff --git a/bbot/test/test_step_2/module_tests/test_module_emails.py b/bbot/test/test_step_2/module_tests/test_module_emails.py index 820b5a15f1..88d40f938a 100644 --- 
a/bbot/test/test_step_2/module_tests/test_module_emails.py +++ b/bbot/test/test_step_2/module_tests/test_module_emails.py @@ -5,11 +5,11 @@ class TestEmails(ModuleTestBase): modules_overrides = ["emails", "emailformat", "skymem"] async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://www.email-format.com/d/blacklanternsecurity.com/", text="""[email protected]""", ) - module_test.httpx_mock.add_response( + module_test.blasthttp_mock.add_response( url="https://www.skymem.info/srch?q=blacklanternsecurity.com", text="

info@blacklanternsecurity.com

", ) diff --git a/bbot/test/test_step_2/module_tests/test_module_excavate.py b/bbot/test/test_step_2/module_tests/test_module_excavate.py index 4834d898ca..6d56911723 100644 --- a/bbot/test/test_step_2/module_tests/test_module_excavate.py +++ b/bbot/test/test_step_2/module_tests/test_module_excavate.py @@ -10,7 +10,7 @@ class TestExcavate(ModuleTestBase): targets = ["http://127.0.0.1:8888/", "test.notreal", "http://127.0.0.1:8888/subdir/links.html"] - modules_overrides = ["excavate", "httpx"] + modules_overrides = ["excavate", "http"] config_overrides = {"web": {"spider_distance": 1, "spider_depth": 1}, "omit_event_types": []} async def setup_before_prep(self, module_test): @@ -28,7 +28,7 @@ async def setup_before_prep(self, module_test): Help
  • 16x50 UART Driver
  • - # these ones should get emitted as URL_UNVERIFIED events (processed by httpx which has accept_js_url=True) + # these ones should get emitted as URL_UNVERIFIED events (processed by http module which has accept_js_url=True) """ @@ -77,7 +77,7 @@ def check(self, module_test, events): assert "www6.test.notreal" in event_data assert "www7.test.notreal" in event_data assert "www8.test.notreal" in event_data - # .js files should be emitted as URL_UNVERIFIED events (they are processed by httpx which has accept_js_url=True) + # .js files should be emitted as URL_UNVERIFIED events (they are processed by http module which has accept_js_url=True) # they are seen by internal modules but not by output modules assert "http://127.0.0.1:8888/a_relative.js" not in event_data assert "http://127.0.0.1:8888/link_relative.js" not in event_data @@ -190,7 +190,7 @@ def check(self, module_test, events): class TestExcavateInScopeJavascript(TestExcavate): targets = ["http://127.0.0.1:8888/"] - modules_overrides = ["excavate", "httpx", "badsecrets"] + modules_overrides = ["excavate", "http", "badsecrets"] async def setup_before_prep(self, module_test): module_test.httpserver.expect_request("/").respond_with_data( @@ -500,7 +500,7 @@ def check(self, module_test, events): class TestExcavateParameterExtraction(TestExcavate): # hunt is added as parameter extraction is only activated by one or more modules that consume WEB_PARAMETER - modules_overrides = ["excavate", "httpx", "hunt"] + modules_overrides = ["excavate", "http", "hunt"] targets = ["http://127.0.0.1:8888/"] parameter_extraction_html = """ @@ -647,7 +647,7 @@ class TestExcavateParameterExtraction_postform_noaction(ModuleTestBase): targets = ["http://127.0.0.1:8888/"] # hunt is added as parameter extraction is only activated by one or more modules that consume WEB_PARAMETER - modules_overrides = ["httpx", "excavate", "hunt"] + modules_overrides = ["http", "excavate", "hunt"] postform_extract_html = """

    Post for without action

    @@ -713,7 +713,7 @@ class TestExcavateParameterExtraction_additionalparams(ModuleTestBase): targets = ["http://127.0.0.1:8888/"] # hunt is added as parameter extraction is only activated by one or more modules that consume WEB_PARAMETER - modules_overrides = ["httpx", "excavate", "hunt"] + modules_overrides = ["http", "excavate", "hunt"] postformnoaction_extract_multiparams_html = """

    Post for without action

    @@ -774,7 +774,7 @@ class TestExcavateParameterExtraction_getparam(ModuleTestBase): targets = ["http://127.0.0.1:8888/"] # hunt is added as parameter extraction is only activated by one or more modules that consume WEB_PARAMETER - modules_overrides = ["httpx", "excavate", "hunt"] + modules_overrides = ["http", "excavate", "hunt"] getparam_extract_html = """
    ping """ @@ -796,7 +796,7 @@ class TestExcavateParameterExtraction_relativeurl(ModuleTestBase): targets = ["http://127.0.0.1:8888/"] # hunt is added as parameter extraction is only activated by one or more modules that consume WEB_PARAMETER - modules_overrides = ["httpx", "excavate", "hunt"] + modules_overrides = ["http", "excavate", "hunt"] config_overrides = {"web": {"spider_distance": 2, "spider_depth": 3}} # Secondary page that has a relative link to a traversal URL @@ -870,7 +870,7 @@ def check(self, module_test, events): class TestExcavateParameterExtraction_json(ModuleTestBase): targets = ["http://127.0.0.1:8888/"] - modules_overrides = ["httpx", "excavate", "paramminer_getparams"] + modules_overrides = ["http", "excavate", "paramminer_getparams"] config_overrides = { "modules": { "excavate": {"speculate_params": True}, @@ -902,7 +902,7 @@ def check(self, module_test, events): class TestExcavateParameterExtraction_xml(ModuleTestBase): targets = ["http://127.0.0.1:8888/"] - modules_overrides = ["httpx", "excavate", "paramminer_getparams"] + modules_overrides = ["http", "excavate", "paramminer_getparams"] config_overrides = { "modules": { "excavate": {"speculate_params": True}, @@ -960,7 +960,7 @@ class TestExcavateParameterExtraction_inputtagnovalue(ModuleTestBase): targets = ["http://127.0.0.1:8888/"] # hunt is added as parameter extraction is only activated by one or more modules that consume WEB_PARAMETER - modules_overrides = ["httpx", "excavate", "hunt"] + modules_overrides = ["http", "excavate", "hunt"] getparam_extract_html = """ """ @@ -980,7 +980,7 @@ def check(self, module_test, events): class TestExcavateParameterExtraction_jqueryjsonajax(ModuleTestBase): targets = ["http://127.0.0.1:8888/"] - modules_overrides = ["httpx", "excavate", "hunt"] + modules_overrides = ["http", "excavate", "hunt"] jsonajax_extract_html = """