From a9ec5051e2f84b0b21f6f43cadbfcd1320ff482b Mon Sep 17 00:00:00 2001 From: Doron Somech Date: Tue, 13 Aug 2024 00:06:57 +0300 Subject: [PATCH] remote cache logger - new plugin to help debug the remote cache --- .../using-pants/key-concepts/backends.mdx | 1 + pants.toml | 1 + .../tools/remote_cache_logger/BUILD | 3 + .../tools/remote_cache_logger/__init__.py | 0 .../tools/remote_cache_logger/register.py | 10 ++ .../backend/tools/remote_cache_logger/BUILD | 4 + .../tools/remote_cache_logger/__init__.py | 0 .../tools/remote_cache_logger/rules.py | 94 +++++++++++++++++++ src/python/pants/bin/BUILD | 1 + .../remote/src/remote_cache.rs | 51 ++++++++++ 10 files changed, 165 insertions(+) create mode 100644 src/python/pants/backend/experimental/tools/remote_cache_logger/BUILD create mode 100644 src/python/pants/backend/experimental/tools/remote_cache_logger/__init__.py create mode 100644 src/python/pants/backend/experimental/tools/remote_cache_logger/register.py create mode 100644 src/python/pants/backend/tools/remote_cache_logger/BUILD create mode 100644 src/python/pants/backend/tools/remote_cache_logger/__init__.py create mode 100644 src/python/pants/backend/tools/remote_cache_logger/rules.py diff --git a/docs/docs/using-pants/key-concepts/backends.mdx b/docs/docs/using-pants/key-concepts/backends.mdx index 66192148cbb..2bfd220b59f 100644 --- a/docs/docs/using-pants/key-concepts/backends.mdx +++ b/docs/docs/using-pants/key-concepts/backends.mdx @@ -120,3 +120,4 @@ The list of all backends (both stable and experimental) is also available via `p | `pants.backend.experimental.visibility` | Enables `__dependencies_rules__` and `__dependents_rules__` | [Visibility](../validating-dependencies) | | `pants.backend.python.providers.experimental.pyenv` | Enables Pants to manage appropriate Python interpreters via pyenv | | | `pants.backend.python.providers.experimental.pyenv.custom_install` | Enables customising how the pyenv provider builds a Python interpreter | | +| `pants.backend.tools.experimental.remote_cache_logger` | Enables the remote cache logger for debeugging remote cache \ No newline at end of file diff --git a/pants.toml b/pants.toml index 29a0eb0f31a..407437c1fc3 100644 --- a/pants.toml +++ b/pants.toml @@ -35,6 +35,7 @@ backend_packages.add = [ "pants.backend.experimental.scala.debug_goals", "pants.backend.experimental.tools.workunit_logger", "pants.backend.experimental.typescript", + "pants.backend.experimental.tools.remote_cache_logger", "pants.backend.experimental.visibility", "pants.backend.tools.preamble", "pants.backend.tools.taplo", diff --git a/src/python/pants/backend/experimental/tools/remote_cache_logger/BUILD b/src/python/pants/backend/experimental/tools/remote_cache_logger/BUILD new file mode 100644 index 00000000000..752b6d575e9 --- /dev/null +++ b/src/python/pants/backend/experimental/tools/remote_cache_logger/BUILD @@ -0,0 +1,3 @@ +# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). +python_sources() diff --git a/src/python/pants/backend/experimental/tools/remote_cache_logger/__init__.py b/src/python/pants/backend/experimental/tools/remote_cache_logger/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/python/pants/backend/experimental/tools/remote_cache_logger/register.py b/src/python/pants/backend/experimental/tools/remote_cache_logger/register.py new file mode 100644 index 00000000000..56637071f27 --- /dev/null +++ b/src/python/pants/backend/experimental/tools/remote_cache_logger/register.py @@ -0,0 +1,10 @@ +# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +from __future__ import annotations + +from pants.backend.tools.remote_cache_logger import rules as remote_cache_logger_rules + + +def rules(): + return remote_cache_logger_rules.rules() diff --git a/src/python/pants/backend/tools/remote_cache_logger/BUILD b/src/python/pants/backend/tools/remote_cache_logger/BUILD new file mode 100644 index 00000000000..68aaac88424 --- /dev/null +++ b/src/python/pants/backend/tools/remote_cache_logger/BUILD @@ -0,0 +1,4 @@ +# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). + +python_sources() diff --git a/src/python/pants/backend/tools/remote_cache_logger/__init__.py b/src/python/pants/backend/tools/remote_cache_logger/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/python/pants/backend/tools/remote_cache_logger/rules.py b/src/python/pants/backend/tools/remote_cache_logger/rules.py new file mode 100644 index 00000000000..2255a2a305b --- /dev/null +++ b/src/python/pants/backend/tools/remote_cache_logger/rules.py @@ -0,0 +1,94 @@ +# Copyright 2023 Pants project contributors (see CONTRIBUTORS.md). +# Licensed under the Apache License, Version 2.0 (see LICENSE). +import csv +import logging +from typing import Any, Dict, Mapping, Tuple + +from pants.engine.internals.scheduler import Workunit +from pants.engine.rules import collect_rules, rule +from pants.engine.streaming_workunit_handler import ( + StreamingWorkunitContext, + WorkunitsCallback, + WorkunitsCallbackFactory, + WorkunitsCallbackFactoryRequest, +) +from pants.engine.unions import UnionRule +from pants.option.option_types import BoolOption, StrOption +from pants.option.subsystem import Subsystem +from pants.util.dirutil import safe_open + +logger = logging.getLogger(__name__) + + +class RemoteCacheLoggerCallback(WorkunitsCallback): + """Configuration for RemoteCacheLogger.""" + + def __init__(self, wulogger: "RemoteCacheLogger"): + self.wulogger = wulogger + self._completed_workunits: Dict[str, Mapping[str, Any]] = {} + + + @property + def can_finish_async(self) -> bool: + return False + + def __call__( + self, + *, + completed_workunits: Tuple[Workunit, ...], + started_workunits: Tuple[Workunit, ...], + context: StreamingWorkunitContext, + finished: bool = False, + **kwargs: Any, + ) -> None: + for wu in completed_workunits: + if wu["name"] == "remote_cache_read_speculation": + # user_metadata entries are stored directly in metadata dict (not nested) + metadata = wu.get("metadata", {}) + if not metadata or "request_description" not in metadata: + # Skip workunits with empty or incomplete metadata + continue + self._completed_workunits[wu["span_id"]] = { + "description": metadata["request_description"], + "action_digest": metadata["action_digest"], + "outcome": metadata["outcome"], + "request": metadata["request"], + } + if finished: + logger.debug(f"RemoteCacheLogger finished: captured {len(self._completed_workunits)} workunits") + filepath = f"{self.wulogger.logdir}/{context.run_tracker.run_id}.csv" + with safe_open(filepath, "w", newline="") as f: + fieldnames = ["description", "action_digest", "outcome", "request"] + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(self._completed_workunits.values()) + logger.debug(f"Wrote {len(self._completed_workunits)} entries to {filepath}") + + +class RemoteCacheLoggerCallbackFactoryRequest: + """A unique request type that is installed to trigger construction of our WorkunitsCallback.""" + + +class RemoteCacheLogger(Subsystem): + options_scope = "remote-cache-logger" + help = "Remote Cache Logger subsystem. Useful for debugging remote cache." + + enabled = BoolOption("--enabled", default=False, help="Whether to enable remote cache logging.") + logdir = StrOption("--logdir", default=".pants.d/workdir", help="Where to write the log to.") + + +@rule +def construct_callback( + _: RemoteCacheLoggerCallbackFactoryRequest, + wulogger: RemoteCacheLogger, +) -> WorkunitsCallbackFactory: + return WorkunitsCallbackFactory( + lambda: RemoteCacheLoggerCallback(wulogger) if wulogger.enabled else None + ) + + +def rules(): + return [ + UnionRule(WorkunitsCallbackFactoryRequest, RemoteCacheLoggerCallbackFactoryRequest), + *collect_rules(), + ] diff --git a/src/python/pants/bin/BUILD b/src/python/pants/bin/BUILD index fbaa0799d89..f3e9132d36e 100644 --- a/src/python/pants/bin/BUILD +++ b/src/python/pants/bin/BUILD @@ -93,6 +93,7 @@ target( "src/python/pants/backend/experimental/tools/trufflehog", "src/python/pants/backend/experimental/tools/workunit_logger", "src/python/pants/backend/experimental/tools/yamllint", + "src/python/pants/backend/experimental/tools/remote_cache_logger", "src/python/pants/backend/experimental/typescript", "src/python/pants/backend/experimental/visibility", "src/python/pants/backend/google_cloud_function/python", diff --git a/src/rust/process_execution/remote/src/remote_cache.rs b/src/rust/process_execution/remote/src/remote_cache.rs index 5b71f95e864..2379f36b146 100644 --- a/src/rust/process_execution/remote/src/remote_cache.rs +++ b/src/rust/process_execution/remote/src/remote_cache.rs @@ -26,6 +26,7 @@ use process_execution::{ populate_fallible_execution_result, }; use process_execution::{EntireExecuteRequest, make_execute_request}; +use workunit_store::UserMetadataItem; // Consumers of this crate shouldn't need to worry about the exact crate structure that comes // together to make a remote cache command runner. @@ -318,6 +319,12 @@ impl CommandRunner { in_workunit!( "remote_cache_read_speculation", Level::Trace, + user_metadata = vec![( + "action_digest".to_owned(), + UserMetadataItem::String(format!("{action_digest:?}"))), + ("request_description".to_owned(), UserMetadataItem::String(request.description.clone())), + ("request".to_owned(), UserMetadataItem::String(format!("{request:?}"))), + ], |workunit| async move { tokio::select! { cache_result = &mut cache_read_future => { @@ -330,6 +337,24 @@ impl CommandRunner { } local_result = &mut local_execution_future => { workunit.increment_counter(Metric::RemoteCacheSpeculationLocalCompletedFirst, 1); + workunit.update_metadata(|initial| { + initial.map(|(initial, _)| { + ( + WorkunitMetadata { + user_metadata: initial + .user_metadata + .into_iter() + .chain(std::iter::once(( + "outcome".to_owned(), + UserMetadataItem::String("remote miss".to_owned()), + ))) + .collect(), + ..initial + }, + Level::Trace, + ) + }) + }); local_result.map(|res| (res, false)) } } @@ -366,6 +391,14 @@ impl CommandRunner { ( WorkunitMetadata { desc: initial.desc.as_ref().map(|desc| format!("Hit: {desc}")), + user_metadata: initial + .user_metadata + .into_iter() + .chain(std::iter::once(( + "outcome".to_owned(), + UserMetadataItem::String("remote hit".to_owned()), + ))) + .collect(), ..initial }, Level::Debug, @@ -374,6 +407,24 @@ impl CommandRunner { }); Ok((cached_response, true)) } else { + workunit.update_metadata(|initial| { + initial.map(|(initial, _)| { + ( + WorkunitMetadata { + user_metadata: initial + .user_metadata + .into_iter() + .chain(std::iter::once(( + "outcome".to_owned(), + UserMetadataItem::String("remote miss".to_owned()), + ))) + .collect(), + ..initial + }, + Level::Trace, + ) + }) + }); // Note that we don't increment a counter here, as there is nothing of note in this // scenario: the remote cache did not save unnecessary local work, nor was the remote // trip unusually slow such that local execution was faster.