Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions src/google/adk/telemetry/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,14 @@ def trace_generate_content_result(span: Span | None, llm_response: LlmResponse):
span.set_attribute(
GEN_AI_USAGE_OUTPUT_TOKENS, usage_metadata.candidates_token_count
)
try:
if usage_metadata.thoughts_token_count is not None:
span.set_attribute(
'gen_ai.usage.experimental.reasoning_tokens',
usage_metadata.thoughts_token_count,
)
except AttributeError:
pass

otel_logger.emit(
LogRecord(
Expand All @@ -768,25 +776,35 @@ def trace_inference_result(
gc_span = None
if isinstance(span, GenerateContentSpan):
gc_span = span
span = gc_span.span
otel_span = gc_span.span
else:
otel_span = span

if span is None:
if otel_span is None:
return

if llm_response.partial:
return

if finish_reason := llm_response.finish_reason:
span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason.lower()])
otel_span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason.lower()])
if usage_metadata := llm_response.usage_metadata:
if usage_metadata.prompt_token_count is not None:
span.set_attribute(
otel_span.set_attribute(
GEN_AI_USAGE_INPUT_TOKENS, usage_metadata.prompt_token_count
)
if usage_metadata.candidates_token_count is not None:
span.set_attribute(
otel_span.set_attribute(
GEN_AI_USAGE_OUTPUT_TOKENS, usage_metadata.candidates_token_count
)
try:
if usage_metadata.thoughts_token_count is not None:
otel_span.set_attribute(
'gen_ai.usage.experimental.reasoning_tokens',
usage_metadata.thoughts_token_count,
)
except AttributeError:
pass

if is_experimental_semconv() and isinstance(gc_span, GenerateContentSpan):
set_operation_details_attributes_from_response(
Expand Down
105 changes: 105 additions & 0 deletions tests/unittests/telemetry/test_spans.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from google.adk.telemetry.tracing import ADK_CAPTURE_MESSAGE_CONTENT_IN_SPANS
from google.adk.telemetry.tracing import trace_agent_invocation
from google.adk.telemetry.tracing import trace_call_llm
from google.adk.telemetry.tracing import trace_generate_content_result
from google.adk.telemetry.tracing import trace_inference_result
from google.adk.telemetry.tracing import trace_merged_tool_calls
from google.adk.telemetry.tracing import trace_send_data
Expand Down Expand Up @@ -1284,3 +1285,107 @@ def test_trace_tool_call_with_standard_error(
mock.call('error.type', 'ValueError')
in mock_span_fixture.set_attribute.call_args_list
)


def test_trace_inference_result_with_thinking_tokens(mock_span_fixture):
  """Test trace_inference_result exports thoughts_token_count."""
  usage = types.GenerateContentResponseUsageMetadata(
      total_token_count=110,
      prompt_token_count=50,
      candidates_token_count=10,
      thoughts_token_count=50,
  )
  response = LlmResponse(
      turn_complete=True,
      finish_reason=types.FinishReason.STOP,
      usage_metadata=usage,
  )

  trace_inference_result(mock_span_fixture, response)

  # Every token count in usage_metadata must surface as a span attribute,
  # including the experimental reasoning-token attribute.
  expected_attributes = {
      GEN_AI_USAGE_INPUT_TOKENS: 50,
      GEN_AI_USAGE_OUTPUT_TOKENS: 10,
      'gen_ai.usage.experimental.reasoning_tokens': 50,
  }
  for attr_key, attr_value in expected_attributes.items():
    mock_span_fixture.set_attribute.assert_any_call(attr_key, attr_value)


def test_trace_inference_result_without_thinking_tokens(mock_span_fixture):
  """Test trace_inference_result works when thoughts_token_count is None."""
  response = LlmResponse(
      turn_complete=True,
      finish_reason=types.FinishReason.STOP,
      usage_metadata=types.GenerateContentResponseUsageMetadata(
          total_token_count=60,
          prompt_token_count=50,
          candidates_token_count=10,
      ),
  )

  trace_inference_result(mock_span_fixture, response)

  mock_span_fixture.set_attribute.assert_any_call(GEN_AI_USAGE_INPUT_TOKENS, 50)
  mock_span_fixture.set_attribute.assert_any_call(
      GEN_AI_USAGE_OUTPUT_TOKENS, 10
  )
  # The experimental reasoning-token attribute must NOT be recorded when the
  # response carries no thoughts_token_count.
  assert not any(
      recorded.args[0] == 'gen_ai.usage.experimental.reasoning_tokens'
      for recorded in mock_span_fixture.set_attribute.call_args_list
  )


def test_trace_generate_content_result_with_thinking_tokens(mock_span_fixture):
  """Test trace_generate_content_result exports thoughts_token_count."""
  usage = types.GenerateContentResponseUsageMetadata(
      total_token_count=110,
      prompt_token_count=50,
      candidates_token_count=10,
      thoughts_token_count=50,
  )
  response = LlmResponse(
      turn_complete=True,
      finish_reason=types.FinishReason.STOP,
      usage_metadata=usage,
  )

  trace_generate_content_result(mock_span_fixture, response)

  # Every token count in usage_metadata must surface as a span attribute,
  # including the experimental reasoning-token attribute.
  expected_attributes = {
      GEN_AI_USAGE_INPUT_TOKENS: 50,
      GEN_AI_USAGE_OUTPUT_TOKENS: 10,
      'gen_ai.usage.experimental.reasoning_tokens': 50,
  }
  for attr_key, attr_value in expected_attributes.items():
    mock_span_fixture.set_attribute.assert_any_call(attr_key, attr_value)


def test_trace_generate_content_result_without_thinking_tokens(
    mock_span_fixture,
):
  """Test trace_generate_content_result works when thoughts_token_count is None."""
  response = LlmResponse(
      turn_complete=True,
      finish_reason=types.FinishReason.STOP,
      usage_metadata=types.GenerateContentResponseUsageMetadata(
          total_token_count=60,
          prompt_token_count=50,
          candidates_token_count=10,
      ),
  )

  trace_generate_content_result(mock_span_fixture, response)

  mock_span_fixture.set_attribute.assert_any_call(GEN_AI_USAGE_INPUT_TOKENS, 50)
  mock_span_fixture.set_attribute.assert_any_call(
      GEN_AI_USAGE_OUTPUT_TOKENS, 10
  )
  # The experimental reasoning-token attribute must NOT be recorded when the
  # response carries no thoughts_token_count.
  assert not any(
      recorded.args[0] == 'gen_ai.usage.experimental.reasoning_tokens'
      for recorded in mock_span_fixture.set_attribute.call_args_list
  )