Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### New Features and Improvements

### Bug Fixes
* Fixed Databricks CLI authentication to detect when the cached token's scopes don't match the SDK's configured scopes. Previously, a scope mismatch was silently ignored, causing requests to be made with the wrong permissions. The SDK now raises an error with instructions to re-authenticate.

### Security Vulnerabilities

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@

import com.databricks.sdk.core.oauth.CachedTokenSource;
import com.databricks.sdk.core.oauth.OAuthHeaderFactory;
import com.databricks.sdk.core.oauth.Token;
import com.databricks.sdk.core.oauth.TokenSource;
import com.databricks.sdk.core.utils.OSUtils;
import com.databricks.sdk.support.InternalApi;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.nio.charset.StandardCharsets;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -15,6 +20,22 @@ public class DatabricksCliCredentialsProvider implements CredentialsProvider {

public static final String DATABRICKS_CLI = "databricks-cli";

private static final ObjectMapper MAPPER = new ObjectMapper();

  /**
   * Thrown when the cached CLI token's scopes don't match the SDK's configured scopes.
   *
   * <p>Extends {@link DatabricksException} so it propagates through the standard SDK error
   * handling; kept as a distinct type so callers can catch a scope mismatch separately from
   * other CLI failures.
   */
  static class ScopeMismatchException extends DatabricksException {
    ScopeMismatchException(String message) {
      super(message);
    }
  }

/**
* offline_access controls whether the IdP issues a refresh token. It does not grant any API
* permissions, so its presence or absence should not cause a scope mismatch error.
*/
private static final Set<String> SCOPES_IGNORED_FOR_COMPARISON =
Collections.singleton("offline_access");

@Override
public String authType() {
return DATABRICKS_CLI;
Expand Down Expand Up @@ -92,13 +113,41 @@ public OAuthHeaderFactory configure(DatabricksConfig config) {
return null;
}

// Wrap the token source with scope validation so that every token — both the
// initial fetch and subsequent refreshes — is checked against the configured scopes.
TokenSource effectiveSource;
if (config.isScopesExplicitlySet()) {
List<String> scopes = config.getScopes();
effectiveSource =
new TokenSource() {
@Override
public Token getToken() {
Token t = tokenSource.getToken();
validateTokenScopes(t, scopes, host);
return t;
}
};
} else {
effectiveSource = tokenSource;
}

CachedTokenSource cachedTokenSource =
new CachedTokenSource.Builder(tokenSource)
new CachedTokenSource.Builder(effectiveSource)
.setAsyncDisabled(config.getDisableAsyncTokenRefresh())
.build();
cachedTokenSource.getToken(); // We need this for checking if databricks CLI is installed.

return OAuthHeaderFactory.fromTokenSource(cachedTokenSource);
} catch (ScopeMismatchException e) {
// Scope validation failed. When the user explicitly selected databricks-cli auth,
// surface the mismatch immediately so they get an actionable error. When we're being
// tried as part of the default credential chain, step aside so other providers get
// a chance.
if (DATABRICKS_CLI.equals(config.getAuthType())) {
throw e;
}
LOG.warn("Databricks CLI token scope mismatch, skipping: {}", e.getMessage());
return null;
} catch (DatabricksException e) {
String stderr = e.getMessage();
if (stderr.contains("not found")) {
Expand All @@ -112,4 +161,91 @@ public OAuthHeaderFactory configure(DatabricksConfig config) {
throw e;
}
}

/**
* Validate that the token's scopes match the requested scopes from the config.
*
* <p>The {@code databricks auth token} command does not accept scopes yet. It returns whatever
* token was cached from the last {@code databricks auth login}. If a user configures specific
* scopes in the SDK config but their cached CLI token was issued with different scopes, requests
* will silently use the wrong scopes. This check surfaces that mismatch early with an actionable
* error telling the user how to re-authenticate with the correct scopes.
*/
static void validateTokenScopes(Token token, List<String> requestedScopes, String host) {
Map<String, Object> claims = getJwtClaims(token.getAccessToken());
if (claims == null) {
LOG.debug("Could not decode token as JWT to validate scopes");
return;
}

Object tokenScopesRaw = claims.get("scope");
if (tokenScopesRaw == null) {
LOG.debug("Token does not contain 'scope' claim, skipping scope validation");
return;
}

Set<String> tokenScopes = parseScopeClaim(tokenScopesRaw);
if (tokenScopes == null) {
LOG.debug("Unexpected 'scope' claim type: {}", tokenScopesRaw.getClass());
return;
}

tokenScopes.removeAll(SCOPES_IGNORED_FOR_COMPARISON);
Set<String> requested = new HashSet<>(requestedScopes);
requested.removeAll(SCOPES_IGNORED_FOR_COMPARISON);

if (!tokenScopes.equals(requested)) {
List<String> sortedTokenScopes = new ArrayList<>(tokenScopes);
Collections.sort(sortedTokenScopes);
List<String> sortedRequested = new ArrayList<>(requested);
Collections.sort(sortedRequested);

throw new ScopeMismatchException(
String.format(
"Token issued by Databricks CLI has scopes %s which do not match "
+ "the configured scopes %s. Please re-authenticate "
+ "with the desired scopes by running `databricks auth login` with the --scopes flag."
+ "Scopes default to all-apis.",
sortedTokenScopes, sortedRequested));
}
}

/**
* Decode a JWT access token and return its payload claims. Returns null if the token is not a
* valid JWT.
*/
private static Map<String, Object> getJwtClaims(String accessToken) {
String[] parts = accessToken.split("\\.");
if (parts.length != 3) {
LOG.debug("Tried to decode access token as JWT, but failed: {} components", parts.length);
return null;
}
try {
byte[] payloadBytes = Base64.getUrlDecoder().decode(parts[1]);
String payloadJson = new String(payloadBytes, StandardCharsets.UTF_8);
@SuppressWarnings("unchecked")
Map<String, Object> claims = MAPPER.readValue(payloadJson, Map.class);
return claims;
} catch (IllegalArgumentException | JsonProcessingException e) {
LOG.debug("Failed to decode JWT claims: {}", e.getMessage());
return null;
}
}

/**
* Parse the JWT "scope" claim, which can be either a space-delimited string or a JSON array.
* Returns null if the type is unexpected.
*/
private static Set<String> parseScopeClaim(Object scopeClaim) {
if (scopeClaim instanceof String) {
return new HashSet<>(Arrays.asList(((String) scopeClaim).split("\\s+")));
} else if (scopeClaim instanceof List) {
Set<String> scopes = new HashSet<>();
for (Object s : (List<?>) scopeClaim) {
scopes.add(String.valueOf(s));
}
return scopes;
}
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,15 @@ public DatabricksConfig setScopes(List<String> scopes) {
return this;
}

/**
* Returns true if scopes were explicitly configured (either directly in code or loaded from a
* config file). When scopes are not set, getScopes() defaults to ["all-apis"], which would cause
* false-positive mismatches during scope validation.
*/
boolean isScopesExplicitlySet() {
return scopes != null && !scopes.isEmpty();
}

public String getProfile() {
return profile;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,13 @@ void testBuildHostArgs_UnifiedHostFalse_WithAccountHost() {
CLI_PATH, "auth", "token", "--host", ACCOUNT_HOST, "--account-id", ACCOUNT_ID),
cmd);
}

@Test
void testScopesExplicitlySetFlag() {
DatabricksConfig config = new DatabricksConfig();
assertFalse(config.isScopesExplicitlySet());

config.setScopes(Arrays.asList("sql", "clusters"));
assertTrue(config.isScopesExplicitlySet());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
package com.databricks.sdk.core;

import static org.junit.jupiter.api.Assertions.*;

import com.databricks.sdk.core.oauth.Token;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.*;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

class DatabricksCliScopeValidationTest {

private static final String HOST = "https://my-workspace.cloud.databricks.com";
private static final ObjectMapper MAPPER = new ObjectMapper();

/** Builds a fake JWT (header.payload.signature) with the given claims. */
private static String makeJwt(Map<String, Object> claims) {
try {
String header =
Base64.getUrlEncoder()
.withoutPadding()
.encodeToString("{\"alg\":\"none\"}".getBytes(StandardCharsets.UTF_8));
String payload =
Base64.getUrlEncoder().withoutPadding().encodeToString(MAPPER.writeValueAsBytes(claims));
return header + "." + payload + ".sig";
} catch (Exception e) {
throw new RuntimeException(e);
}
}

private static Token makeToken(Map<String, Object> claims) {
return new Token(makeJwt(claims), "Bearer", Instant.now().plusSeconds(3600));
}

static List<Arguments> scopeValidationCases() {
return Arrays.asList(
// Exact match (offline_access filtered out).
Arguments.of(
Collections.singletonMap("scope", "sql offline_access"),
Collections.singletonList("sql"),
false,
"match"),
// Mismatch throws.
Arguments.of(
Collections.singletonMap("scope", "all-apis offline_access"),
Collections.singletonList("sql"),
true,
"mismatch"),
// offline_access on token only — still equivalent.
Arguments.of(
Collections.singletonMap("scope", "all-apis offline_access"),
Collections.singletonList("all-apis"),
false,
"offline_access_on_token_only"),
// offline_access in config only — still equivalent.
Arguments.of(
Collections.singletonMap("scope", "all-apis"),
Arrays.asList("all-apis", "offline_access"),
false,
"offline_access_in_config_only"),
// Scope claim as list instead of string.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why do we handle both list and string?

Arguments.of(
new HashMap<String, Object>() {
{
put("scope", Arrays.asList("sql", "offline_access"));
}
},
Collections.singletonList("sql"),
false,
"scope_as_list"));
}

@ParameterizedTest(name = "{3}")
@MethodSource("scopeValidationCases")
void testScopeValidation(
Map<String, Object> tokenClaims,
List<String> configuredScopes,
boolean expectError,
String testName) {
Token token = makeToken(tokenClaims);

if (expectError) {
assertThrows(
DatabricksCliCredentialsProvider.ScopeMismatchException.class,
() ->
DatabricksCliCredentialsProvider.validateTokenScopes(token, configuredScopes, HOST));
} else {
assertDoesNotThrow(
() ->
DatabricksCliCredentialsProvider.validateTokenScopes(token, configuredScopes, HOST));
}
}

@Test
void testNoScopeClaimSkipsValidation() {
Token token = makeToken(Collections.singletonMap("sub", "user@example.com"));
assertDoesNotThrow(
() ->
DatabricksCliCredentialsProvider.validateTokenScopes(
token, Collections.singletonList("sql"), HOST));
}

@Test
void testNonJwtTokenSkipsValidation() {
Token token = new Token("opaque-token-string", "Bearer", Instant.now().plusSeconds(3600));
assertDoesNotThrow(
() ->
DatabricksCliCredentialsProvider.validateTokenScopes(
token, Collections.singletonList("sql"), HOST));
}

@Test
void testErrorMessageContainsReauthCommand() {
Token token = makeToken(Collections.singletonMap("scope", "all-apis"));
DatabricksCliCredentialsProvider.ScopeMismatchException e =
assertThrows(
DatabricksCliCredentialsProvider.ScopeMismatchException.class,
() ->
DatabricksCliCredentialsProvider.validateTokenScopes(
token, Arrays.asList("sql", "offline_access"), HOST));
assertTrue(
e.getMessage().contains("databricks auth login"),
"Expected re-auth command in error message, got: " + e.getMessage());
assertTrue(
e.getMessage().contains("do not match the configured scopes"),
"Expected scope mismatch details in error message, got: " + e.getMessage());
}
}
Loading