Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/msal/token_cache.py: 28%

1import json

2import threading

3import time

4import logging

5import warnings

7from .authority import canonicalize

8from .oauth2cli.oidc import decode_part, decode_id_token

9from .oauth2cli.oauth2 import Client

12logger = logging.getLogger(__name__)

13_GRANT_TYPE_BROKER = "broker"

def is_subdict_of(small, big):
    """Return True when every key/value pair of ``small`` also appears in ``big``.

    The check is done pair by pair, so it does not copy ``big`` and it accepts
    keys of any hashable type (a ``dict(big, **small)`` based check would raise
    ``TypeError`` for non-string keys).

    :param dict small: The candidate subset. An empty dict matches anything.
    :param dict big: The dict that is expected to contain ``small``.
    :return: ``True`` if ``small`` is contained in ``big``, ``False`` otherwise.
    """
    return all(
        key in big and big[key] == value
        for key, value in small.items())

def _get_username(id_token_claims):
    """Pick the username out of a dict of ID token claims.

    ``preferred_username`` (AAD) wins whenever that key is present;
    otherwise the ``upn`` claim (ADFS 2019) is used, which may be ``None``.
    """
    adfs_upn = id_token_claims.get("upn")  # ADFS 2019
    return id_token_claims.get("preferred_username", adfs_upn)  # AAD

class TokenCache(object):
    """This is considered as a base class containing minimal cache behavior.

    Although it maintains tokens using unified schema across all MSAL libraries,
    this class does not serialize/persist them.
    See subclass :class:`SerializableTokenCache` for details on serialization.

    Entries live in ``self._cache`` as ``{credential_type: {key: entry}}``.
    Each key is produced by the matching callable in ``self.key_makers``,
    and reads/writes of ``self._cache`` happen under a re-entrant lock.
    """

    class CredentialType:
        ACCESS_TOKEN = "AccessToken"
        REFRESH_TOKEN = "RefreshToken"
        ACCOUNT = "Account"  # Not exactly a credential type, but we put it here
        ID_TOKEN = "IdToken"
        APP_METADATA = "AppMetadata"

    class AuthorityType:
        ADFS = "ADFS"
        MSSTS = "MSSTS"  # MSSTS means AAD v2 for both AAD & MSA

    def __init__(self):
        self._lock = threading.RLock()
        self._cache = {}
        self.key_makers = {
            # Note: We have changed token key format before when ordering scopes;
            # changing token key won't result in cache miss.
            self.CredentialType.REFRESH_TOKEN:
                lambda home_account_id=None, environment=None, client_id=None,
                        target=None, **ignored_payload_from_a_real_token:
                    "-".join([
                        home_account_id or "",
                        environment or "",
                        self.CredentialType.REFRESH_TOKEN,
                        client_id or "",
                        "",  # RT is cross-tenant in AAD
                        target or "",  # raw value could be None if deserialized from other SDK
                        ]).lower(),
            self.CredentialType.ACCESS_TOKEN:
                lambda home_account_id=None, environment=None, client_id=None,
                        realm=None, target=None,
                        # Note: New field(s) can be added here
                        #key_id=None,
                        **ignored_payload_from_a_real_token:
                    "-".join([  # Note: Could use a hash here to shorten key length
                        home_account_id or "",
                        environment or "",
                        self.CredentialType.ACCESS_TOKEN,
                        client_id or "",
                        realm or "",
                        target or "",
                        #key_id or "",  # So ATs of different key_id can coexist
                        ]).lower(),
            self.CredentialType.ID_TOKEN:
                lambda home_account_id=None, environment=None, client_id=None,
                        realm=None, **ignored_payload_from_a_real_token:
                    "-".join([
                        home_account_id or "",
                        environment or "",
                        self.CredentialType.ID_TOKEN,
                        client_id or "",
                        realm or "",
                        ""  # Albeit irrelevant, schema requires an empty scope here
                        ]).lower(),
            self.CredentialType.ACCOUNT:
                lambda home_account_id=None, environment=None, realm=None,
                        **ignored_payload_from_a_real_entry:
                    "-".join([
                        home_account_id or "",
                        environment or "",
                        realm or "",
                        ]).lower(),
            self.CredentialType.APP_METADATA:
                lambda environment=None, client_id=None, **kwargs:
                    "appmetadata-{}-{}".format(environment or "", client_id or ""),
            }

    def _get_access_token(
        self,
        home_account_id, environment, client_id, realm, target,  # Together they form a compound key
        default=None,
    ):  # O(1)
        """Look up one access token entry by its compound key.

        :param target: An iterable of scope strings, already in the order
            used when the entry was stored; it is joined with spaces.
        :param default: Returned when no entry is stored under the key.
        """
        return self._get(
            self.CredentialType.ACCESS_TOKEN,
            self.key_makers[TokenCache.CredentialType.ACCESS_TOKEN](
                home_account_id=home_account_id,
                environment=environment,
                client_id=client_id,
                realm=realm,
                target=" ".join(target),
            ),
            default=default)

    def _get_app_metadata(self, environment, client_id, default=None):  # O(1)
        """Look up the app metadata entry for (environment, client_id)."""
        return self._get(
            self.CredentialType.APP_METADATA,
            self.key_makers[TokenCache.CredentialType.APP_METADATA](
                environment=environment,
                client_id=client_id,
            ),
            default=default)

    def _get(self, credential_type, key, default=None):  # O(1)
        """Return the entry stored under ``key`` for ``credential_type``,
        or ``default`` when there is none."""
        with self._lock:
            return self._cache.get(credential_type, {}).get(key, default)

    @staticmethod
    def _is_matching(entry: dict, query: dict, target_set: set = None) -> bool:
        """Tell whether ``entry`` satisfies ``query`` and covers ``target_set``.

        :param entry: A cache entry.
        :param query: Key/value pairs the entry must contain; falsy means no filter.
        :param target_set: Scopes that must all appear in the entry's
            space-separated ``target``; falsy means no scope filter.
        """
        if not is_subdict_of(query or {}, entry):
            return False
        if not target_set:
            return True
        # An entry deserialized from another SDK may carry "target": None,
        # so fall back to an empty string rather than calling None.split().
        entry_scopes = set((entry.get("target") or "").split())
        return target_set <= entry_scopes

    def search(self, credential_type, target=None, query=None, *, now=None):  # O(n) generator
        """Returns a generator of matching entries.

        It is O(1) for AT hits, and O(n) for other types.
        Note that it holds a lock during the entire search.

        Expired access tokens encountered during the O(n) scan are not
        yielded; they are removed from the cache once the scan completes.

        :param credential_type: One of the :class:`CredentialType` values.
        :param target: An optional iterable of scopes the entry must cover.
        :param query: An optional dict of key/value pairs the entry must contain.
        :param now: Optional current time in epoch seconds (defaults to time.time()).
        """
        target = sorted(target or [])  # Match the order sorted by add()

        preferred_result = None
        if (credential_type == self.CredentialType.ACCESS_TOKEN
                and isinstance(query, dict)
                and "home_account_id" in query and "environment" in query
                and "client_id" in query and "realm" in query and target
        ):  # Special case for O(1) AT lookup
            preferred_result = self._get_access_token(
                query["home_account_id"], query["environment"],
                query["client_id"], query["realm"], target)
            if preferred_result and self._is_matching(
                preferred_result, query,
                # Needs no target_set here because it is satisfied by dict key
            ):
                yield preferred_result

        target_set = set(target)
        with self._lock:
            # O(n) search. The key is NOT used in search.
            now = int(time.time() if now is None else now)
            expired_access_tokens = [
                # Especially when/if we key ATs by ephemeral fields such as key_id,
                # stale ATs keyed by an old key_id would stay forever.
                # Here we collect them for their removal.
            ]
            # Iterate over a snapshot: the consumer runs between yields and may
            # add/remove entries (the lock is re-entrant), which would otherwise
            # raise "dictionary changed size during iteration".
            for entry in list(self._cache.get(credential_type, {}).values()):
                if (  # Automatically delete expired access tokens
                    credential_type == self.CredentialType.ACCESS_TOKEN
                    and int(entry["expires_on"]) < now
                ):
                    expired_access_tokens.append(entry)  # Removed after the loop
                    continue
                if (entry != preferred_result  # Avoid yielding the same entry twice
                    and self._is_matching(entry, query, target_set=target_set)
                ):
                    yield entry
            for at in expired_access_tokens:
                self.remove_at(at)

    def find(self, credential_type, target=None, query=None, *, now=None):
        """Equivalent to list(search(...)).

        Deprecated: emits a ``DeprecationWarning`` on every call.
        """
        warnings.warn(
            "Use list(search(...)) instead to explicitly get a list.",
            DeprecationWarning,
            stacklevel=2,  # Attribute the warning to the caller of find()
        )
        return list(self.search(credential_type, target=target, query=query, now=now))

    def add(self, event, now=None):
        """Handle a token obtaining event, and add tokens into cache.

        :param dict event: Typically contains client_id, scope, token_endpoint,
            response, params, data, grant_type.
        :param now: Optional current time in epoch seconds (defaults to time.time()).
        """
        def make_clean_copy(dictionary, sensitive_fields):  # Masks sensitive info
            return {
                k: "********" if k in sensitive_fields else v
                for k, v in dictionary.items()
            }
        # Building and serializing the masked copy is only worth it
        # when the debug record will actually be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            clean_event = dict(
                event,
                data=make_clean_copy(event.get("data", {}), (
                    "password", "client_secret", "refresh_token", "assertion",
                    )),
                response=make_clean_copy(event.get("response", {}), (
                    "id_token_claims",  # Provided by broker
                    "access_token", "refresh_token", "id_token", "username",
                    )),
                )
            logger.debug("event=%s", json.dumps(
                # We examined and concluded that this log won't have Log Injection risk,
                # because the event payload is already in JSON so CR/LF will be escaped.
                clean_event,
                indent=4, sort_keys=True,
                default=str,  # assertion is in bytes in Python 3
                ))
        return self.__add(event, now=now)

    def __parse_account(self, response, id_token_claims):
        """Return client_info and home_account_id"""
        if "client_info" in response:  # It happens when client_info and profile are in request
            client_info = json.loads(decode_part(response["client_info"]))
            if "uid" in client_info and "utid" in client_info:
                return client_info, "{uid}.{utid}".format(**client_info)
            # https://github.com/AzureAD/microsoft-authentication-library-for-python/issues/387
        if id_token_claims:  # This would be an end user on ADFS-direct scenario
            sub = id_token_claims["sub"]  # "sub" always exists, per OIDC specs
            return {"uid": sub}, sub
        # client_credentials flow will reach this code path
        return {}, None

    def __add(self, event, now=None):
        """Store the tokens, account and app metadata carried by ``event``.

        Each kind of entry is written through :func:`modify`, so subclasses
        that override ``modify`` observe every write.
        """
        # event typically contains: client_id, scope, token_endpoint,
        # response, params, data, grant_type
        environment = realm = None
        if "token_endpoint" in event:
            _, environment, realm = canonicalize(event["token_endpoint"])
        if "environment" in event:  # Always available unless in legacy test cases
            environment = event["environment"]  # Set by application.py
        response = event.get("response", {})
        data = event.get("data", {})
        access_token = response.get("access_token")
        refresh_token = response.get("refresh_token")
        id_token = response.get("id_token")
        id_token_claims = response.get("id_token_claims") or (  # Prefer the claims from broker
            # Only use decode_id_token() when necessary, it contains time-sensitive validation
            decode_id_token(id_token, client_id=event["client_id"]) if id_token else {})
        client_info, home_account_id = self.__parse_account(response, id_token_claims)

        target = ' '.join(sorted(event.get("scope") or []))  # Schema should have required sorting

        with self._lock:
            now = int(time.time() if now is None else now)

            if access_token:
                default_expires_in = (  # https://www.rfc-editor.org/rfc/rfc6749#section-5.1
                    int(response.get("expires_on")) - now  # Some Managed Identity emits this
                    ) if response.get("expires_on") else 600
                expires_in = int(  # AADv1-like endpoint returns a string
                    response.get("expires_in", default_expires_in))
                ext_expires_in = int(  # AADv1-like endpoint returns a string
                    response.get("ext_expires_in", expires_in))
                at = {
                    "credential_type": self.CredentialType.ACCESS_TOKEN,
                    "secret": access_token,
                    "home_account_id": home_account_id,
                    "environment": environment,
                    "client_id": event.get("client_id"),
                    "target": target,
                    "realm": realm,
                    "token_type": response.get("token_type", "Bearer"),
                    "cached_at": str(now),  # Schema defines it as a string
                    "expires_on": str(now + expires_in),  # Same here
                    "extended_expires_on": str(now + ext_expires_in)  # Same here
                    }
                at.update({k: data[k] for k in data if k in {
                    # Also store extra data which we explicitly allow
                    # So that we won't accidentally store a user's password etc.
                    "key_id",  # It happens in SSH-cert or POP scenario
                    }})
                if "refresh_in" in response:
                    # Coerce like expires_in above, so a string value
                    # does not break the addition.
                    refresh_in = int(response["refresh_in"])
                    at["refresh_on"] = str(now + refresh_in)  # Schema wants a string
                self.modify(self.CredentialType.ACCESS_TOKEN, at, at)

            if client_info and not event.get("skip_account_creation"):
                account = {
                    "home_account_id": home_account_id,
                    "environment": environment,
                    "realm": realm,
                    "local_account_id": event.get(
                        "_account_id",  # Came from mid-tier code path.
                            # Empirically, it is the oid in AAD or cid in MSA.
                        id_token_claims.get("oid", id_token_claims.get("sub"))),
                    "username": _get_username(id_token_claims)
                        or data.get("username")  # Falls back to ROPC username
                        or event.get("username")  # Falls back to Federated ROPC username
                        or "",  # The schema does not like null
                    "authority_type": event.get(
                        "authority_type",  # Honor caller's choice of authority_type
                        self.AuthorityType.ADFS if realm == "adfs"
                            else self.AuthorityType.MSSTS),
                    # "client_info": response.get("client_info"),  # Optional
                    }
                grant_types_that_establish_an_account = (
                    _GRANT_TYPE_BROKER, "authorization_code", "password",
                    Client.DEVICE_FLOW["GRANT_TYPE"])
                if event.get("grant_type") in grant_types_that_establish_an_account:
                    account["account_source"] = event["grant_type"]
                self.modify(self.CredentialType.ACCOUNT, account, account)

            if id_token:
                idt = {
                    "credential_type": self.CredentialType.ID_TOKEN,
                    "secret": id_token,
                    "home_account_id": home_account_id,
                    "environment": environment,
                    "realm": realm,
                    "client_id": event.get("client_id"),
                    # "authority": "it is optional",
                    }
                self.modify(self.CredentialType.ID_TOKEN, idt, idt)

            if refresh_token:
                rt = {
                    "credential_type": self.CredentialType.REFRESH_TOKEN,
                    "secret": refresh_token,
                    "home_account_id": home_account_id,
                    "environment": environment,
                    "client_id": event.get("client_id"),
                    "target": target,  # Optional per schema though
                    "last_modification_time": str(now),  # Optional. Schema defines it as a string.
                    }
                if "foci" in response:
                    rt["family_id"] = response["foci"]
                self.modify(self.CredentialType.REFRESH_TOKEN, rt, rt)

            app_metadata = {
                "client_id": event.get("client_id"),
                "environment": environment,
                }
            if "foci" in response:
                app_metadata["family_id"] = response.get("foci")
            self.modify(self.CredentialType.APP_METADATA, app_metadata, app_metadata)

    def modify(self, credential_type, old_entry, new_key_value_pairs=None):
        """Update or remove one cache entry.

        :param credential_type: Selects the key maker and the cache bucket.
        :param dict old_entry: The entry whose fields determine the cache key.
        :param dict new_key_value_pairs: When truthy, the stored entry becomes
            ``old_entry`` overlaid with these pairs; otherwise the entry
            keyed by ``old_entry`` is removed (a missing entry is ignored).
        """
        # This helper exists to consolidate all token add/modify/remove behaviors,
        # so that the sub-classes will have only one method to work on,
        # instead of patching a pair of update_xx() and remove_xx() per type.
        # You can monkeypatch self.key_makers to support more types on-the-fly.
        key = self.key_makers[credential_type](**old_entry)
        with self._lock:
            if new_key_value_pairs:  # Update with them
                entries = self._cache.setdefault(credential_type, {})
                entries[key] = dict(
                    old_entry,  # Do not use entries[key] b/c it might not exist
                    **new_key_value_pairs)
            else:  # Remove old_entry
                self._cache.setdefault(credential_type, {}).pop(key, None)

    def remove_rt(self, rt_item):
        """Remove the given refresh token entry from the cache."""
        assert rt_item.get("credential_type") == self.CredentialType.REFRESH_TOKEN
        return self.modify(self.CredentialType.REFRESH_TOKEN, rt_item)

    def update_rt(self, rt_item, new_rt):
        """Replace the secret of the given refresh token entry with ``new_rt``
        and stamp its ``last_modification_time`` with the current time."""
        assert rt_item.get("credential_type") == self.CredentialType.REFRESH_TOKEN
        return self.modify(self.CredentialType.REFRESH_TOKEN, rt_item, {
            "secret": new_rt,
            "last_modification_time": str(int(time.time())),  # Optional. Schema defines it as a string.
            })

    def remove_at(self, at_item):
        """Remove the given access token entry from the cache."""
        assert at_item.get("credential_type") == self.CredentialType.ACCESS_TOKEN
        return self.modify(self.CredentialType.ACCESS_TOKEN, at_item)

    def remove_idt(self, idt_item):
        """Remove the given ID token entry from the cache."""
        assert idt_item.get("credential_type") == self.CredentialType.ID_TOKEN
        return self.modify(self.CredentialType.ID_TOKEN, idt_item)

    def remove_account(self, account_item):
        """Remove the given account entry from the cache."""
        assert "authority_type" in account_item
        return self.modify(self.CredentialType.ACCOUNT, account_item)

380

381

class SerializableTokenCache(TokenCache):
    """A token cache whose in-memory state can be dumped to / loaded from a string.

    This class does NOT actually persist the cache on disk/db/etc..
    It can be a starting point to implement your own persistence.
    Depending on your need,
    the following simple recipe for file-based, unencrypted persistence may be sufficient::

        import os, atexit, msal
        cache_filename = os.path.join(  # Persist cache into this file
            os.getenv(
                # Automatically wipe out the cache from Linux when user's ssh session ends.
                # See also https://github.com/AzureAD/microsoft-authentication-library-for-python/issues/690
                "XDG_RUNTIME_DIR", ""),
            "my_cache.bin")
        cache = msal.SerializableTokenCache()
        if os.path.exists(cache_filename):
            cache.deserialize(open(cache_filename, "r").read())
        atexit.register(lambda:
            open(cache_filename, "w").write(cache.serialize())
            # Hint: The following optional line persists only when state changed
            if cache.has_state_changed else None
            )
        app = msal.ClientApplication(..., token_cache=cache)
        ...

    Alternatively, you may use a more sophisticated cache persistence library,
    `MSAL Extensions <https://github.com/AzureAD/microsoft-authentication-extensions-for-python>`_,
    which provides token cache persistence with encryption, and more.

    :var bool has_state_changed:
        Indicates whether the cache state in the memory has changed since last
        :func:`~serialize` or :func:`~deserialize` call.
    """
    has_state_changed = False

    def add(self, event, **kwargs):
        """Add tokens from ``event`` via the base class, then flag the state as changed."""
        super().add(event, **kwargs)
        self.has_state_changed = True

    def modify(self, credential_type, old_entry, new_key_value_pairs=None):
        """Apply the base-class update/removal, then flag the state as changed."""
        super().modify(credential_type, old_entry, new_key_value_pairs)
        self.has_state_changed = True

    def deserialize(self, state):
        # type: (Optional[str]) -> None
        """Deserialize the cache from a state previously obtained by serialize()"""
        with self._lock:
            if state:
                self._cache = json.loads(state)
            else:  # None or empty string means an empty cache
                self._cache = {}
            self.has_state_changed = False  # reset

    def serialize(self):
        # type: () -> str
        """Serialize the current cache state into a string."""
        with self._lock:
            self.has_state_changed = False
            return json.dumps(self._cache, indent=4)

439

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/msal/token_cache.py: 28%

158 statements