# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import abc
import binascii
import json
import os
import sys
import warnings
from datetime import datetime, timedelta, timezone
from functools import lru_cache
from hashlib import sha384
from os import getenv
from pathlib import Path, PurePath
from typing import Dict, Iterable, Optional, Set
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
from zipfile import BadZipFile, ZipFile

from hypothesis.configuration import storage_directory
from hypothesis.errors import HypothesisException, HypothesisWarning
from hypothesis.utils.conventions import not_set

__all__ = [
    "DirectoryBasedExampleDatabase",
    "ExampleDatabase",
    "InMemoryExampleDatabase",
    "MultiplexedDatabase",
    "ReadOnlyDatabase",
    "GitHubArtifactDatabase",
]


def _usable_dir(path: os.PathLike) -> bool:
    """
    Return True if the desired path can be used as a database path, either
    because the directory already exists and is usable, or because its nearest
    existing ancestor is usable and we can create the directory as needed.
    """
    path = Path(path)
    try:
        while not path.exists():
            # Loop terminates because the root dir ('/' on unix) always exists.
            path = path.parent
        return path.is_dir() and os.access(path, os.R_OK | os.W_OK | os.X_OK)
    except PermissionError:
        return False


def _db_for_path(path=None):
    if path is not_set:
        if os.getenv("HYPOTHESIS_DATABASE_FILE") is not None:  # pragma: no cover
            raise HypothesisException(
                "The $HYPOTHESIS_DATABASE_FILE environment variable no longer has any "
                "effect. Configure your database location via a settings profile instead.\n"
                "https://hypothesis.readthedocs.io/en/latest/settings.html#settings-profiles"
            )

        path = storage_directory("examples", intent_to_write=False)
        if not _usable_dir(path):  # pragma: no cover
            warnings.warn(
                "The database setting is not configured, and the default "
                "location is unusable - falling back to an in-memory "
                f"database for this session. {path=}",
                HypothesisWarning,
                stacklevel=3,
            )
            return InMemoryExampleDatabase()
    if path in (None, ":memory:"):
        return InMemoryExampleDatabase()
    return DirectoryBasedExampleDatabase(path)


class _EDMeta(abc.ABCMeta):
    def __call__(self, *args, **kwargs):
        if self is ExampleDatabase:
            return _db_for_path(*args, **kwargs)
        return super().__call__(*args, **kwargs)
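

# With the metaclass above, calling the abstract base class itself dispatches to
# _db_for_path().  A rough sketch of the resulting behaviour, for illustration
# only (not additional API surface):
#
#   ExampleDatabase(":memory:")       # -> InMemoryExampleDatabase()
#   ExampleDatabase("/some/path")     # -> DirectoryBasedExampleDatabase("/some/path")
#
# The settings machinery passes `not_set` instead, which selects the default
# on-disk location (or falls back to an in-memory database if that is unusable).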


# This __call__ method is picked up by Sphinx as the signature of all ExampleDatabase
# subclasses, which is accurate, reasonable, and unhelpful.  Fortunately Sphinx
# maintains a list of metaclass-call-methods to ignore, and while they would prefer
# not to maintain it upstream (https://github.com/sphinx-doc/sphinx/pull/8262) we
# can insert ourselves here.
#
# This code only runs if Sphinx has already been imported; and it would live in our
# docs/conf.py except that we would also like it to work for anyone documenting
# downstream ExampleDatabase subclasses too.
if "sphinx" in sys.modules:
    try:
        from sphinx.ext.autodoc import _METACLASS_CALL_BLACKLIST

        _METACLASS_CALL_BLACKLIST.append("hypothesis.database._EDMeta.__call__")
    except Exception:
        pass


class ExampleDatabase(metaclass=_EDMeta):
    """An abstract base class for storing examples in Hypothesis' internal format.

    An ExampleDatabase maps each ``bytes`` key to many distinct ``bytes``
    values, like a ``Mapping[bytes, AbstractSet[bytes]]``.
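
    A minimal sketch of these semantics, using the in-memory implementation
    below (any concrete subclass behaves the same way):

    .. code-block:: python

        db = InMemoryExampleDatabase()
        db.save(b"key", b"value-1")
        db.save(b"key", b"value-2")  # distinct values accumulate under one key
        db.save(b"key", b"value-2")  # saving an existing value is a no-op
        assert sorted(db.fetch(b"key")) == [b"value-1", b"value-2"]
        db.delete(b"key", b"value-1")  # deleting a missing value is also a no-op
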
111 """
112
113 @abc.abstractmethod
114 def save(self, key: bytes, value: bytes) -> None:
115 """Save ``value`` under ``key``.
116
117 If this value is already present for this key, silently do nothing.
118 """
119 raise NotImplementedError(f"{type(self).__name__}.save")
120
121 @abc.abstractmethod
122 def fetch(self, key: bytes) -> Iterable[bytes]:
123 """Return an iterable over all values matching this key."""
124 raise NotImplementedError(f"{type(self).__name__}.fetch")
125
126 @abc.abstractmethod
127 def delete(self, key: bytes, value: bytes) -> None:
128 """Remove this value from this key.
129
130 If this value is not present, silently do nothing.
131 """
132 raise NotImplementedError(f"{type(self).__name__}.delete")
133
134 def move(self, src: bytes, dest: bytes, value: bytes) -> None:
135 """Move ``value`` from key ``src`` to key ``dest``. Equivalent to
136 ``delete(src, value)`` followed by ``save(src, value)``, but may
137 have a more efficient implementation.
138
139 Note that ``value`` will be inserted at ``dest`` regardless of whether
140 it is currently present at ``src``.
141 """
142 if src == dest:
143 self.save(src, value)
144 return
145 self.delete(src, value)
146 self.save(dest, value)
147
148
class InMemoryExampleDatabase(ExampleDatabase):
    """A non-persistent example database, implemented in terms of a dict of sets.

    This can be useful if you call a test function several times in a single
    session, or for testing other database implementations, but because it
    does not persist between runs we do not recommend it for general use.
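
    For example, you can opt into it explicitly through the ``database``
    setting - a quick sketch using the standard settings API:

    .. code-block:: python

        from hypothesis import settings

        settings.register_profile("no-disk", database=InMemoryExampleDatabase())
        settings.load_profile("no-disk")
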
155 """
156
157 def __init__(self):
158 self.data = {}
159
160 def __repr__(self) -> str:
161 return f"InMemoryExampleDatabase({self.data!r})"
162
163 def fetch(self, key: bytes) -> Iterable[bytes]:
164 yield from self.data.get(key, ())
165
166 def save(self, key: bytes, value: bytes) -> None:
167 self.data.setdefault(key, set()).add(bytes(value))
168
169 def delete(self, key: bytes, value: bytes) -> None:
170 self.data.get(key, set()).discard(bytes(value))
171
172
def _hash(key):
    # Directory and file names are the first 16 hex digits of the SHA384 hash
    # of the key or value, so the on-disk layout is stable across runs.
    return sha384(key).hexdigest()[:16]


class DirectoryBasedExampleDatabase(ExampleDatabase):
    """Use a directory to store Hypothesis examples as files.

    Each test corresponds to a directory, and each example to a file within that
    directory.  While the contents are fairly opaque, a
    ``DirectoryBasedExampleDatabase`` can be shared by checking the directory
    into version control, for example with the following ``.gitignore``::

        # Ignore files cached by Hypothesis...
        .hypothesis/*
        # except for the examples directory
        !.hypothesis/examples/

    Note however that this only makes sense if you also pin to an exact version of
    Hypothesis, and we would usually recommend implementing a shared database with
    a network datastore - see :class:`~hypothesis.database.ExampleDatabase`, and
    the :class:`~hypothesis.database.MultiplexedDatabase` helper.
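
    A quick sketch of pointing Hypothesis at an explicit directory via the
    ``database`` setting (the path below is illustrative):

    .. code-block:: python

        from hypothesis import settings

        settings.register_profile(
            "shared-db", database=DirectoryBasedExampleDatabase(".hypothesis/examples")
        )
        settings.load_profile("shared-db")
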
194 """
195
196 def __init__(self, path: os.PathLike) -> None:
197 self.path = Path(path)
198 self.keypaths: Dict[bytes, Path] = {}
199
200 def __repr__(self) -> str:
201 return f"DirectoryBasedExampleDatabase({self.path!r})"
202
203 def _key_path(self, key: bytes) -> Path:
204 try:
205 return self.keypaths[key]
206 except KeyError:
207 pass
208 self.keypaths[key] = self.path / _hash(key)
209 return self.keypaths[key]
210
211 def _value_path(self, key, value):
212 return self._key_path(key) / _hash(value)
213
214 def fetch(self, key: bytes) -> Iterable[bytes]:
215 kp = self._key_path(key)
216 if not kp.is_dir():
217 return
218 for path in os.listdir(kp):
219 try:
220 yield (kp / path).read_bytes()
221 except OSError:
222 pass
223
224 def save(self, key: bytes, value: bytes) -> None:
225 # Note: we attempt to create the dir in question now. We
226 # already checked for permissions, but there can still be other issues,
227 # e.g. the disk is full, or permissions might have been changed.
228 self._key_path(key).mkdir(exist_ok=True, parents=True)
229 path = self._value_path(key, value)
230 if not path.exists():
231 suffix = binascii.hexlify(os.urandom(16)).decode("ascii")
232 tmpname = path.with_suffix(f"{path.suffix}.{suffix}")
233 tmpname.write_bytes(value)
234 try:
235 tmpname.rename(path)
236 except OSError: # pragma: no cover
237 tmpname.unlink()
238 assert not tmpname.exists()
239
    def move(self, src: bytes, dest: bytes, value: bytes) -> None:
        if src == dest:
            self.save(src, value)
            return
        try:
            os.renames(
                self._value_path(src, value),
                self._value_path(dest, value),
            )
        except OSError:
            self.delete(src, value)
            self.save(dest, value)

    def delete(self, key: bytes, value: bytes) -> None:
        try:
            self._value_path(key, value).unlink()
        except OSError:
            pass


class ReadOnlyDatabase(ExampleDatabase):
    """A wrapper to make the given database read-only.

    The implementation passes through ``fetch``, and turns ``save``, ``delete``, and
    ``move`` into silent no-ops.

    Note that this disables Hypothesis' automatic discarding of stale examples.
    It is designed to allow local machines to access a shared database (e.g. from CI
    servers), without propagating changes back from a local or in-development branch.
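
    For example, to reuse an existing example directory without ever writing
    back to it (a sketch; the path is illustrative):

    .. code-block:: python

        local = DirectoryBasedExampleDatabase(".hypothesis/examples")
        settings.register_profile("reuse-only", database=ReadOnlyDatabase(local))
        settings.load_profile("reuse-only")
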
269 """
270
271 def __init__(self, db: ExampleDatabase) -> None:
272 assert isinstance(db, ExampleDatabase)
273 self._wrapped = db
274
275 def __repr__(self) -> str:
276 return f"ReadOnlyDatabase({self._wrapped!r})"
277
278 def fetch(self, key: bytes) -> Iterable[bytes]:
279 yield from self._wrapped.fetch(key)
280
281 def save(self, key: bytes, value: bytes) -> None:
282 pass
283
284 def delete(self, key: bytes, value: bytes) -> None:
285 pass
286
287
288class MultiplexedDatabase(ExampleDatabase):
289 """A wrapper around multiple databases.
290
291 Each ``save``, ``fetch``, ``move``, or ``delete`` operation will be run against
292 all of the wrapped databases. ``fetch`` does not yield duplicate values, even
293 if the same value is present in two or more of the wrapped databases.
294
295 This combines well with a :class:`ReadOnlyDatabase`, as follows:
296
297 .. code-block:: python
298
299 local = DirectoryBasedExampleDatabase("/tmp/hypothesis/examples/")
300 shared = CustomNetworkDatabase()
301
302 settings.register_profile("ci", database=shared)
303 settings.register_profile(
304 "dev", database=MultiplexedDatabase(local, ReadOnlyDatabase(shared))
305 )
306 settings.load_profile("ci" if os.environ.get("CI") else "dev")
307
    In this setup, your CI system or fuzzing runs can populate a central shared
    database, while local runs on development machines can reproduce any failures
    from CI but will only cache their own failures locally, and cannot remove
    examples from the shared database.
    """

    def __init__(self, *dbs: ExampleDatabase) -> None:
        assert all(isinstance(db, ExampleDatabase) for db in dbs)
        self._wrapped = dbs

    def __repr__(self) -> str:
        return "MultiplexedDatabase({})".format(", ".join(map(repr, self._wrapped)))

    def fetch(self, key: bytes) -> Iterable[bytes]:
        seen = set()
        for db in self._wrapped:
            for value in db.fetch(key):
                if value not in seen:
                    yield value
                    seen.add(value)

    def save(self, key: bytes, value: bytes) -> None:
        for db in self._wrapped:
            db.save(key, value)

    def delete(self, key: bytes, value: bytes) -> None:
        for db in self._wrapped:
            db.delete(key, value)

    def move(self, src: bytes, dest: bytes, value: bytes) -> None:
        for db in self._wrapped:
            db.move(src, dest, value)


class GitHubArtifactDatabase(ExampleDatabase):
    """
    A file-based database loaded from a `GitHub Actions <https://docs.github.com/en/actions>`_ artifact.

    You can use this to share example databases between CI runs and developers,
    giving the latter read-only access to examples found in CI.  This is particularly
    useful for continuous fuzzing (i.e. with `HypoFuzz <https://hypofuzz.com/>`_),
    where the CI system can help find new failing examples through fuzzing,
    and developers can reproduce them locally without any manual effort.

    .. note::
        You must provide ``GITHUB_TOKEN`` as an environment variable.  In CI, GitHub
        Actions provides this automatically, but it needs to be set manually for local
        usage.  On a developer machine, this would usually be a `Personal Access Token
        <https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token>`_.
        If the repository is private, the token needs the ``repo`` scope for a classic
        token, or ``actions:read`` for a fine-grained token.

    In most cases, this will be used through a
    :class:`~hypothesis.database.MultiplexedDatabase`,
    combining a local directory-based database with this one.  For example:

    .. code-block:: python

        local = DirectoryBasedExampleDatabase(".hypothesis/examples")
        shared = ReadOnlyDatabase(GitHubArtifactDatabase("user", "repo"))

        settings.register_profile("ci", database=local)
        settings.register_profile("dev", database=MultiplexedDatabase(local, shared))
        # We don't want to use the shared database in CI, only to populate the local
        # one, which the workflow should then upload as an artifact.
        settings.load_profile("ci" if os.environ.get("CI") else "dev")

    .. note::
        Because this database is read-only, you always need to wrap it with the
        :class:`ReadOnlyDatabase`.

    A setup like this can be paired with a GitHub Actions workflow including
    something like the following:

    .. code-block:: yaml

        - name: Download example database
          uses: dawidd6/action-download-artifact@v2.24.3
          with:
            name: hypothesis-example-db
            path: .hypothesis/examples
            if_no_artifact_found: warn
            workflow_conclusion: completed

        - name: Run tests
          run: pytest

        - name: Upload example database
          uses: actions/upload-artifact@v3
          if: always()
          with:
            name: hypothesis-example-db
            path: .hypothesis/examples

    In this workflow, we use `dawidd6/action-download-artifact <https://github.com/dawidd6/action-download-artifact>`_
    to download the latest artifact, because the official `actions/download-artifact <https://github.com/actions/download-artifact>`_
    does not support downloading artifacts from previous workflow runs.

    The database automatically implements a simple file-based cache with a default
    expiration period of 1 day.  You can adjust this through the ``cache_timeout``
    argument.

    For mono-repo support, you can provide a unique ``artifact_name``
    (e.g. ``hypofuzz-example-db-frontend``).
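
    A quick sketch combining both options (the owner, repo, and artifact name
    below are illustrative):

    .. code-block:: python

        from datetime import timedelta

        shared = ReadOnlyDatabase(
            GitHubArtifactDatabase(
                "user",
                "repo",
                artifact_name="hypofuzz-example-db-frontend",
                cache_timeout=timedelta(hours=6),
            )
        )
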
410 """
411
412 def __init__(
413 self,
414 owner: str,
415 repo: str,
416 artifact_name: str = "hypothesis-example-db",
417 cache_timeout: timedelta = timedelta(days=1),
418 path: Optional[os.PathLike] = None,
419 ):
420 self.owner = owner
421 self.repo = repo
422 self.artifact_name = artifact_name
423 self.cache_timeout = cache_timeout
424
425 # Get the GitHub token from the environment
426 # It's unnecessary to use a token if the repo is public
427 self.token: Optional[str] = getenv("GITHUB_TOKEN")
428
429 if path is None:
430 self.path: Path = Path(
431 storage_directory(f"github-artifacts/{self.artifact_name}/")
432 )
433 else:
434 self.path = Path(path)
435
436 # We don't want to initialize the cache until we need to
437 self._initialized: bool = False
438 self._disabled: bool = False
439
440 # This is the path to the artifact in usage
441 # .hypothesis/github-artifacts/<artifact-name>/<modified_isoformat>.zip
442 self._artifact: Optional[Path] = None
443 # This caches the artifact structure
444 self._access_cache: Optional[Dict[PurePath, Set[PurePath]]] = None
445
446 # Message to display if user doesn't wrap around ReadOnlyDatabase
447 self._read_only_message = (
448 "This database is read-only. "
449 "Please wrap this class with ReadOnlyDatabase"
450 "i.e. ReadOnlyDatabase(GitHubArtifactDatabase(...))."
451 )
452
453 def __repr__(self) -> str:
454 return (
455 f"GitHubArtifactDatabase(owner={self.owner!r}, "
456 f"repo={self.repo!r}, artifact_name={self.artifact_name!r})"
457 )
458
    def _prepare_for_io(self) -> None:
        assert self._artifact is not None, "Artifact not loaded."

        if self._initialized:  # pragma: no cover
            return

        # Test that the artifact is valid
        try:
            with ZipFile(self._artifact) as f:
                if f.testzip():  # pragma: no cover
                    raise BadZipFile

            # testzip() only checks CRCs of the compressed data, so we also build
            # the access cache here - walking the whole archive gives us better
            # coverage of any corruption in the artifact.

            # Cache the files inside each keypath
            self._access_cache = {}
            with ZipFile(self._artifact) as zf:
                namelist = zf.namelist()
                # Iterate over files in the artifact
                for filename in namelist:
                    fileinfo = zf.getinfo(filename)
                    if fileinfo.is_dir():
                        self._access_cache[PurePath(filename)] = set()
                    else:
                        # Get the keypath from the filename
                        keypath = PurePath(filename).parent
                        # Add the file to the keypath
                        self._access_cache[keypath].add(PurePath(filename))
        except BadZipFile:
            warnings.warn(
                "The downloaded artifact from GitHub is invalid. "
                "This could be because the artifact was corrupted, "
                "or because the artifact was not created by Hypothesis.",
                HypothesisWarning,
                stacklevel=3,
            )
            self._disabled = True

        self._initialized = True

    def _initialize_db(self) -> None:
        # Trigger the storage-directory warning that we suppressed earlier by
        # passing intent_to_write=False
        storage_directory(self.path.name)
        # Create the cache directory if it doesn't exist
        self.path.mkdir(exist_ok=True, parents=True)

        # Get all artifacts
        cached_artifacts = sorted(
            self.path.glob("*.zip"),
            key=lambda a: datetime.fromisoformat(a.stem.replace("_", ":")),
        )

        # Remove all but the latest artifact
        for artifact in cached_artifacts[:-1]:
            artifact.unlink()

        try:
            found_artifact = cached_artifacts[-1]
        except IndexError:
            found_artifact = None

        # Check if the latest artifact is a cache hit
        if found_artifact is not None and (
            datetime.now(timezone.utc)
            - datetime.fromisoformat(found_artifact.stem.replace("_", ":"))
            < self.cache_timeout
        ):
            self._artifact = found_artifact
        else:
            # Download the latest artifact from GitHub
            new_artifact = self._fetch_artifact()

            if new_artifact:
                if found_artifact is not None:
                    found_artifact.unlink()
                self._artifact = new_artifact
            elif found_artifact is not None:
                warnings.warn(
                    "Using an expired artifact as a fallback for the database: "
                    f"{found_artifact}",
                    HypothesisWarning,
                    stacklevel=2,
                )
                self._artifact = found_artifact
            else:
                warnings.warn(
                    "Couldn't acquire a new or existing artifact. Disabling database.",
                    HypothesisWarning,
                    stacklevel=2,
                )
                self._disabled = True
                return

        self._prepare_for_io()

    def _get_bytes(self, url: str) -> Optional[bytes]:  # pragma: no cover
        request = Request(
            url,
            headers={
                "Accept": "application/vnd.github+json",
                "X-GitHub-Api-Version": "2022-11-28",
                "Authorization": f"Bearer {self.token}",
            },
        )
        warning_message = None
        response_bytes: Optional[bytes] = None
        try:
            with urlopen(request) as response:
                response_bytes = response.read()
        except HTTPError as e:
            if e.code == 401:
                warning_message = (
                    "Authorization failed when trying to download artifact from GitHub. "
                    "Check that you have a valid GITHUB_TOKEN set in your environment."
                )
            else:
                warning_message = (
                    "Could not get the latest artifact from GitHub. "
                    "This could be because the repository "
                    "or artifact does not exist."
                )
        except URLError:
            warning_message = "Could not connect to GitHub to get the latest artifact."
        except TimeoutError:
            warning_message = (
                "Could not connect to GitHub to get the latest artifact "
                "(connection timed out)."
            )

        if warning_message is not None:
            warnings.warn(warning_message, HypothesisWarning, stacklevel=4)
            return None

        return response_bytes

    def _fetch_artifact(self) -> Optional[Path]:  # pragma: no cover
        # Get the list of artifacts from GitHub
        url = f"https://api.github.com/repos/{self.owner}/{self.repo}/actions/artifacts"
        response_bytes = self._get_bytes(url)
        if response_bytes is None:
            return None

        artifacts = json.loads(response_bytes)["artifacts"]
        artifacts = [a for a in artifacts if a["name"] == self.artifact_name]

        if not artifacts:
            return None

        # Get the latest artifact from the list
        artifact = max(artifacts, key=lambda a: a["created_at"])
        url = artifact["archive_download_url"]

        # Download the artifact
        artifact_bytes = self._get_bytes(url)
        if artifact_bytes is None:
            return None

        # Save the artifact to the cache
        # We replace ":" with "_" to ensure the filenames are compatible
        # with Windows filesystems
        timestamp = datetime.now(timezone.utc).isoformat().replace(":", "_")
        artifact_path = self.path / f"{timestamp}.zip"
        try:
            artifact_path.write_bytes(artifact_bytes)
        except OSError:
            warnings.warn(
                "Could not save the latest artifact from GitHub.",
                HypothesisWarning,
                stacklevel=3,
            )
            return None

        return artifact_path

    @staticmethod
    @lru_cache
    def _key_path(key: bytes) -> PurePath:
        return PurePath(_hash(key) + "/")

    def fetch(self, key: bytes) -> Iterable[bytes]:
        if self._disabled:
            return

        if not self._initialized:
            self._initialize_db()
            if self._disabled:
                return

        assert self._artifact is not None
        assert self._access_cache is not None

        kp = self._key_path(key)

        with ZipFile(self._artifact) as zf:
            # Get all files under this key path from the cache
            filenames = self._access_cache.get(kp, ())
            for filename in filenames:
                with zf.open(filename.as_posix()) as f:
                    yield f.read()

    # Read-only interface
    def save(self, key: bytes, value: bytes) -> None:
        raise RuntimeError(self._read_only_message)

    def move(self, src: bytes, dest: bytes, value: bytes) -> None:
        raise RuntimeError(self._read_only_message)

    def delete(self, key: bytes, value: bytes) -> None:
        raise RuntimeError(self._read_only_message)