diff --git a/bin/wheels/runtime/certifi-2026.4.22-py3-none-any.whl b/bin/wheels/runtime/certifi-2026.4.22-py3-none-any.whl deleted file mode 100644 index 4bb92af6bf..0000000000 Binary files a/bin/wheels/runtime/certifi-2026.4.22-py3-none-any.whl and /dev/null differ diff --git a/bin/wheels/runtime/certifi-2026.5.20-py3-none-any.whl b/bin/wheels/runtime/certifi-2026.5.20-py3-none-any.whl new file mode 100644 index 0000000000..a2e0e89ccb Binary files /dev/null and b/bin/wheels/runtime/certifi-2026.5.20-py3-none-any.whl differ diff --git a/requirements.all.txt b/requirements.all.txt index 2f1c82ffb8..403ba1e8d8 100644 --- a/requirements.all.txt +++ b/requirements.all.txt @@ -1,12 +1,13 @@ +ast_serialize==0.5.0 atomicwrites==1.4.1 attrs==26.1.0 aws-requests-auth==0.4.3 -blessed==1.41.0 +blessed==1.42.0 boto3==1.43.10 boto3-stubs-lite==1.43.10 botocore==1.43.10 botocore-stubs==1.42.41 -certifi==2026.4.22 +certifi==2026.5.20 cffi==2.0.0 chalice==1.32.0+20 charset-normalizer==3.4.7 @@ -35,7 +36,7 @@ google-cloud-storage==3.10.1 google-crc32c==1.8.0 google-resumable-media==2.9.0 googleapis-common-protos==1.75.0 -greenlet==3.5.0 +greenlet==3.5.1 grpc-google-iam-v1==0.14.4 grpcio==1.80.0 grpcio-status==1.80.0 @@ -59,14 +60,14 @@ more-itertools==11.0.2 moto==5.2.1 msgpack==1.1.2 msgpack-types==0.7.0 -mypy==1.20.2 +mypy==2.1.0 mypy-boto3-apigateway==1.43.0 mypy-boto3-cloudwatch==1.43.2 mypy-boto3-dynamodb==1.43.0 mypy-boto3-ec2==1.43.10 mypy-boto3-ecr==1.43.0 mypy-boto3-iam==1.43.2 -mypy-boto3-kms==1.43.0 +mypy-boto3-kms==1.43.12 mypy-boto3-lambda==1.43.0 mypy-boto3-opensearch==1.43.7 mypy-boto3-s3==1.43.5 diff --git a/requirements.dev.trans.txt b/requirements.dev.trans.txt index 541421a2e9..9e0e725da0 100644 --- a/requirements.dev.trans.txt +++ b/requirements.dev.trans.txt @@ -1,11 +1,12 @@ -blessed==1.41.0 +ast_serialize==0.5.0 +blessed==1.42.0 botocore-stubs==1.42.41 click==8.4.0 editor==1.8.0 et_xmlfile==2.0.0 gitdb==4.0.12 google-auth-httplib2==0.4.0 -greenlet==3.5.0 +greenlet==3.5.1 grpc-google-iam-v1==0.14.4 httplib2==0.31.2 inquirer==3.4.1 @@ -20,7 +21,7 @@ mypy-boto3-dynamodb==1.43.0 mypy-boto3-ec2==1.43.10 mypy-boto3-ecr==1.43.0 mypy-boto3-iam==1.43.2 -mypy-boto3-kms==1.43.0 +mypy-boto3-kms==1.43.12 mypy-boto3-lambda==1.43.0 mypy-boto3-opensearch==1.43.7 mypy-boto3-s3==1.43.5 diff --git a/requirements.dev.txt b/requirements.dev.txt index f7787fd042..d29c513d2d 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -15,7 +15,7 @@ isort==8.0.1 jq==1.11.0 msgpack-types==0.7.0 # versioned independently from the corresponding runtime dependency but when updating one, the other should be updated, too moto[s3,sqs,sns,dynamodb,iam]==5.2.1 # match the extras with the backends listed in AzulUnitTestCase._reset_moto -mypy==1.20.2 +mypy==2.1.0 openapi-spec-validator==0.7.2 # < 0.8.0, see https://github.com/DataBiosphere/azul/issues/7832 openpyxl==3.1.5 posix_ipc==1.3.2 diff --git a/requirements.trans.txt b/requirements.trans.txt index 056acb6ad6..9e6db2063a 100644 --- a/requirements.trans.txt +++ b/requirements.trans.txt @@ -1,4 +1,4 @@ -certifi==2026.4.22 +certifi==2026.5.20 cffi==2.0.0 charset-normalizer==3.4.7 cryptography==48.0.0 diff --git a/scripts/claude_mv.py b/scripts/claude_mv.py index ff20f9b3f7..0159696fa4 100644 --- a/scripts/claude_mv.py +++ b/scripts/claude_mv.py @@ -21,6 +21,7 @@ Mapping, Sequence, ) +import uuid import attrs @@ -202,9 +203,9 @@ class MoveSessionCommand(Command): @classmethod def add_subparser(cls, subparsers: argparse._SubParsersAction) -> None: parser = subparsers.add_parser('session', help=cls.__doc__) - parser.add_argument('session_id', - metavar='SESSION_ID', - help='The UUID of the session to move') + parser.add_argument('session', + metavar='SESSION', + help='The UUID or name of the session to move') parser.add_argument('src_project', metavar='SOURCE', help='The path of the source project directory') @@ -214,7 +215,6 @@ def add_subparser(cls, subparsers: argparse._SubParsersAction) -> None: parser.set_defaults(command_class=cls) def execute(self) -> None: - session_id = self._args.session_id src_project_dir = Path(self._args.src_project).resolve() dst_project_dir = Path(self._args.dst_project).resolve() assert src_project_dir.is_dir(), R( @@ -233,6 +233,12 @@ def execute(self) -> None: assert src_context_dir != dst_context_dir, R( 'Source and destination projects are the same', src_project_dir) + session = self._args.session + if self._is_uuid(session): + session_id = session + else: + session_id = self._find_session_by_name(src_context_dir, session) + log.info('Resolved session name %r to %r', session, session_id) session_base_name = session_id + '.jsonl' src_session_file = src_context_dir / session_base_name dst_session_file = dst_context_dir / session_base_name @@ -258,6 +264,36 @@ def execute(self) -> None: log.info('Rewriting session paths') self._rewrite_session(dst_session_file, src_project_dir, dst_project_dir) + def _is_uuid(self, session: str) -> bool: + try: + uuid.UUID(session) + except ValueError: + return False + else: + return True + + def _find_session_by_name(self, + context_dir: Path, + name: str + ) -> str: + matches: set[str] = set() + for session_file in context_dir.glob('*.jsonl'): + session_id = session_file.stem + title = None + for line in session_file.open(): + entry = json.loads(line) + if entry.get('type') == 'custom-title': + title = json_str(entry['customTitle']) + if title == name: + matches.add(session_id) + match tuple(matches): + case (session_id, ): + return session_id + case (): + assert False, R('No session with this name', name) + case _: + assert False, R('Multiple sessions with this name', name, matches) + def _read_session_index(self, context_dir: Path) -> MutableJSON | None: index_file = context_dir / 'sessions-index.json' if index_file.exists(): diff --git a/scripts/pull_request.py b/scripts/pull_request.py index 9c2fe140d7..1d72a555db 100644 --- a/scripts/pull_request.py +++ b/scripts/pull_request.py @@ -20,6 +20,14 @@ import re import subprocess import sys +from typing import ( + Literal, +) + +import attrs +from furl import ( + furl, +) from azul.lib import ( R, @@ -27,6 +35,9 @@ from azul.lib.strings import ( format_and_dedent as fd, ) +from azul.lib.types import ( + check_type, +) from azul.logging import ( configure_script_logging, ) @@ -83,26 +94,49 @@ def main(argv): else: assert False, R('Unsupported template', args.type) log.info('Fetching issue #%d …', issue_number) - issue_title, issue_type = _issue_info(issue_number) + issue = _issue_info(issue_number) if args.fix is None: - fix = issue_type == 'Defect' + fix = issue.type == 'Defect' else: fix = args.fix - title = _pr_title(issue_number, issue_title, fix, suffix=title_suffix) + title = _pr_title(issue_number, issue.title, fix, suffix=title_suffix) log.info('Checking for existing PR …') existing_pr = _existing_pr() + template = template_path.read_text() if existing_pr is None: - body = template_path.read_text() + body = template else: body = existing_pr['body'] + expected_comment = template.split('-->', maxsplit=1)[0] + assert body.startswith(expected_comment), R( + 'Existing PR was created with a different template') # Normalize line endings from GitHub API responses body = '\n'.join(body.splitlines()) body = _reference_issue_in_body(body, issue_number) + m = re.search(r'^- \[[ x]] Target branch is `(.+?)`$', + template, flags=re.MULTILINE) + assert m is not None, R('Target branch task not found in template') + target_branch = m.group(1) + target_branch_task = r'Target branch is `' + re.escape(target_branch) + '`' + if existing_pr is None: + body = _check_task(body, target_branch_task) + else: + base = existing_pr['baseRefName'] + if base == target_branch: + body = _check_task(body, target_branch_task) + else: + log.warning('Target branch is %r, expected %r', base, target_branch) + body = _check_task(body, target_branch_task, checked=False) + + has_u_tag = _has_commit_tag(target_branch, 'u') + body = _check_task(body, r'Added `u` tag to commit title.*', checked=has_u_tag) + body = _check_task(body, r'This PR is labeled `upgrade`.*', checked=has_u_tag) + body = _check_task(body, 'PR is assigned to the author') body = _check_task(body, r'Status of PR is \*In progress\*') body = _check_task(body, 'Name of PR branch matches .*') @@ -121,6 +155,7 @@ def main(argv): result = subprocess.run( [ 'gh', 'pr', 'create', + '--base', target_branch, '--title', title, '--body', body, '--assignee', '@me', @@ -146,11 +181,13 @@ def main(argv): ) log.info('PR URL is %r', pr_url) + _label(pr_url, 'upgrade', mode='add' if has_u_tag else 'remove') + log.info('Setting PR status …') - pr_node_id = _node_id('pr', pr_url) + pr_node_id = _node_id(pr_url) _set_status(pr_node_id, 'In Progress') log.info('Setting issue status …') - issue_node_id = _node_id('issue', str(issue_number)) + issue_node_id = _node_id(issue.url) _set_status(issue_node_id, 'In Progress') @@ -200,6 +237,25 @@ def _check_remote_branch(branch: str) -> None: log.warning('Remote and local branch diverge. A force push is needed') +def _commit_title_tags(title: str) -> set[str]: + m = re.match(r'^\[([^]]*)]', title) + if m is None: + return set() + else: + return set(m.group(1).split()) + + +def _has_commit_tag(target_branch: str, tag: str) -> bool: + result = subprocess.run( + ['git', 'log', '--format=%s', f'{target_branch}..HEAD'], + capture_output=True, text=True, check=True + ) + return any( + tag in _commit_title_tags(title) + for title in result.stdout.splitlines() + ) + + def _issue_number(branch: str) -> int: m = re.fullmatch(r'issues/[^/]+/(\d+)-.*', branch) assert m is not None, R('Cannot extract issue number from branch name', branch) @@ -235,7 +291,14 @@ def _promotion_date_and_target(branch: str) -> tuple[str, str]: return m.group(1), m.group(2) -def _issue_info(issue_number: int) -> tuple[str, str]: +@attrs.frozen +class _IssueInfo: + title: str + type: str + url: str + + +def _issue_info(issue_number: int) -> _IssueInfo: result = subprocess.run( [ 'gh', 'api', 'graphql', @@ -244,6 +307,7 @@ def _issue_info(issue_number: int) -> tuple[str, str]: repository(owner: "{owner}", name: "azul") {{ issue(number: {number}) {{ title + url issueType {{ name }} }} }} @@ -254,7 +318,11 @@ def _issue_info(issue_number: int) -> tuple[str, str]: ) issue = json.loads(result.stdout)['data']['repository']['issue'] issue_type = issue['issueType'] - return issue['title'], issue_type['name'] if issue_type else '' + return _IssueInfo( + title=issue['title'], + type=issue_type['name'] if issue_type else '', + url=issue['url'] + ) def _pr_title(issue_number: int, @@ -268,7 +336,7 @@ def _pr_title(issue_number: int, def _existing_pr() -> dict | None: result = subprocess.run( - ['gh', 'pr', 'view', '--json', 'url,body'], + ['gh', 'pr', 'view', '--json', 'url,body,baseRefName'], capture_output=True, text=True ) if result.returncode != 0: @@ -285,15 +353,13 @@ def _reference_issue_in_body(body: str, issue_number: int) -> str: return body -def _check_task(body: str, task: str) -> str: - body_new, n = re.subn(r'^- \[ ] (' + task + ')$', - r'- [x] \1', - body, flags=re.MULTILINE) +def _check_task(body: str, task: str, checked: bool = True) -> str: + mark = 'x' if checked else ' ' + body, n = re.subn(r'^- \[[ x]] (' + task + ')$', + r'- [' + mark + r'] \1', + body, flags=re.MULTILINE) + assert n > 0, R('Task item not found in template', task) assert n < 2, R('Multiple matching task items found', task) - if n > 0: - return body_new - assert re.search(r'^- \[x] ' + task + '$', body, flags=re.MULTILINE), R( - 'Task item not found in template', task) return body @@ -311,14 +377,35 @@ def _github_user() -> str: return result.stdout.strip() -def _node_id(kind: str, ref: str) -> str: +def _gh_item_type(url: str) -> str: + path_kind = furl(url).path.segments[2] + result = {'pull': 'pr', 'issues': 'issue'}.get(path_kind) + assert result is not None, R('Cannot determine issue or PR from URL', url) + return result + + +def _node_id(url: str) -> str: + item_type = _gh_item_type(url) result = subprocess.run( - ['gh', kind, 'view', ref, '--json', 'id', '--jq', '.id'], + ['gh', item_type, 'view', url, '--json', 'id', '--jq', '.id'], capture_output=True, text=True, check=True ) return result.stdout.strip() +type LabelMode = Literal['add', 'remove'] + + +def _label(item_url: str, label: str, *, mode: LabelMode) -> None: + assert check_type(LabelMode, mode) + item_type = _gh_item_type(item_url) + log.info('%s label %r to %r …', mode.title() + 'ing', label, item_url) + subprocess.run( + ['gh', item_type, 'edit', item_url, f'--{mode}-label', label], + capture_output=True, text=True + ) + + def _set_status(node_id: str, status: str) -> None: project_id = _project_id() diff --git a/src/azul/__init__.py b/src/azul/__init__.py index 1fb57408f8..b81f78e374 100644 --- a/src/azul/__init__.py +++ b/src/azul/__init__.py @@ -858,11 +858,12 @@ def from_json(cls, spec: JSON) -> Self: _it_catalog_suffix: ClassVar[str] = '-it' - _catalog_re: str = r'([a-z][a-z0-9]*(-[a-z0-9]+)*)' - _catalog_re = r'(?=.{1,64}$)' + _catalog_re - _it_catalog_re: str = _catalog_re + rf'(?<={re.escape(_it_catalog_suffix)})' - _it_catalog_re: ClassVar[re.Pattern] = re.compile(_it_catalog_re) - _catalog_re: ClassVar[re.Pattern] = re.compile(_catalog_re) + _catalog_re = re.compile( + r'(?=.{1,64}$)([a-z][a-z0-9]*(-[a-z0-9]+)*)' + ) + _it_catalog_re = re.compile( + _catalog_re.pattern + rf'(?<={re.escape(_it_catalog_suffix)})' + ) def __attrs_post_init__(self): self.validate_name(self.name) diff --git a/src/azul/http.py b/src/azul/http.py index ad8ba2f802..b12c6a75b6 100644 --- a/src/azul/http.py +++ b/src/azul/http.py @@ -180,7 +180,7 @@ def urlopen(self, method, url, *args, **kwargs) -> urllib3.BaseHTTPResponse: def http_client(log: logging.Logger | None = None) -> HttpClient: client = urllib3.PoolManager(ca_certs=certifi.where()) - client: HttpClient = DisableCrossHostRedirectClient(client) + client = DisableCrossHostRedirectClient(client) if log is not None: client = LoggingHttpClient(client, log) return StatusRetryHttpClient(client) diff --git a/src/azul/indexer/index_service.py b/src/azul/indexer/index_service.py index c555eb06ae..d7fa7f2d15 100644 --- a/src/azul/indexer/index_service.py +++ b/src/azul/indexer/index_service.py @@ -890,7 +890,7 @@ def _write_individually(self, documents: Iterable[Document]): def _write_bulk(self, documents: Iterable[Document]): # FIXME: document this quirk - documents: dict[DocumentCoordinates, Document] = { + docs_by_coordinates: dict[DocumentCoordinates, Document] = { doc.coordinates.with_catalog(self.catalog): doc for doc in documents } if self.catalog is not None else { @@ -935,7 +935,7 @@ def expand_action(doc: Any) -> tuple[JSON, JSON | None]: # method immediately maps the value of the `expand_action_callback` # parameter over the list passed in the `actions` parameter. response = streaming_bulk(client=self.opensearch, - actions=list(documents.values()), + actions=list(docs_by_coordinates.values()), expand_action_callback=expand_action, refresh=self.refresh, raise_on_error=False, @@ -944,7 +944,7 @@ def expand_action(doc: Any) -> tuple[JSON, JSON | None]: op_type, info = one(info.items()) assert op_type in OpType.__members__, op_type coordinates = DocumentCoordinates.from_hit(info) - doc = documents[coordinates] + doc = docs_by_coordinates[coordinates] if success: self._on_success(doc) else: diff --git a/src/azul/infra/bigquery_reservation.py b/src/azul/infra/bigquery_reservation.py index f1eb9adc59..c0b25e2c13 100644 --- a/src/azul/infra/bigquery_reservation.py +++ b/src/azul/infra/bigquery_reservation.py @@ -227,8 +227,8 @@ def deactivate(self) -> None: if not self.dry_run and self.is_active is not False: raise RuntimeError(f'Failed to delete slots in location {self.location!r}') - def _single_resource(self, resources: ResourcePager) -> Resource | None: - resources: list[Resource] = list(resources) + def _single_resource(self, pager: ResourcePager) -> Resource | None: + resources: list[Resource] = list(pager) try: resource, *extras = resources except ValueError: diff --git a/src/azul/lib/types.py b/src/azul/lib/types.py index 4d7dcf8b7c..53421bf18a 100644 --- a/src/azul/lib/types.py +++ b/src/azul/lib/types.py @@ -13,6 +13,7 @@ Any, Callable, ForwardRef, + Literal, NotRequired, Optional, Protocol, @@ -767,6 +768,12 @@ def check_type(type_expression: TypeExpression, value: Any) -> bool: >>> check_type(G, {'x': 22, 'z': 44}) False + >>> check_type(Literal['a', 'b'], 'a') + True + >>> check_type(Literal['a', 'b'], 'c') + False + >>> check_type(Literal[42], 'c') + False """ return _check_type(type_expression, value, {}) @@ -796,6 +803,8 @@ def _check_type(t: TypeExpression | TypeVar, ot, ats = not_none(get_origin(t)), get_args(t) if ot in (ReadOnly, Required, NotRequired): return _check_type(one(ats), x, tvs) + elif ot is Literal: + return x in ats else: tps = getattr(ot, '__type_params__', ()) if tps: diff --git a/src/azul/service/manifest_service.py b/src/azul/service/manifest_service.py index 4a02f52b57..eb00e67e30 100644 --- a/src/azul/service/manifest_service.py +++ b/src/azul/service/manifest_service.py @@ -1580,8 +1580,8 @@ def _write(file: JSON, is_related_file: bool = False): contents = json_mapping(doc['contents']) files = json_sequence(contents['files']) file = json_mapping(one(files)) - source: JSON = one(json_sequence_of_mappings(doc['sources'])) - source: SourceRef = SourceRef.from_json(source) + source_json = json_mapping(one(json_sequence(doc['sources']))) + source: SourceRef = SourceRef.from_json(source_json) # On AnVIL, and for political reasons, we are not permitted to # include managed-access files, even if they are accessible to diff --git a/src/azul/terra.py b/src/azul/terra.py index 3c0c53fff5..58db35c2f8 100644 --- a/src/azul/terra.py +++ b/src/azul/terra.py @@ -652,7 +652,7 @@ def get_duos(self, Return the DUOS ID and DUOS dataset registration information for the given TDR snapshot. """ - body = self._retrieve_source(source) + body: MutableJSON = self._retrieve_source(source) try: duos_id = json_str(json_dict(body['duosFirecloudGroup'])['duosId']) except (KeyError, AssertionError): diff --git a/test/test_tagging.py b/test/test_tagging.py index e425a8a7ba..92efb8979c 100644 --- a/test/test_tagging.py +++ b/test/test_tagging.py @@ -84,7 +84,7 @@ def test(self): 'aws_untaggable_resource': {'foo': {}} } } - expected: JSON = { + expected = { 'resource': [ {'aws_untaggable_resource': [{'foo': {}}]} ]