Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions tests/unit/cache/origin/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import pytest

from tests.common.db.accounts import UserFactory
from tests.common.db.organizations import (
OrganizationFactory,
OrganizationProjectFactory,
)
from tests.common.db.packaging import (
FileFactory,
ProjectFactory,
Expand All @@ -16,6 +20,7 @@
from warehouse.cache.origin.derivers import html_cache_deriver
from warehouse.cache.origin.interfaces import IOriginCache
from warehouse.observations.models import ObservationKind
from warehouse.organizations.services import database_organization_factory


def test_store_purge_keys():
Expand Down Expand Up @@ -113,6 +118,93 @@ def test_store_purge_keys_skips_audit_only_collection_changes(
assert f"project/{project.normalized_name}" not in purges


def test_store_purge_keys_organization_project_add_purges_org_and_project(
app_config, db_request
):
# Adding a project to an organization via `add_organization_project` must
# purge both the project profile (`project/{name}`) and the organization
# profile (`org/{name}`), otherwise the cached `/org/{orgname}` project
# list goes stale.
#
# Exercising `add_organization_project is load-bearing: the
# purge-key factory uses `if_attr_exists`, which resolves to `None` during
# `after_flush` if the row was built with only FK ids instead of
# objects, causing us to silently drop the purges.
Comment on lines +124 to +132
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Claude insists on using comment blocks over docstrings, I don't know why.

organization = OrganizationFactory.create()
project = ProjectFactory.create()
db_request.db.flush()
db_request.db.info.pop("warehouse.cache.origin.purges", None)

organization_service = database_organization_factory(None, db_request)
organization_service.add_organization_project(organization.id, project.id)
db_request.db.flush()

purges = db_request.db.info.get("warehouse.cache.origin.purges", set())
assert f"org/{organization.normalized_name}" in purges
assert f"project/{project.normalized_name}" in purges


def test_store_purge_keys_organization_project_add_survives_events_committed_state(
app_config, db_request
):
# Regression guard for issue #19911.
#
# `store_purge_keys` skips emitting a purge for a dirty object whose
# `committed_state` is a subset of `_NON_CACHE_RELEVANT_ATTRS` (the
# perf optimizations in PRs #19898 and #19910). Before this fix,
# `add_organization_project` relied on `flag_dirty(org)` to trigger an
# Organization-dirty purge — but the manage view then recorded an event
# on the organization, setting `event.source = org` and back-populating
# the `events` dynamic collection, which put `'events'` in the
# Organization's committed_state. The skip then masked the only purge
# for `org/{orgname}`, leaving the CDN entry stale.
#
# The fix makes the purge come from the new OrganizationProject row
# (session.new, not session.dirty) — the skip applies only to dirty
# objects, so a `session.new` row is unaffected. This test pins down
# that behavior: the `org/{name}` purge must still fire even when the
# Organization has `events` in its committed_state at flush time.
organization = OrganizationFactory.create()
project = ProjectFactory.create()
db_request.db.flush()
db_request.db.info.pop("warehouse.cache.origin.purges", None)

organization_service = database_organization_factory(None, db_request)
organization_service.add_organization_project(organization.id, project.id)

# Construct an OrganizationEvent with `source=organization` to force
# `events` into Organization's committed_state before the next flush.
# This is the exact state the manage view produces via `record_event`.
db_request.db.add(organization.Event(source=organization, tag="test:regression"))

db_request.db.flush()

purges = db_request.db.info.get("warehouse.cache.origin.purges", set())
assert f"org/{organization.normalized_name}" in purges
assert f"project/{project.normalized_name}" in purges
Comment on lines +170 to +171
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test passes for the wrong reasons, and also believe it shouldn't work in its current behavior.

Adding an Event shouldn't dirty an object for cache purge, which is why it's excluded. Dirties that include events and other attributes continue to be purged correctly. One logged example from a few minutes ago:

{
  "obj_class": "File",
  "state": "dirty",
  "keys": [
    "project/ctboost"
  ],
  "changed_attrs": [
    "events",
    "provenance"
  ],
  "event": "cache_purge_keys_generated",
  ...
}

The session purge shows that the object had events + , and thus got a cache purge. see

# Skip if only non-cache-relevant attributes changed
if changed and changed <= _NON_CACHE_RELEVANT_ATTRS:

maybe the word "only" needs to be bolded 😉

The reason this test is currently passing is that there's no "session purge pop" after organization_service.add_organization_project(...) call - so those changes will trigger the purge.

Moving db_request.db.flush() / db_request.db.info.pop("warehouse.cache.origin.purges", None) to be after the service call ensures the session state is "clean" before attempting the Event addition, and shows the failure.

Generally I think removing this test case is best, as the general idea is covered by this test:

@pytest.mark.parametrize(
"trigger",
[_trigger_event, _trigger_observation, _trigger_invitation],
ids=["events", "observations", "invitations"],
)
def test_store_purge_keys_skips_audit_only_collection_changes(
trigger, app_config, db_request
):
# Audit/admin-only collection mutations (events, observations, invitations)
# never affect publicly-cached content and should not trigger a Project purge.
project = ProjectFactory.create()
db_request.db.flush()
db_request.db.info.pop("warehouse.cache.origin.purges", None)
trigger(project, db_request)
db_request.db.flush()
purges = db_request.db.info.get("warehouse.cache.origin.purges", set())
assert f"project/{project.normalized_name}" not in purges

Maybe adding some more parameterizations there could be helpful, but might be more complexity than worth it? Your choice.



def test_store_purge_keys_organization_project_delete_purges_org_and_project(
app_config, db_request
):
# Deleting a project from an organization via `delete_organization_project`
# must also purge both caches so the org profile reflects the project
# leaving the organization.
organization = OrganizationFactory.create()
project = ProjectFactory.create()
OrganizationProjectFactory.create(organization=organization, project=project)
db_request.db.flush()
db_request.db.info.pop("warehouse.cache.origin.purges", None)

organization_service = database_organization_factory(None, db_request)
organization_service.delete_organization_project(organization.id, project.id)
db_request.db.flush()

purges = db_request.db.info.get("warehouse.cache.origin.purges", set())
assert f"org/{organization.normalized_name}" in purges
assert f"project/{project.normalized_name}" in purges


def test_store_purge_keys_skips_project_dirty_on_roles_change(app_config, db_request):
# Role has its own cache_keys; the Project-dirty purge would only add
# all-projects and org/{name}, which aren't affected by role membership.
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/packaging/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ def key_factory(keystring, iterate_on=None, if_attr_exists=None):
OrganizationProject,
purge_keys=[
key_factory("project/{attr.normalized_name}", if_attr_exists="project"),
key_factory(
"org/{attr.normalized_name}", if_attr_exists="organization"
),
],
),
]
Expand Down
14 changes: 7 additions & 7 deletions warehouse/organizations/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import datetime

from psycopg.errors import UniqueViolation
from sqlalchemy import delete, func, orm, select
from sqlalchemy import delete, func, select
from sqlalchemy.exc import NoResultFound
from zope.interface import implementer

Expand Down Expand Up @@ -556,16 +556,16 @@ def add_organization_project(self, organization_id, project_id):
"""
Adds an association between the specified organization and project
"""
from warehouse.packaging.models import Project
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm still not sure why our isort config doesn't flag inline imports without a disable comment - but this should be placed at top-of module. (Hey Claude, remember that preference).


# We need to pass the full objects instead of just their IDs
# to avoid silently breaking our purges that rely on `if_attr_exists`.
organization_project = OrganizationProject(
organization_id=organization_id,
project_id=project_id,
organization=self.db.get(Organization, organization_id),
project=self.db.get(Project, project_id),
Comment on lines +564 to +565
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Flagging: this is the core of the fix, but I'm also kind of unhappy with it (since now we're fetching an entire row from the DB). Maybe not a huge deal in context though, given that adding a project to an org isn't exactly a hot path operation.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we don't watch to fetch the record, we can add a flag_dirty for the organization_project.project instead, but that also feels clunky.

Considering that the upstream caller already has both the Organization and Project objects, I think it's a better choice to pass the objects that we can then mutate, instead of working with IDs, and then we avoid some of the manual db.add,flush,flag_dirty, and let the ORM do its thing.

)

self.db.add(organization_project)
self.db.flush() # Flush db so we can address the organization related object

# Mark Organization as dirty, so purges will happen
orm.attributes.flag_dirty(organization_project.organization)

return organization_project

Expand Down
1 change: 1 addition & 0 deletions warehouse/packaging/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ def includeme(config):
OrganizationProject,
purge_keys=[
key_factory("project/{attr.normalized_name}", if_attr_exists="project"),
key_factory("org/{attr.normalized_name}", if_attr_exists="organization"),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is the main fix - since it was previously unregistered.

],
)

Expand Down
11 changes: 9 additions & 2 deletions warehouse/packaging/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,10 +667,17 @@ def create_project(
)

if organization_id:
# If an organization ID is provided, we never set the creator to owner
# If an organization ID is provided, we never set the creator to owner.
# Pass the resolved `organization` object (not just `organization_id`)
# so the `OrganizationProject` purge-key factory can resolve the
# relationship during `after_flush` and actually emit the
# `org/{name}` and `project/{name}` purge keys.
from warehouse.organizations.models import Organization

self.db.add(
OrganizationProject(
organization_id=organization_id, project_id=project.id
organization=self.db.get(Organization, organization_id),
project=project,
Comment on lines -673 to +680
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I reverted this change locally, and tests pass. So either we don't need this change, or the tests aren't expressing the condition well enough.

)
)
elif creator_is_owner:
Expand Down
Loading