diff --git a/mixpanel-plugin/.claude-plugin/plugin.json b/mixpanel-plugin/.claude-plugin/plugin.json index 25c4ad8..3d86770 100644 --- a/mixpanel-plugin/.claude-plugin/plugin.json +++ b/mixpanel-plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "mixpanel-headless", - "version": "0.1.0", + "version": "0.1.1", "description": "Query and analyze Mixpanel data with Python. Provides the mixpanel_headless auth surface (Account → Project → Workspace hierarchy), API (5 query engines, discovery, entity CRUD), and Business Context read/write (the markdown documentation that grounds AI assistants, at org and project scopes), with a live documentation system (help.py) for method signatures, type lookup, fuzzy search, and hosted docs.", "author": { "name": "Jared McFarland", diff --git a/mixpanel-plugin/commands/auth.md b/mixpanel-plugin/commands/auth.md index 84fc35d..437d82b 100644 --- a/mixpanel-plugin/commands/auth.md +++ b/mixpanel-plugin/commands/auth.md @@ -29,6 +29,21 @@ arguments, run `session`. ### "login" +Two routing paths — same-machine vs headless. Detect the environment +first: + +- **Same-machine** (you can launch the user's browser from the terminal + the user is in — typical Claude Code CLI setup): run `mp login` + directly and let the user complete the flow in their browser. +- **Headless** (Claude Cowork sandbox, devcontainer, browserless SSH — + `$DISPLAY` unset, `$BROWSER` unset, or you know you're in Cowork): use + the two-shot `--start` / `--finish` flow that emits machine-parseable + JSON envelopes. The CLI process can't reach the user's host browser + via loopback, so the flow runs in two CLI invocations bridged by the + user pasting the redirect URL back into chat. + +#### Same-machine path + For first-time setup, the frictionless one-shot path is `mp login`. It runs the right auth flow for the environment, derives the account name from `/me`, and pins a default project. Tell the user to run: @@ -49,11 +64,121 @@ Optional flags they may want: - `--project ID` — skip the project picker - `--service-account` — force the SA path (requires `MP_USERNAME` + `MP_SECRET` in env) - `--token-env VAR` — force the static-bearer path (reads token from `$VAR`) -- `--no-browser` — print the authorization URL instead of launching a browser +- `--no-browser` — print the authorization URL instead of launching a browser (still requires a TTY for paste-back) After the user confirms they ran it, verify with `account test`: `python3 ${CLAUDE_PLUGIN_ROOT}/skills/mixpanelyst/scripts/auth_manager.py account test` +#### Headless path (Claude Cowork, devcontainers, browserless remote) + +The two-shot `mp login --start` / `--finish` flow exists for environments +where the loopback OAuth callback can't reach the user's host browser. +You drive the dance directly via `Bash` calls and `AskUserQuestion`: + +1. **Start.** Run: + ``` + ! mp login --start + ``` + For EU / India users, append `--region eu` or `--region in`. The CLI + prints a single-line JSON envelope to stdout. Parse it as + `json.loads(stdout)`. + + Expected shape: + ```json + { + "schema_version": 1, + "state": "ok", + "authorize_url": "https://mixpanel.com/oauth/authorize/?...", + "redirect_uri": "http://localhost:19284/callback", + "expires_at": , + "region": "us", + "inflight_path": "/home/.../inflight.json" + } + ``` + +2. **Present the URL.** Show `authorize_url` to the user with this + exact framing: + + > Open this URL in your browser, complete login, then copy the URL + > from your browser's address bar back here. The page will fail to + > load with "site can't be reached" — that's expected; copy the URL + > anyway. Tested on Chrome, Firefox, and Safari. + + Use `AskUserQuestion` to collect the pasted URL. + +3. **Finish.** Pass the pasted URL verbatim to `--finish`. Quote it so + shell metacharacters in the query string don't break the call: + ``` + ! mp login --finish '' + ``` + + Expected `state: ok` envelope: + ```json + { + "schema_version": 1, + "state": "ok", + "account": {"name": "...", "type": "oauth_browser", "region": "us"}, + "user": {"email": "..."}, + "project": {"id": "...", "name": "..."}, + "project_pick": { + "auto_picked": true, + "method": "primary_org_lowest_id", + "primary_org_name": "...", + "primary_org_survivor_count": , + "accessible_project_count": , + ... + }, + "next": [ + {"command": "mp project list", "label": "See all accessible projects"}, + {"command": "mp project use ", "label": "Switch to a different project"} + ] + } + ``` + + Render based on `project_pick.method`: + - `"explicit"` → "✓ Logged in to project `` as requested." + - `"sole_survivor"` → "✓ Logged in to project `` (your only + active project)." + - `"sole_survivor_filtered"` → "✓ Logged in to project `` — + the only non-demo, integrated project among your + `` projects in this region. The others + are demos or have never received events. Run `mp project list` if + you want to pick a different one." + - `"primary_org_lowest_id"` → "✓ Logged in to project `` — + auto-picked from `` (your most-active org; + `` projects there). Want a different + one?" If yes, run `mp project list` then `mp project use `. + - `"fallback_with_unintegrated"` → "✓ Logged in to project `` + — note: this project hasn't received events. Verify before running + queries." + - `"fallback_with_demos"` → "✓ Logged in to project `` — note: + all your projects are demos. Pass `--project ID` next time if you + want a specific one." + +4. **Handle the `cross_region_only` error.** If `--finish` exits 6 with + `state: error` and `error.code: NEEDS_REGION_SWITCH`, the user + authenticated against the wrong region. Show: + + > Your account doesn't have any projects in ``. The + > `error.details.cross_region_projects` list shows projects in other + > regions. Want to retry with one of those? + + Then offer to re-run `mp login --start --region eu` (or `--region in`) + based on the cross-region list. + +5. **Recovery.** If `--finish` fails AFTER token exchange (e.g., `/me` + timed out, name collision), the CLI leaves a `.tmp-*` placeholder dir + in `~/.mp/accounts/`. Re-run with `--resume ` to retry the + publish without re-running PKCE. The error message will include the + placeholder path. + +After the dance completes, verify with `account test`: +`python3 ${CLAUDE_PLUGIN_ROOT}/skills/mixpanelyst/scripts/auth_manager.py account test` + +**Inflight TTL is 10 minutes.** If the user takes longer than that to +complete browser auth and paste back, `--finish` returns +`OAUTH_INFLIGHT_EXPIRED`. Re-run `mp login --start` to begin again. + ### No arguments or "session" Run: `python3 ${CLAUDE_PLUGIN_ROOT}/skills/mixpanelyst/scripts/auth_manager.py session` diff --git a/mixpanel-plugin/docs/quickstart-claude-cowork.md b/mixpanel-plugin/docs/quickstart-claude-cowork.md index 46cc77a..7abcb1b 100644 --- a/mixpanel-plugin/docs/quickstart-claude-cowork.md +++ b/mixpanel-plugin/docs/quickstart-claude-cowork.md @@ -2,7 +2,12 @@ Get your Mixpanel credentials working inside [Claude Cowork](https://docs.anthropic.com/en/docs/claude-code/cowork) sessions so Claude agents can query your analytics data autonomously. -Cowork runs Claude agents in sandboxed virtual machines. These VMs don't have access to your local config files or browser, so credentials need to be exported from your machine into a "bridge file" that Cowork can read. +Cowork runs Claude agents in sandboxed virtual machines. The CLI has two ways to authenticate inside Cowork: + +1. **Two-shot login (`mp login --start` / `--finish`)** — the simplest path if you don't have `mp` installed locally. Runs entirely inside Cowork; you paste a URL into your host browser, then paste the redirect URL back. No host-side setup. +2. **Bridge file** — if you already have credentials configured on your laptop (e.g., from regular `mp` use), export them into a "bridge file" Cowork can read. One-time setup, lasts across Cowork sessions. + +Both paths work. Pick the one that fits your situation. --- @@ -10,13 +15,65 @@ Cowork runs Claude agents in sandboxed virtual machines. These VMs don't have ac - **Claude Desktop** with Cowork access - **A Mixpanel account** with access to a project -- **One of the following** for authentication: - - A **service account** (username + secret) from your Mixpanel project settings, OR - - A browser for **OAuth login** (interactive — your project is auto-discovered) +- **A browser** on your laptop for OAuth login (works for both paths) + +--- + +## Path A: Two-shot login inside Cowork (recommended for first-time users) + +This path requires nothing on your laptop. Everything happens inside Cowork. + +### Step A1: Start a Cowork session and run setup + +Open a Cowork session and run: + +``` +/mixpanel-headless:setup +``` + +This installs the `mp` CLI in the Cowork VM. + +### Step A2: Run the login slash command + +``` +/mixpanel-headless:auth login +``` + +Claude detects the headless environment and runs the two-shot flow: + +1. Claude runs `mp login --start` and shows you a URL to open. +2. You open the URL in your laptop's browser, complete Mixpanel login. +3. Your browser redirects to `localhost:19284/callback?...` and shows "site can't be reached" — **that's expected**. +4. You copy the URL from your browser's address bar and paste it back into chat. +5. Claude runs `mp login --finish ''`. The CLI exchanges the code for tokens, fetches `/me`, auto-picks a project, and writes the account to `~/.mp/accounts/` inside the VM. + +The 10-minute inflight TTL is generous — switch tabs, complete login, come back, paste. If you take longer, just run `/mixpanel-headless:auth login` again to begin a fresh attempt. + +### Step A3: Start asking questions + +``` +How many signups did we get last week? + +What's our funnel conversion rate from signup to purchase? +``` + +### When this path is best + +- You don't have `mp` installed on your laptop yet. +- You only need access from this one Cowork session (or a few — the VM persists tokens between sessions, but if Cowork rebuilds the VM you'll re-login). +- You're OK with the manual paste-back per fresh VM (one-time cost; auto-refresh handles token rotation thereafter). + +### Tested browsers + +The "site can't be reached" page must preserve `?code=...&state=...` in the address bar so you can copy it. Verified working on Chrome, Firefox, and Safari (default settings). --- -## Step 1: Install the CLI and Set Up Credentials +## Path B: Credential bridge from your laptop + +If you already have `mp` configured on your laptop, this path is faster: one host-side export, then every Cowork session auto-discovers your credentials. + +### Step B1: Install the CLI and set up credentials On your **local machine** (not inside Cowork), install the `mp` command-line tool: @@ -56,7 +113,7 @@ You should see a success message confirming the connection. --- -## Step 2: Export Credentials for Cowork +### Step B2: Export credentials for Cowork On your **local machine**, export the active account into a v2 bridge file at the default Cowork-readable path: @@ -66,30 +123,28 @@ mp account export-bridge --to ~/.claude/mixpanel/auth.json This writes a v2 `auth.json` bridge file embedding your full `Account` record (and any `oauth_browser` tokens). The Cowork VM auto-discovers it on session start. Override the location with the `MP_AUTH_FILE` env var if you need a custom path. -You'll see a brief confirmation, then the bridge is ready: +The CLI prints: ``` -Wrote bridge: ~/.claude/mixpanel/auth.json - Account: personal (oauth_browser, us) - Tokens: included (refresh-capable) +Wrote bridge file to ~/.claude/mixpanel/auth.json ``` ### Options ```bash # Export a specific named account (defaults to the active account) -mp account export-bridge --to ~/.claude/mixpanel/auth.json --account production +mp account export-bridge --to ~/.claude/mixpanel/auth.json --account YOUR_ACCOUNT_NAME # Pin a project ID into the bridge (overrides the account's default_project) -mp account export-bridge --to ~/.claude/mixpanel/auth.json --project 12345 +mp account export-bridge --to ~/.claude/mixpanel/auth.json --project YOUR_PROJECT_ID # Pin a workspace ID into the bridge (needed for dashboard/entity management) -mp account export-bridge --to ~/.claude/mixpanel/auth.json --workspace 3448413 +mp account export-bridge --to ~/.claude/mixpanel/auth.json --workspace YOUR_WORKSPACE_ID ``` --- -## Step 3: Start a Cowork Session and Run Setup +### Step B3: Start a Cowork session and run setup Open a Cowork session and run the setup skill: @@ -97,21 +152,9 @@ Open a Cowork session and run the setup skill: /mixpanel-headless:setup ``` -The setup script automatically detects the Cowork environment and reads credentials from the bridge file. You'll see output like: - -``` -Cowork environment detected. -✓ Auth bridge file found: ~/.claude/mixpanel/auth.json - Account: personal (oauth_browser, us) - Project: 12345 - Tokens: included (refresh-capable) -``` - -No additional configuration is needed inside Cowork. - ---- +The setup script automatically detects the Cowork environment and reads credentials from the bridge file. No additional configuration is needed inside Cowork. -## Step 4: Start Asking Questions +### Step B4: Start asking questions You're ready. Ask Claude questions in natural language, just like in regular Claude Code: @@ -185,9 +228,9 @@ The credential bridge is a v2 JSON file that maps your local credentials into a Your machine Cowork VM ┌─────────────────────────┐ ┌─────────────────────────┐ │ ~/.mp/config.toml │ │ ~/.claude/mixpanel/ │ -│ ~/.mp/accounts// │──account export-bridge──▶│ auth.json │ -│ (account + tokens) │ --to │ (v2 bridge: full │ -│ │ │ Account + tokens) │ +│ (account records) │ │ auth.json │ +│ ~/.mp/accounts// │──account export-bridge──▶│ (v2 bridge: full │ +│ (tokens + me cache) │ --to │ Account + tokens) │ └─────────────────────────┘ └─────────────────────────┘ │ resolve_session() reads @@ -253,20 +296,38 @@ mp --version # verify ### Setup says "Cowork environment detected" but no credentials -**Cause**: You're inside Cowork but the bridge file is missing. +**Cause**: You're inside Cowork without a bridge file AND haven't done the two-shot login. + +**Fix**: Two options: +- Use Path A (two-shot login from inside Cowork): run `/mixpanel-headless:auth login` inside Cowork. +- Use Path B (bridge from your laptop): run `mp account export-bridge --to ~/.claude/mixpanel/auth.json` on your local machine, then start a new Cowork session. + +### Two-shot login: "site can't be reached" page + +**Cause**: When the OAuth provider redirects your browser to `localhost:19284/callback`, no server is listening (the loopback is the Cowork VM, not your laptop). The browser shows "site can't be reached." + +**Fix**: This is the expected behavior. Copy the URL from your browser's address bar (it contains `?code=...&state=...`) and paste it back into chat. Verified to work on Chrome, Firefox, and Safari with default settings. + +### Two-shot login: inflight expired + +**Cause**: You took longer than 10 minutes between `mp login --start` and pasting the redirect URL. + +**Fix**: Re-run `/mixpanel-headless:auth login`. The CLI clobbers the prior inflight and starts fresh. + +### Two-shot login: post-publish failure + +**Cause**: `mp login --finish` succeeded at token exchange but failed at publish (e.g., `/me` timed out, name collision, network blip). -**Fix**: You cannot configure credentials from inside Cowork (no browser, no host terminal). Exit Cowork, run `mp account export-bridge --to ~/.claude/mixpanel/auth.json` on your local machine, then start a new Cowork session. +**Fix**: The CLI leaves a `.tmp-*` placeholder dir in `~/.mp/accounts/`. Re-run `mp login --resume ` to retry the publish without re-running PKCE. The error message includes the placeholder path. -### Important: What Doesn't Work Inside Cowork +### Bridge path: commands that need to run on your laptop -These commands require a browser or host terminal and **should be run on your local machine**, not inside Cowork: +These bridge-management commands require host-side resources and **must run on your local machine**: -- `mp login` and `mp account login ` (need a browser for the PKCE OAuth flow) -- `mp account add` for `service_account` (prompts for secret interactively by default; `--secret-stdin` and `MP_SECRET` env var work non-interactively, but the credential bridge is the recommended approach for Cowork) - `mp account export-bridge --to ` (reads host credentials, writes the bridge file) - `mp account remove-bridge [--at ]` (removes the bridge file from the host) -Always run these on your **local machine** before starting a Cowork session. +Note: `mp login` itself works **inside** Cowork via the two-shot flow (Path A). Only the bridge-export commands above are host-only. --- diff --git a/pyproject.toml b/pyproject.toml index 05eeac2..0d6f67c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "tomli-w>=1.0", "pandas>=2.0", "pyarrow>=17.0; python_version >= '3.11'", - "httpx>=0.27", + "httpx[socks]>=0.27", "typer>=0.12", "rich>=13.0", "jq>=1.9.0", diff --git a/src/mixpanel_headless/__init__.py b/src/mixpanel_headless/__init__.py index ae4de21..e10da20 100644 --- a/src/mixpanel_headless/__init__.py +++ b/src/mixpanel_headless/__init__.py @@ -285,7 +285,7 @@ ) from mixpanel_headless.workspace import Workspace -__version__ = "0.1.0" +__version__ = "0.1.1" __all__ = [ # Core diff --git a/src/mixpanel_headless/_internal/auth/client_registration.py b/src/mixpanel_headless/_internal/auth/client_registration.py index 1f40aae..9f94ef1 100644 --- a/src/mixpanel_headless/_internal/auth/client_registration.py +++ b/src/mixpanel_headless/_internal/auth/client_registration.py @@ -42,6 +42,18 @@ "in": "https://in.mixpanel.com/oauth/", } +#: Canonical project ``domain`` host string keyed by region. +#: Mirrors ``OAUTH_BASE_URLS`` keys so the auto-pick filter can map a +#: region back to the host string ``MeProjectInfo.domain`` reports. +#: Co-located here as the single source of region truth — both the auth +#: endpoint config and the auto-pick region filter resolve through this +#: file. +DOMAIN_FOR_REGION: dict[str, str] = { + "us": "mixpanel.com", + "eu": "eu.mixpanel.com", + "in": "in.mixpanel.com", +} + #: Scopes sent in the DCR request body for server-side validation. #: Advisory only — DCR does NOT store these on the application model. #: The created app has an empty scope field, meaning all scopes are allowed. diff --git a/src/mixpanel_headless/_internal/auth/flow.py b/src/mixpanel_headless/_internal/auth/flow.py index 1d44df2..da4b16d 100644 --- a/src/mixpanel_headless/_internal/auth/flow.py +++ b/src/mixpanel_headless/_internal/auth/flow.py @@ -107,7 +107,11 @@ def _parse_pasted_redirect(line: str, *, expected_state: str) -> CallbackResult: ) code = code_list[0] state = state_list[0] - if state != expected_state: + # Constant-time compare. Practical timing-oracle exploitability against + # a single-use local CLI is limited, but the cost is zero and a string + # `!=` here would be the kind of paper cut a future security audit + # would (correctly) flag. + if not secrets.compare_digest(state, expected_state): raise OAuthError( "State mismatch — the pasted text does not belong to this login session.", code="OAUTH_STATE_MISMATCH", @@ -165,7 +169,16 @@ def __init__( ) self._region = region self._storage = storage or OAuthStorage() - self._http_client = http_client or httpx.Client() + # httpx defaults to 5s for every phase. A cold SOCKS5h handshake + # (DNS-over-proxy + TLS through tunnel) reliably exceeds 5s on first + # call inside Cowork; subsequent warm calls land in 0.5-0.9s. The + # Mixpanel /oauth/ endpoints themselves are fast, so a generous + # read timeout costs nothing in the happy path and prevents the + # confusing "OAUTH_REGISTRATION_ERROR: read timed out" we hit + # repeatedly during the diagnosis phase. + self._http_client = http_client or httpx.Client( + timeout=httpx.Timeout(30.0, connect=10.0), + ) self._base_url = OAUTH_BASE_URLS[region] @property @@ -177,6 +190,61 @@ def region(self) -> str: """ return self._region + @property + def http_client(self) -> httpx.Client: + """Shared :class:`httpx.Client` used for token exchange and DCR. + + Exposed publicly so the two-shot ``mp login --start`` / + ``--finish`` orchestrators in :mod:`mixpanel_headless.accounts` + can pass the same connection pool into + :func:`ensure_client_registered` without reaching into a + leading-underscore attribute. + + Returns: + The underlying ``httpx.Client``. + """ + return self._http_client + + @property + def storage(self) -> OAuthStorage: + """Shared :class:`OAuthStorage` used for token + client persistence. + + Same rationale as :attr:`http_client`: the two-shot login + orchestrators reuse this storage so the cached DCR client info + survives across ``--start`` / ``--finish`` invocations. + + Returns: + The underlying ``OAuthStorage``. + """ + return self._storage + + def close(self) -> None: + """Close the underlying :class:`httpx.Client`. Idempotent. + + Safe to call multiple times — ``httpx.Client.close()`` is a + no-op after the first close. Library callers can either invoke + this directly or use :class:`OAuthFlow` as a context manager. + """ + self._http_client.close() + + def __enter__(self) -> OAuthFlow: + """Enter the context — returns ``self``. + + Returns: + This :class:`OAuthFlow` instance. + """ + return self + + def __exit__(self, *_exc: object) -> None: + """Exit the context — closes the underlying ``httpx.Client``. + + Args: + *_exc: Standard ``__exit__`` arguments (exception type, + value, traceback). Ignored — :meth:`close` runs + regardless of whether the body raised. + """ + self.close() + def get_valid_token(self, region: str) -> str: """Return a valid access token, refreshing if expired. @@ -295,15 +363,17 @@ def login(self, *, persist: bool = False, open_browser: bool = True) -> OAuthTok state=state, ) - # Step 5: Set up two completers that race on a shared queue: - # 1) The local callback server (always — handles same-machine case). - # 2) Stdin paste reader (only when ``open_browser=False`` — handles - # "I opened the URL on my laptop, the localhost redirect failed, - # let me paste the URL back"). - # Whichever produces a valid (code, state) first wins. PKCE verifier - # stays in this process so no on-disk persistence is needed. + # Step 5: Set up two completers that race. The previous version + # used a queue.get(timeout=310.0) on a result queue and only + # checked an error queue if the wait timed out — meaning a + # paste-reader that failed fast (empty stdin, state mismatch) sat + # silently in error_q for 5 minutes while the user thought the + # CLI was hung. Now both completers signal a single + # threading.Event; the main thread wakes within milliseconds of + # either completing and pulls from result_q OR error_q. result_q: queue.Queue[CallbackResult] = queue.Queue(maxsize=1) error_q: queue.Queue[Exception] = queue.Queue(maxsize=1) + done_event = threading.Event() def _run_callback() -> None: """Drive the local callback server; first completer wins.""" @@ -317,6 +387,8 @@ def _run_callback() -> None: except Exception as exc: with contextlib.suppress(queue.Full): error_q.put_nowait(exc) + finally: + done_event.set() def _run_paste_reader() -> None: """Read a pasted redirect URL / query string from stdin.""" @@ -328,10 +400,26 @@ def _run_paste_reader() -> None: except Exception as exc: with contextlib.suppress(queue.Full): error_q.put_nowait(exc) + finally: + done_event.set() threading.Thread(target=_run_callback, daemon=True).start() + # Skip the paste reader entirely when stdin is non-TTY (Cowork + # pipes, CI runners). There is no in-band way to deliver the + # paste, and starting the thread just clutters error_q with a + # fast "empty paste" failure that races with the legitimate + # callback path. Same-machine SSH users with a TTY still get the + # paste fallback for the failed-loopback case. + paste_reader_started = False if not open_browser: - threading.Thread(target=_run_paste_reader, daemon=True).start() + # Some test fixtures swap in a stdin replacement that lacks + # ``isatty`` (intentional or accidental). Treat absence as + # "non-TTY" so we don't AttributeError; users with a real + # TTY have ``isatty`` available. + stdin_is_tty = bool(getattr(sys.stdin, "isatty", lambda: False)()) + if stdin_is_tty: + threading.Thread(target=_run_paste_reader, daemon=True).start() + paste_reader_started = True # Small delay to ensure callback server is listening before opening # browser / printing the URL. @@ -347,6 +435,15 @@ def _run_paste_reader() -> None: details={"authorize_url": authorize_url}, ) from exc else: + paste_hint = ( + "Otherwise, paste the redirect URL (or the `code=...&state=...` " + "portion) below and press Enter:" + if paste_reader_started + else ( + "Stdin is not a TTY — for non-interactive environments, " + "use `mp login --start` and `mp login --finish URL` instead." + ) + ) print( "Open this URL in your browser to authorize:\n" f" {authorize_url}\n" @@ -354,21 +451,34 @@ def _run_paste_reader() -> None: "If the redirect to http://localhost:" f"{bound_port}/callback succeeds, the CLI will continue " "automatically.\n" - "Otherwise, paste the redirect URL (or the `code=...&state=...` " - "portion) below and press Enter:", + f"{paste_hint}", file=sys.stderr, flush=True, ) - # Step 6: Wait for whichever completer produces first. + # Step 6: Wait for either completer to signal done. 310s matches + # the inner callback-server 300s timeout plus a small slack so + # the explicit OAUTH_TIMEOUT below only fires if both completers + # really hung past their own deadlines (callback timed out + no + # paste). Done within milliseconds of either completer setting + # the event in the happy path. + if not done_event.wait(timeout=310.0): + raise OAuthError( + "Login timed out waiting for callback or paste.", + code="OAUTH_TIMEOUT", + ) try: - callback_result = result_q.get(timeout=310.0) - except queue.Empty as empty: + callback_result = result_q.get_nowait() + except queue.Empty: try: err = error_q.get_nowait() - except queue.Empty: + except queue.Empty as empty: + # Both queues empty after done_event fired — should be + # unreachable since every completer puts to one queue + # before setting the event. Fall through to a structured + # error rather than silently hanging. raise OAuthError( - "Login timed out waiting for callback or paste.", + "Login completer signaled done but produced no result.", code="OAUTH_TIMEOUT", ) from empty if isinstance(err, OAuthError): diff --git a/src/mixpanel_headless/_internal/auth/inflight.py b/src/mixpanel_headless/_internal/auth/inflight.py new file mode 100644 index 0000000..223c7b1 --- /dev/null +++ b/src/mixpanel_headless/_internal/auth/inflight.py @@ -0,0 +1,517 @@ +"""Two-shot OAuth login state — on-disk inflight session and placeholder helpers. + +This module persists the small bit of state that needs to survive the +``mp login --start`` / ``mp login --finish`` boundary: + +1. ``inflight.json`` at ``~/.mp/oauth/inflight.json`` carries the PKCE + verifier, OAuth state, registered ``client_id``, redirect URI, and + region for at most :data:`INFLIGHT_TTL_SECONDS` (10 minutes). Without + this, every ``--finish`` would have to re-run PKCE — defeating the + "give me a URL, finish later" model the two-shot flow exists to enable. + +2. Placeholder dir helpers (``new_placeholder_dir``, + ``read_tokens_from_placeholder``, ``cache_me_in_placeholder``, + ``load_cached_me_from_placeholder``, ``save_placeholder_meta``, + ``read_placeholder_meta``) operate on the ``~/.mp/accounts/.tmp-{nonce}`` + scratch dirs the new-account flow uses between token exchange and the + atomic publish. The same machinery powers ``mp login --resume`` for + post-publish-failure recovery. + +Mirrors :func:`bridge._read_browser_tokens` for the on-disk token shape so +the two readers stay in lockstep. +""" + +from __future__ import annotations + +import json +import logging +import os +import secrets +import socket +import time +from dataclasses import asdict, dataclass +from datetime import datetime, timezone +from pathlib import Path + +from pydantic import SecretStr + +from mixpanel_headless._internal.auth.callback_server import CALLBACK_PORTS +from mixpanel_headless._internal.auth.storage import _storage_root +from mixpanel_headless._internal.auth.token import OAuthTokens +from mixpanel_headless._internal.io_utils import atomic_write_bytes +from mixpanel_headless.exceptions import OAuthError + +logger = logging.getLogger(__name__) + +INFLIGHT_TTL_SECONDS = 600 +"""Inflight verifier lifetime in seconds (10 minutes). + +Matches typical OAuth code-grant expiry windows. Long enough that a user +can switch tabs, paste the URL into their host browser, complete login, +and copy the redirect URL back without rushing; short enough that a +forgotten ``--start`` doesn't leave the verifier readable for hours.""" + +INFLIGHT_SCHEMA_VERSION = 1 +"""Schema version for the on-disk ``inflight.json``. + +Bumped only when a non-additive change to :class:`InflightSession` +breaks readers compiled against the older schema. Add fields without +bumping; remove or rename fields with a bump.""" + +PLACEHOLDER_META_SCHEMA_VERSION = 1 +"""Schema version for ``meta.json`` written into placeholder dirs. + +Same additive-vs-breaking discipline as :data:`INFLIGHT_SCHEMA_VERSION`.""" + + +def inflight_path() -> Path: + """Return the inflight file location, honoring ``MP_OAUTH_STORAGE_DIR``. + + Resolved at every call so test isolation via env var monkeypatching + takes effect — a module-level constant captured at import time would + silently leak the developer's real ``~/.mp/`` into hermetic tests. + + Returns: + ``$MP_OAUTH_STORAGE_DIR/oauth/inflight.json`` if set, + else ``$HOME/.mp/oauth/inflight.json``. + """ + return _storage_root() / "oauth" / "inflight.json" + + +@dataclass(frozen=True) +class InflightSession: + """On-disk state carried across the ``--start`` / ``--finish`` boundary. + + Frozen so accidental field mutation between read and use is a TypeError + rather than a silent bug. ``pkce_verifier`` is a single-use secret — + treat it like a refresh token (0o600 file, never logged under -v, no + persistence beyond ``INFLIGHT_TTL_SECONDS``). + + Attributes: + schema_version: :data:`INFLIGHT_SCHEMA_VERSION` (1). + region: Region committed at ``--start`` time. ``--finish`` cannot + switch regions; the Mixpanel auth endpoint is region-bound. + client_id: DCR client ID for ``region``. Cached by + :func:`ensure_client_registered`; the same client_id is reused + on subsequent ``--start`` invocations within the same region. + redirect_uri: Loopback URL the browser will be redirected to. The + URL is registered with the IdP as part of DCR; ``--finish`` + re-uses it verbatim during code exchange. + pkce_verifier: 43-128 char URL-safe base64 verifier. Sent to the + token endpoint at ``--finish`` time. Single-use. + state: 32-byte URL-safe base64 CSRF token. Compared against the + pasted redirect URL's ``state`` parameter at ``--finish`` — + mismatch raises ``OAUTH_STATE_MISMATCH``. + created_at: Unix timestamp when ``--start`` ran. Diagnostic only. + expires_at: ``created_at + INFLIGHT_TTL_SECONDS``. Files past + this timestamp are rejected with ``OAUTH_INFLIGHT_EXPIRED``. + """ + + schema_version: int + region: str + client_id: str + redirect_uri: str + pkce_verifier: str + state: str + created_at: int + expires_at: int + + +def save_inflight(session: InflightSession) -> None: + """Atomically write ``session`` to the inflight file path with mode 0o600. + + Creates the parent directory (typically ``~/.mp/oauth/``) at 0o700 if + missing. A second ``--start`` silently clobbers the prior inflight + (single-user CLI; concurrent logins are not a supported use case). + + Args: + session: The inflight state to persist. + + Raises: + OSError: If the parent directory cannot be created or the write + fails (disk full, permission denied). + """ + path = inflight_path() + old_umask = os.umask(0o077) + try: + path.parent.mkdir(parents=True, exist_ok=True, mode=0o700) + finally: + os.umask(old_umask) + payload = json.dumps(asdict(session), indent=2).encode("utf-8") + atomic_write_bytes(path, payload) + + +def load_inflight() -> InflightSession: + """Read and validate the inflight session from disk. + + Returns: + The parsed :class:`InflightSession`. + + Raises: + OAuthError: ``OAUTH_INFLIGHT_MISSING`` when no file exists, + ``OAUTH_INFLIGHT_EXPIRED`` when ``expires_at < now()``, + ``OAUTH_INFLIGHT_CORRUPT`` on JSON parse / missing-key / + type-mismatch failures. + """ + path = inflight_path() + if not path.exists(): + raise OAuthError( + f"No inflight session at {path}. Run `mp login --start` first.", + code="OAUTH_INFLIGHT_MISSING", + details={"path": str(path)}, + ) + try: + raw = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + raise OAuthError( + f"Inflight session at {path} could not be parsed: {exc}", + code="OAUTH_INFLIGHT_CORRUPT", + details={"path": str(path)}, + ) from exc + if not isinstance(raw, dict): + raise OAuthError( + f"Inflight session at {path} is not a JSON object.", + code="OAUTH_INFLIGHT_CORRUPT", + details={"path": str(path)}, + ) + required = ( + "schema_version", + "region", + "client_id", + "redirect_uri", + "pkce_verifier", + "state", + "created_at", + "expires_at", + ) + missing = [k for k in required if k not in raw] + if missing: + raise OAuthError( + f"Inflight session at {path} is missing required keys: {missing}.", + code="OAUTH_INFLIGHT_CORRUPT", + details={"path": str(path), "missing": missing}, + ) + try: + session = InflightSession( + schema_version=int(raw["schema_version"]), + region=str(raw["region"]), + client_id=str(raw["client_id"]), + redirect_uri=str(raw["redirect_uri"]), + pkce_verifier=str(raw["pkce_verifier"]), + state=str(raw["state"]), + created_at=int(raw["created_at"]), + expires_at=int(raw["expires_at"]), + ) + except (TypeError, ValueError) as exc: + raise OAuthError( + f"Inflight session at {path} has invalid field types: {exc}", + code="OAUTH_INFLIGHT_CORRUPT", + details={"path": str(path)}, + ) from exc + if session.schema_version > INFLIGHT_SCHEMA_VERSION: + raise OAuthError( + f"Inflight session at {path} has schema_version=" + f"{session.schema_version}, newer than this CLI supports " + f"({INFLIGHT_SCHEMA_VERSION}). Re-run `mp login --start` after " + f"upgrading the CLI.", + code="OAUTH_INFLIGHT_SCHEMA_TOO_NEW", + details={ + "path": str(path), + "schema_version": session.schema_version, + "supported": INFLIGHT_SCHEMA_VERSION, + }, + ) + if session.expires_at < int(time.time()): + expired_at = datetime.fromtimestamp(session.expires_at, tz=timezone.utc) + raise OAuthError( + f"Inflight session at {path} expired at " + f"{expired_at.isoformat()}. " + f"Re-run `mp login --start`.", + code="OAUTH_INFLIGHT_EXPIRED", + details={"path": str(path), "expires_at": session.expires_at}, + ) + return session + + +def clear_inflight() -> None: + """Remove the inflight file if present. Idempotent. + + Called on ``--finish`` success to prevent a stale verifier from being + re-used. Failures are logged at WARNING but do not raise — the file's + job is done either way. + """ + path = inflight_path() + try: + path.unlink(missing_ok=True) + except OSError as exc: + logger.warning("Could not remove inflight file %s: %s", path, exc) + + +def find_available_callback_port() -> int: + """Probe :data:`CALLBACK_PORTS` for a port not currently in use. + + Same selection logic as :func:`flow._find_available_port` but exposed + here so :func:`accounts.login_unified_start` can build the + ``redirect_uri`` without depending on the OAuthFlow class. The + callback server is NOT started in the two-shot flow — the port is + reserved purely so the registered ``redirect_uri`` matches what the + same-machine flow would use, keeping the cached DCR client compatible + across both paths. + + Returns: + An available port from :data:`CALLBACK_PORTS`. + + Raises: + OAuthError: ``OAUTH_PORT_ERROR`` when every candidate port is busy. + """ + for port in CALLBACK_PORTS: + try: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as test_sock: + test_sock.bind(("127.0.0.1", port)) + return port + except OSError: + continue + raise OAuthError( + f"All OAuth callback ports ({CALLBACK_PORTS}) are busy.", + code="OAUTH_PORT_ERROR", + ) + + +def new_placeholder_dir(accounts_root_dir: Path) -> Path: + """Create a fresh ``.tmp-{nonce}/`` directory under ``accounts_root_dir``. + + Mode 0o700 on both the leaf and any newly-created parent. Mirrors the + pattern in :func:`accounts._login_unified_new_browser` so the two + flows produce structurally identical placeholder dirs (key precondition + for ``mp login --resume``). + + Args: + accounts_root_dir: Typically ``accounts_root()`` (i.e. + ``~/.mp/accounts/``). + + Returns: + Absolute path to the newly-created ``.tmp-{nonce}`` directory. + """ + nonce = secrets.token_hex(4) + old_umask = os.umask(0o077) + try: + accounts_root_dir.mkdir(parents=True, exist_ok=True, mode=0o700) + placeholder_dir = accounts_root_dir / f".tmp-{nonce}" + placeholder_dir.mkdir(mode=0o700) + finally: + os.umask(old_umask) + return placeholder_dir + + +def read_tokens_from_placeholder(placeholder_dir: Path) -> OAuthTokens: + """Reconstruct :class:`OAuthTokens` from ``placeholder_dir/tokens.json``. + + Mirror of :func:`bridge._read_browser_tokens` — the two paths must + accept the exact same on-disk shape so a ``mp login --finish`` write + is readable by ``mp account export-bridge`` and vice versa. + + Args: + placeholder_dir: A ``.tmp-{nonce}/`` directory containing + ``tokens.json`` (the canonical shape produced by + :func:`token_payload_bytes`). + + Returns: + The parsed :class:`OAuthTokens`. + + Raises: + OAuthError: ``OAUTH_TOKEN_ERROR`` when the file is missing, + unparseable, or missing required fields. + """ + path = placeholder_dir / "tokens.json" + if not path.exists(): + raise OAuthError( + f"No tokens.json in placeholder {placeholder_dir}.", + code="OAUTH_TOKEN_ERROR", + details={"placeholder_dir": str(placeholder_dir)}, + ) + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + raise OAuthError( + f"Could not read tokens.json at {path}: {exc}", + code="OAUTH_TOKEN_ERROR", + details={"path": str(path)}, + ) from exc + expires_raw = payload.get("expires_at") + if not isinstance(expires_raw, str) or not expires_raw: + raise OAuthError( + f"tokens.json at {path} is missing `expires_at`.", + code="OAUTH_TOKEN_ERROR", + details={"path": str(path)}, + ) + try: + expires_at = datetime.fromisoformat(expires_raw) + except ValueError as exc: + raise OAuthError( + f"tokens.json at {path} has an invalid `expires_at`.", + code="OAUTH_TOKEN_ERROR", + details={"path": str(path)}, + ) from exc + access_token = payload.get("access_token") + if not isinstance(access_token, str) or not access_token: + raise OAuthError( + f"tokens.json at {path} is missing `access_token`.", + code="OAUTH_TOKEN_ERROR", + details={"path": str(path)}, + ) + refresh_raw = payload.get("refresh_token") + refresh_token = ( + SecretStr(refresh_raw) if isinstance(refresh_raw, str) and refresh_raw else None + ) + return OAuthTokens( + access_token=SecretStr(access_token), + refresh_token=refresh_token, + expires_at=expires_at, + scope=str(payload.get("scope", "")), + token_type=str(payload.get("token_type", "Bearer")), + ) + + +def save_placeholder_meta(placeholder_dir: Path, *, region: str) -> None: + """Persist non-credential metadata (region, schema) into the placeholder. + + The metadata file lets ``mp login --resume`` recover region without + falling back to "read whichever client_X.json exists" — the prior + prototype's hack didn't survive multi-region setups (a user with both + ``client_us.json`` and ``client_eu.json`` would have ``--resume`` + pick the wrong region on the wrong placeholder). + + Args: + placeholder_dir: Target ``.tmp-{nonce}`` directory. Caller must + ensure it exists with mode 0o700. + region: The region the inflight session was bound to. + """ + meta = { + "schema_version": PLACEHOLDER_META_SCHEMA_VERSION, + "region": region, + "created_at": int(time.time()), + } + atomic_write_bytes( + placeholder_dir / "meta.json", + json.dumps(meta, indent=2).encode("utf-8"), + ) + + +def read_placeholder_meta(placeholder_dir: Path) -> dict[str, object] | None: + """Read ``placeholder_dir/meta.json``, distinguishing absent from corrupt. + + Returns ``None`` only when ``meta.json`` does not exist — this is the + signal that the placeholder predates the meta-writing code (legacy + ``--resume`` path) and the caller should fall back to a region + default. ANY other failure mode (unreadable file, malformed JSON, the + parsed value is not a dict) raises so the caller does not silently + pick the wrong region for a modern placeholder. Conflating + "absent" with "corrupt" here previously let an EU/IN placeholder + publish under ``"us"`` and then get cleaned up by the + ``NeedsRegionSwitchError`` path, destroying recoverable tokens. + + Args: + placeholder_dir: Placeholder dir to inspect. + + Returns: + Parsed dict if ``meta.json`` exists and is a valid JSON object. + ``None`` if ``meta.json`` does not exist. + + Raises: + OAuthError: ``OAUTH_PLACEHOLDER_META_CORRUPT`` when the file is + present but cannot be read, cannot be parsed as JSON, or + decodes to something other than a dict. + """ + path = placeholder_dir / "meta.json" + if not path.exists(): + return None + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + raise OAuthError( + f"Placeholder meta at {path} could not be read: {exc}. " + f"Do not delete the placeholder dir — it may still contain " + f"recoverable tokens. Re-run `mp login --start --region ` " + f"if you cannot repair the meta.json by hand.", + code="OAUTH_PLACEHOLDER_META_CORRUPT", + details={"path": str(path)}, + ) from exc + if not isinstance(data, dict): + raise OAuthError( + f"Placeholder meta at {path} is not a JSON object " + f"(got {type(data).__name__}).", + code="OAUTH_PLACEHOLDER_META_CORRUPT", + details={"path": str(path)}, + ) + return data + + +def cache_me_in_placeholder(placeholder_dir: Path, me_resp: object) -> None: + """Cache the ``/me`` response in ``placeholder_dir/me.json``. + + Lets ``--resume`` skip the slow ``/me`` round-trip when the cache + is still fresh (per :func:`load_cached_me_from_placeholder`'s TTL). + The on-disk shape matches what :class:`MeCache` writes so a future + refactor could share a single reader. + + Args: + placeholder_dir: Target placeholder dir. + me_resp: A :class:`MeResponse`. Typed as ``object`` to avoid a + top-level import cycle (``me.py`` imports + :func:`atomic_write_bytes` from ``io_utils.py``, which imports + ``ConfigError`` from ``exceptions.py``; pulling + ``MeResponse`` here at module load time is fine but dragging + it into the public type would force every importer of + ``inflight`` to also import the ``/me`` model). + """ + from mixpanel_headless._internal.me import MeResponse + + if not isinstance(me_resp, MeResponse): # defensive — public surface + raise TypeError(f"Expected MeResponse, got {type(me_resp).__name__}") + data = me_resp.model_dump(mode="json") + data["cached_at"] = time.time() + atomic_write_bytes( + placeholder_dir / "me.json", + json.dumps(data, indent=2, default=str).encode("utf-8"), + ) + + +def load_cached_me_from_placeholder(placeholder_dir: Path) -> object | None: + """Return the cached :class:`MeResponse` if fresh, else ``None``. + + Freshness is defined as ``cached_at`` within :data:`INFLIGHT_TTL_SECONDS` + of the current time. Stale or absent caches return ``None`` so the + caller falls back to a fresh ``/me`` fetch — better to pay the round- + trip than to publish an account against project metadata that drifted + while the user was choosing. + + Args: + placeholder_dir: Placeholder dir to inspect. + + Returns: + :class:`MeResponse` if the cache is present and fresh, ``None`` + otherwise. Returns the model untyped to mirror + :func:`cache_me_in_placeholder`'s parameter shape. + """ + from mixpanel_headless._internal.me import MeResponse + + path = placeholder_dir / "me.json" + if not path.exists(): + return None + try: + raw = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError) as exc: + logger.warning("Could not read cached me.json at %s: %s", path, exc) + return None + if not isinstance(raw, dict): + return None + cached_at = raw.get("cached_at") + if isinstance(cached_at, (int, float)): + age = time.time() - cached_at + if age > INFLIGHT_TTL_SECONDS: + return None + try: + return MeResponse.model_validate(raw) + except Exception as exc: # noqa: BLE001 — pydantic.ValidationError + friends + logger.warning("Cached me.json at %s failed model validation: %s", path, exc) + return None diff --git a/src/mixpanel_headless/accounts.py b/src/mixpanel_headless/accounts.py index 2ccf146..a43ce7a 100644 --- a/src/mixpanel_headless/accounts.py +++ b/src/mixpanel_headless/accounts.py @@ -16,8 +16,9 @@ import os import shutil from collections.abc import Callable +from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Literal from pydantic import SecretStr @@ -44,7 +45,9 @@ AccountExistsError, ConfigError, InvalidArgumentError, + LoginFinishPublishError, MixpanelHeadlessError, + NeedsRegionSwitchError, OAuthError, ProjectNotFoundError, ) @@ -56,8 +59,136 @@ ) if TYPE_CHECKING: + from mixpanel_headless._internal.auth.inflight import InflightSession from mixpanel_headless._internal.me import MeProjectInfo, MeResponse + +PickMethod = Literal[ + "explicit", + "tty_picker", + "sole_survivor", + "sole_survivor_filtered", + "primary_org_lowest_id", + "fallback_with_unintegrated", + "fallback_with_demos", + "cross_region_only", + "no_projects", +] +"""Discriminator for how :func:`_resolve_project` arrived at its pick. + +The CLI uses this to render the right confirmation language ("auto-picked +from your most-active org" vs "your only active project" vs +"unintegrated — verify first") so the user can tell whether the choice +needs a sanity check. Programmatic consumers (the slash command, future +``auth_manager.py login_finish`` wrapper) branch on it without parsing +the human message. + +``sole_survivor`` means the user really has only one region-compatible +project (the funnel's ``region_compatible_count`` is 1). +``sole_survivor_filtered`` means there are multiple region-compatible +projects but only one survived the demo / unintegrated filters — the +distinction matters for the user-facing message ("your only active +project" vs "the only non-demo, integrated project").""" + + +@dataclass(frozen=True) +class ProjectPickResult: + """Structured result of :func:`_resolve_project`. + + Replaces the bare ``str | None`` return so the CLI can announce the + auto-pick method, the funnel counts, and the primary-org context in + the response envelope. Frozen so callers can pass it through layers + without worrying about accidental mutation. + + Attributes: + project_id: The chosen project ID, or ``None`` when ``method == + "no_projects"`` (account has zero accessible projects) or + ``"cross_region_only"`` (raised before the result is returned, + but populated on the carried instance for the envelope). + method: How the choice was made — see :data:`PickMethod`. + auth_region: Region the credential authenticated against. Mirrored + here so :class:`NeedsRegionSwitchError` and the envelope can + surface it without an extra parameter on every call site. + primary_org_id: ID of the org from which ``project_id`` was + picked (only populated when ``method`` is one of the + ``primary_org_*`` / ``fallback_*`` / ``sole_survivor`` paths). + primary_org_name: Human-readable name of the primary org. + primary_org_survivor_count: How many projects in the primary org + survived the filter chain. Lets the CLI render "picked from + your most-active org (28 projects)" instead of just naming + the org. + accessible_project_count: Total ``len(me_resp.projects)`` before + any filter — anchors the funnel display. + region_compatible_count: Count after the region filter only. + filtered_count: Count after region + ``is_demo=False`` + + ``has_integrated=True``. + demo_excluded: How many region-compatible projects were dropped + for ``is_demo=True``. + unintegrated_excluded: How many were dropped for + ``has_integrated`` not being ``True``. + cross_region_projects: Populated only when ``method == + "cross_region_only"``. List of ``(project_id, project_name, + domain)`` tuples so the envelope can suggest "run with + ``--region eu``". + """ + + project_id: str | None + method: PickMethod + auth_region: str + primary_org_id: str | None = None + primary_org_name: str | None = None + primary_org_survivor_count: int | None = None + accessible_project_count: int = 0 + region_compatible_count: int = 0 + filtered_count: int = 0 + demo_excluded: int = 0 + unintegrated_excluded: int = 0 + cross_region_projects: builtins.list[tuple[str, str, str]] | None = None + + +@dataclass(frozen=True) +class LoginFinishResult: + """Public return type of :func:`login_unified_finish` / :func:`login_unified_resume`. + + Bundles the published :class:`AccountSummary` with the + :class:`ProjectPickResult` so the CLI's ``--finish`` / ``--resume`` + handlers can render the §9 envelope including ``project_pick`` + metadata without re-running ``/me`` or re-deriving the algorithm. + + Also returned by the internal + :func:`_publish_account_from_tokens` helper that powers both flows. + + Attributes: + summary: The published account, with ``user_email`` / + ``project_id`` / ``project_name`` populated from ``/me``. + pick: The pick result (auto-pick metadata + funnel counts). + """ + + summary: AccountSummary + pick: ProjectPickResult + + +@dataclass(frozen=True) +class LoginStartResult: + """Return value of :func:`login_unified_start`. + + Bundles the persisted :class:`InflightSession` (also written to disk + at ``~/.mp/oauth/inflight.json``) with the freshly-built + ``authorize_url`` so the CLI can print the URL to the user without + re-deriving it. The URL is intentionally NOT persisted in the + inflight file — it's purely a function of the other fields, and the + on-disk shape stays minimal. + + Attributes: + inflight: The persisted session state. + authorize_url: Full ``mixpanel.com/oauth/authorize/?...`` URL the + user opens in their host browser. + """ + + inflight: InflightSession + authorize_url: str + + # Picker callback contract. The CLI supplies a TTY-aware implementation; # library callers can supply their own or pass ``None`` to fail-fast in # non-interactive contexts (E-8). @@ -1616,7 +1747,6 @@ def _login_unified_new_browser( import secrets from mixpanel_headless._internal.auth.flow import OAuthFlow - from mixpanel_headless._internal.auth.naming import default_account_name from mixpanel_headless._internal.io_utils import atomic_write_bytes auth_region: Region = region if region is not None else "us" @@ -1649,13 +1779,133 @@ def _login_unified_new_browser( from mixpanel_headless._internal.auth.token import token_payload_bytes atomic_write_bytes(placeholder_dir / "tokens.json", token_payload_bytes(tokens)) + # Save region into placeholder meta so a future ``mp login --resume`` + # can recover it without falling back to "read whichever + # client_X.json exists" — the prior prototype's hack failed for + # users with multiple regions registered. + from mixpanel_headless._internal.auth.inflight import save_placeholder_meta + + save_placeholder_meta(placeholder_dir, region=auth_region) + + # Hand off to the shared post-token tail. auto_pick=False + # preserves the existing same-machine behavior (TTY picker fires + # for multi-project; ConfigError E-8 for non-TTY without picker). + # The new --finish/--resume paths pass auto_pick=True for the + # headless case. + result = _publish_account_from_tokens( + cm, + tokens=tokens, + region=auth_region, + placeholder_dir=placeholder_dir, + name=name, + project=project, + project_picker=project_picker, + auto_pick=False, + cached_me=None, + progress=progress, + ) + return result.summary + + except Exception: + # Failure before the rename — placeholder still has the + # ``.tmp-`` prefix. The new contract: keep the placeholder when + # ``tokens.json`` exists in it, so ``mp login --resume PLACEHOLDER`` + # can finish the publish without re-running PKCE. Cleanup only + # fires for failures that landed before the token write. + tokens_path = placeholder_dir / "tokens.json" + if ( + placeholder_dir.exists() + and placeholder_dir.name.startswith(".tmp-") + and not tokens_path.exists() + ): + _safe_rmtree_warn(placeholder_dir) + raise - # /me probe via temporary OAuthBrowserAccount with placeholder name. - # The placeholder dir isn't wired to OnDiskTokenResolver yet, so we - # inject the freshly minted bearer via _FreshBrowserBearer. + +def _publish_account_from_tokens( + cm: ConfigManager, + *, + tokens: OAuthTokens, + region: Region, + placeholder_dir: Path, + name: str | None, + project: str | None, + project_picker: ProjectPicker | None, + auto_pick: bool, + cached_me: MeResponse | None, + progress: ProgressFactory, +) -> LoginFinishResult: + """Shared post-token tail of the oauth_browser login flows. + + Precondition: ``placeholder_dir`` exists, contains ``tokens.json`` matching + ``tokens``, and is named ``.tmp-{nonce}``. + + Runs ``/me`` (or reuses ``cached_me`` when fresh), resolves the project via + :func:`_resolve_project`, asserts the project's region matches ``region``, + derives the account name (``--name`` wins; otherwise via + :func:`default_account_name`), atomic-renames the placeholder to its final + dir, registers the account in the TOML, and persists the ``/me`` cache. + + Three callers use this: + + 1. :func:`_login_unified_new_browser` — same-machine ``mp login`` (passes + ``project_picker=tty_picker``, ``auto_pick=False``, ``cached_me=None``). + 2. :func:`login_unified_finish` — ``mp login --finish URL`` headless + (passes ``project_picker=None``, ``auto_pick=True``, ``cached_me=None``). + 3. :func:`login_unified_resume` — ``mp login --resume PATH`` recovery + (passes ``cached_me=`` to skip the slow /me round-trip + when the cache is still fresh). + + Rollback contract: on any failure BEFORE the rename, the placeholder is + left on disk as long as ``tokens.json`` exists in it (so the caller can + ``mp login --resume PLACEHOLDER``). The post-rename failure path keeps + today's behavior — ``add()`` failure rmtrees the published ``final_dir`` + so the user isn't left with tokens at the user-visible name and no TOML + block. + + Args: + cm: The config manager. + tokens: The freshly-exchanged tokens. ``access_token`` is read for + the in-memory ``/me`` probe via :class:`_FreshBrowserBearer`. + region: Auth region the credential is bound to. + placeholder_dir: Pre-existing ``.tmp-{nonce}`` directory. + name: Explicit account name (``None`` to derive from ``/me``). + project: Explicit project ID (``None`` to use picker / auto-pick). + project_picker: TTY-gated picker callback (``None`` to skip the + picker priority — the only valid combo with ``project_picker=None`` + is ``auto_pick=True``, ``project is not None``, or a single- + project account; otherwise :func:`_resolve_project` raises E-8). + auto_pick: Enable the filter-then-sort fallback when no explicit + project and no picker. ``True`` for headless ``--finish`` / + ``--resume``. + cached_me: When supplied, skip the ``/me`` HTTP call. Used by + ``--resume`` after a publish failure when the placeholder + already carries a fresh ``me.json``. + progress: CM factory wrapped around the (optional) ``/me`` + round-trip so the CLI's spinner / heartbeat stays active. + + Returns: + :class:`LoginFinishResult` carrying the published + :class:`AccountSummary` plus the :class:`ProjectPickResult` so + the CLI can render the §9 envelope. + + Raises: + AccountExistsError: Final name collides with an existing account. + ConfigError: Invalid name (regex), final dir already exists, or + the picker-or-raise priority chain raised E-8. + NeedsRegionSwitchError: ``/me`` returned 0 region-compatible projects. + ProjectNotFoundError: Explicit ``project`` not visible to ``/me``. + """ + from mixpanel_headless._internal.auth.inflight import cache_me_in_placeholder + from mixpanel_headless._internal.auth.naming import default_account_name + + # /me probe — skipped when --resume supplies a fresh cached payload. + if cached_me is not None: + me_resp = cached_me + else: temp_account = OAuthBrowserAccount( name="_tmp_login_unified_", - region=auth_region, + region=region, default_project=ProjectId("0"), ) with progress(_FETCH_ME_PROGRESS_MESSAGE): @@ -1665,90 +1915,438 @@ def _login_unified_new_browser( tokens.access_token.get_secret_value() ), ) + # Cache /me in the placeholder so a future ``--resume`` can skip + # the round-trip. Failures here aren't fatal — the cache is an + # optimization, not a correctness requirement. + try: + cache_me_in_placeholder(placeholder_dir, me_resp) + except OSError as exc: + logger.warning("Could not cache /me in %s: %s", placeholder_dir, exc) + + # Resolve project (priority chain). May raise NeedsRegionSwitchError + # for cross-region-only accounts; the placeholder stays on disk so + # the user can retry with --region after fixing the input. + pick = _resolve_project( + me_resp=me_resp, + explicit_project=project, + project_picker=project_picker, + auto_pick=auto_pick, + region=region, + ) + chosen_project = pick.project_id - # Resolve the project (priority chain). - chosen_project = _resolve_project( - me_resp=me_resp, - explicit_project=project, - project_picker=project_picker, + # E-2 cross-check (defense in depth — auto-pick already region-filters, + # but explicit / TTY paths can still pick a wrong-region project). + _assert_project_region_matches(me_resp, chosen_project, region) + + # Resolve final account name. + existing_names = {s.name for s in cm.list_accounts()} + final_name = ( + name if name is not None else default_account_name(me_resp, existing_names) + ) + if final_name in existing_names: + raise AccountExistsError(final_name) + + # Validate name regex BEFORE rename (defense against path traversal). + try: + final_dir = account_dir(final_name) + except ValueError as exc: + raise ConfigError( + f"Invalid account name {final_name!r}: must match " + f"`^[a-zA-Z0-9_-]{{1,64}}$`." + ) from exc + if final_dir.exists(): + raise ConfigError( + f"Final account directory {final_dir} already exists. Run " + f"`mp account remove {final_name}` first or pass --name." + ) + + os.rename(placeholder_dir, final_dir) + # Note: placeholder_dir is now the same directory as final_dir, but + # the ``placeholder_dir`` Python variable is NOT rebound. This is + # the symbol the OUTER except in callers checks via ``.startswith(".tmp-")``; + # leaving it pointing at the (now-non-existent) ``.tmp-*`` path is + # correct — it tells the outer handler "we already published, your + # rollback semantic doesn't apply." + + # Persist the account record. If add() raises, roll back final_dir + # so the user isn't left with tokens at the user-visible name and no + # [accounts.NAME] block. + try: + summary = add( + final_name, + type="oauth_browser", + region=region, + default_project=chosen_project, + ) + _persist_me_cache(final_name, me_resp) + return LoginFinishResult( + summary=_summary_with_me( + summary, me_resp=me_resp, project_id=chosen_project + ), + pick=pick, ) + except Exception: + _safe_rmtree_warn(final_dir) + raise - # E-2 cross-check via the shared helper (same wording as the - # legacy `login()` path). - _assert_project_region_matches(me_resp, chosen_project, auth_region) - # Resolve the account name (--name wins; otherwise derive). - existing_names = {s.name for s in cm.list_accounts()} - final_name = ( - name if name is not None else default_account_name(me_resp, existing_names) +def login_unified_start( + *, + region: Region | None = None, +) -> LoginStartResult: + """Two-shot oauth_browser flow — start step. + + Generates PKCE + state, ensures DCR client registration for ``region``, + builds the authorize URL, persists the verifier to + ``~/.mp/oauth/inflight.json`` (mode 0o600, 10-min TTL), and returns + a :class:`LoginStartResult` carrying both the persisted session and + the freshly-built authorize URL. Does NOT open a browser, does NOT + start a callback server, does NOT block. + + The caller is responsible for displaying the ``authorize_url`` to the + user (slash command, MCP server, scripted wrapper) and then invoking + :func:`login_unified_finish` with the redirect URL the user pastes back. + + Region defaults to ``"us"`` since PKCE commits to a region before + ``/me`` runs (no probe). EU / India users pass ``region`` explicitly. + + Args: + region: Mixpanel region to authenticate against. Defaults to + ``"us"``. Locked into the inflight file — ``--finish`` cannot + switch. + + Returns: + :class:`LoginStartResult` carrying the persisted + :class:`InflightSession` and the full authorize URL. + + Raises: + OAuthError: PKCE / DCR / port-binding failure. + """ + import time + from urllib.parse import urlencode + + from mixpanel_headless._internal.auth.client_registration import ( + OAUTH_BASE_URLS, + ensure_client_registered, + ) + from mixpanel_headless._internal.auth.flow import OAuthFlow + from mixpanel_headless._internal.auth.inflight import ( + INFLIGHT_SCHEMA_VERSION, + INFLIGHT_TTL_SECONDS, + InflightSession, + find_available_callback_port, + save_inflight, + ) + from mixpanel_headless._internal.auth.pkce import PkceChallenge + from mixpanel_headless._internal.auth.storage import OAuthStorage + + auth_region: Region = region if region is not None else "us" + pkce = PkceChallenge.generate() + import secrets as _secrets + + state = _secrets.token_urlsafe(32) + port = find_available_callback_port() + redirect_uri = f"http://localhost:{port}/callback" + + # Construct OAuthFlow purely to share its bumped-timeout httpx.Client + # for the DCR round-trip. The flow itself is not driven — we just + # need a client with the correct timeout for the cold SOCKS handshake + # that bites every first-time Cowork run. The `with` block ensures + # the client is closed even if DCR or URL building raises. + with OAuthFlow(region=auth_region, storage=OAuthStorage()) as flow: + client_info = ensure_client_registered( + http_client=flow.http_client, + region=auth_region, + redirect_uri=redirect_uri, + storage=flow.storage, ) - if final_name in existing_names: - # Use the structured AccountExistsError so callers / - # downstream tooling (the plugin's auth_manager.py) can - # pattern-match by class instead of parsing the message. - raise AccountExistsError(final_name) - - # Atomic publish: validate the name first so a malicious or - # path-traversal value (`../foo`, absolute path, etc.) raises BEFORE - # os.rename publishes tokens outside the accounts tree. account_dir() - # enforces the same `^[a-zA-Z0-9_-]{1,64}$` regex the Pydantic - # Account model uses; the surrounding except-clause cleans up the - # placeholder when this raises. - try: - final_dir = account_dir(final_name) - except ValueError as exc: - raise ConfigError( - f"Invalid account name {final_name!r}: must match " - f"`^[a-zA-Z0-9_-]{{1,64}}$`." + + base_url = OAUTH_BASE_URLS[auth_region] + authorize_url = f"{base_url}authorize/?" + urlencode( + { + "response_type": "code", + "client_id": client_info.client_id, + "redirect_uri": redirect_uri, + "state": state, + "code_challenge": pkce.challenge, + "code_challenge_method": "S256", + } + ) + + now = int(time.time()) + session = InflightSession( + schema_version=INFLIGHT_SCHEMA_VERSION, + region=auth_region, + client_id=client_info.client_id, + redirect_uri=redirect_uri, + pkce_verifier=pkce.verifier, + state=state, + created_at=now, + expires_at=now + INFLIGHT_TTL_SECONDS, + ) + save_inflight(session) + return LoginStartResult(inflight=session, authorize_url=authorize_url) + + +def login_unified_finish( + *, + pasted_url: str, + name: str | None = None, + project: str | None = None, + project_picker: ProjectPicker | None = None, + progress: ProgressFactory | None = None, +) -> LoginFinishResult: + """Two-shot oauth_browser flow — finish step. + + Reads ``~/.mp/oauth/inflight.json``, validates the pasted URL's state + against the inflight, exchanges the authorization code for tokens, + writes tokens to a fresh placeholder dir, then delegates to + :func:`_publish_account_from_tokens` with ``auto_pick=True``. + + On success: clears the inflight file, returns :class:`LoginFinishResult` + so the CLI can render the §9 envelope. On failure (including + state mismatch, /me errors, region mismatch, account name collision): + leaves the inflight in place so the user can retry without re-running + PKCE; leaves the placeholder in place when ``tokens.json`` was + already written so the user can ``mp login --resume PLACEHOLDER``. + + Args: + pasted_url: The redirect URL (or ``code=...&state=...`` query + string) the user pasted from their browser address bar. + name: Explicit account name (``None`` to derive from /me). + project: Explicit project ID (``None`` to auto-pick). + project_picker: Optional TTY picker. CLI sets this when + ``sys.stdin.isatty()``; ``None`` for headless callers. + progress: CM factory wrapped around the slow ``/me`` call. + + Returns: + :class:`LoginFinishResult` for the new account. + + Raises: + OAuthError: ``OAUTH_INFLIGHT_*`` (missing / expired / corrupt + inflight), ``OAUTH_STATE_MISMATCH`` / ``OAUTH_PASTE_ERROR`` + (bad pasted URL), or any error from + :meth:`OAuthFlow.exchange_code` / ``/me`` / publish. + NeedsRegionSwitchError: 0 region-compatible projects. + ProjectNotFoundError: Explicit ``project`` not visible. + AccountExistsError: Derived name collides; pass ``name=`` to override. + """ + from mixpanel_headless._internal.auth.flow import ( + OAuthFlow, + _parse_pasted_redirect, + ) + from mixpanel_headless._internal.auth.inflight import ( + clear_inflight, + load_inflight, + new_placeholder_dir, + save_placeholder_meta, + ) + from mixpanel_headless._internal.auth.storage import OAuthStorage + + if progress is None: + progress = lambda _msg: contextlib.nullcontext() # noqa: E731 + + inflight = load_inflight() + cb = _parse_pasted_redirect(pasted_url, expected_state=inflight.state) + + with OAuthFlow(region=inflight.region, storage=OAuthStorage()) as flow: + tokens = flow.exchange_code( + code=cb.code, + verifier=inflight.pkce_verifier, + client_id=inflight.client_id, + redirect_uri=inflight.redirect_uri, + ) + + placeholder_dir = new_placeholder_dir(accounts_root()) + atomic_write_bytes(placeholder_dir / "tokens.json", token_payload_bytes(tokens)) + save_placeholder_meta(placeholder_dir, region=inflight.region) + + cm = _config() + try: + result = _publish_account_from_tokens( + cm, + tokens=tokens, + region=inflight.region, # type: ignore[arg-type] + placeholder_dir=placeholder_dir, + name=name, + project=project, + project_picker=project_picker, + auto_pick=True, + cached_me=None, + progress=progress, + ) + except NeedsRegionSwitchError: + # Cross-region is fundamental, not transient — re-running + # --resume against this placeholder would hit the same error. + # Clean up the placeholder + inflight so the user can run + # `mp login --start --region eu` (or whichever region) without + # an orphan .tmp-* dir lingering forever. + _safe_rmtree_warn(placeholder_dir) + clear_inflight() + raise + except Exception as exc: + # Token exchange already consumed the OAuth code, so re-running + # --finish with the same paste will fail at the IdP. Clear the + # inflight (it's useless now). If the placeholder still has + # tokens.json, wrap as LoginFinishPublishError so the CLI can + # surface the resume command. Post-rename failures (handled by + # the inner except in _publish_account_from_tokens) leave the + # placeholder gone, in which case the original exception + # propagates unchanged. + clear_inflight() + if (placeholder_dir / "tokens.json").exists(): + raise LoginFinishPublishError( + placeholder_dir=placeholder_dir, + cause=exc, ) from exc - if final_dir.exists(): + raise + + clear_inflight() + use(result.summary.name) + return result + + +def login_unified_resume( + *, + placeholder_dir: Path, + name: str | None = None, + project: str | None = None, + project_picker: ProjectPicker | None = None, + progress: ProgressFactory | None = None, +) -> LoginFinishResult: + """Two-shot oauth_browser flow — post-publish-failure recovery. + + Reads ``tokens.json`` (and optionally a fresh ``me.json``) from + ``placeholder_dir``, then runs only the publish tail. No PKCE, no + code exchange — the tokens already exist. Clears the inflight file + on success. + + Used when ``mp login --finish`` succeeded at token exchange but + failed at publish (e.g., ``/me`` timed out, ``add()`` collision, + transient network blip during the ``add()`` write). The user re-runs + ``mp login --resume `` and we pick up where we left off. + + Args: + placeholder_dir: The ``.tmp-{nonce}`` directory left behind by a + failed ``--finish`` attempt. Must exist and contain + ``tokens.json``. + name: Explicit account name (``None`` to derive). + project: Explicit project ID (``None`` to auto-pick). + project_picker: Optional TTY picker. + progress: CM factory. + + Returns: + :class:`LoginFinishResult` for the recovered account. + + Raises: + ConfigError: ``placeholder_dir`` is not under + :func:`accounts_root`, doesn't look like a placeholder + (wrong name pattern), doesn't contain ``tokens.json``, or + its ``meta.json`` carries an invalid ``region``. + OAuthError: ``tokens.json`` is malformed, or + ``meta.json`` exists but is corrupt + (``OAUTH_PLACEHOLDER_META_CORRUPT``). + Plus all the failure modes of :func:`_publish_account_from_tokens`. + """ + from mixpanel_headless._internal.auth.inflight import ( + clear_inflight, + load_cached_me_from_placeholder, + read_placeholder_meta, + read_tokens_from_placeholder, + ) + from mixpanel_headless._internal.auth.storage import accounts_root as _accounts_root + + if progress is None: + progress = lambda _msg: contextlib.nullcontext() # noqa: E731 + + placeholder_dir = placeholder_dir.resolve() + # Guard against arbitrary on-disk paths. ``--resume`` will rename the + # placeholder into ``accounts_root()``; if a caller hands us + # ``/tmp/.tmp-foo``, we'd silently exfiltrate whoever's tokens are + # sitting there. Keep ``--resume`` strictly under the accounts root. + root = _accounts_root().resolve() + if not placeholder_dir.is_relative_to(root): + raise ConfigError( + f"--resume placeholder must live under {root}; got " + f"{placeholder_dir}. Pass the path printed by the failed " + f"`mp login --finish` invocation." + ) + if not placeholder_dir.exists(): + raise ConfigError(f"Placeholder directory {placeholder_dir} does not exist.") + if not placeholder_dir.name.startswith(".tmp-"): + raise ConfigError( + f"--resume expects a .tmp-* placeholder directory; got " + f"{placeholder_dir.name}. Pass the path printed by the failed " + f"`mp login --finish` invocation." + ) + if not (placeholder_dir / "tokens.json").exists(): + raise ConfigError( + f"No tokens.json in {placeholder_dir}. Nothing to resume — " + f"run `mp login --start` to begin a fresh login." + ) + + tokens = read_tokens_from_placeholder(placeholder_dir) + + # Region detection: prefer the placeholder's meta.json (written at + # --finish time). ``read_placeholder_meta`` returns ``None`` ONLY + # when meta.json is absent (legacy placeholders predating the + # meta-writing code) — corrupt / unreadable meta now raises + # ``OAUTH_PLACEHOLDER_META_CORRUPT``. The "us" default is therefore + # only reached for legacy placeholders, never for a modern EU/IN + # placeholder with a busted meta.json (which would otherwise hit + # the NeedsRegionSwitchError cleanup path below and destroy + # recoverable tokens). + meta = read_placeholder_meta(placeholder_dir) + region: Region = "us" + if meta is not None: + meta_region = meta.get("region") + if isinstance(meta_region, str) and meta_region in ("us", "eu", "in"): + region = meta_region # type: ignore[assignment] + else: raise ConfigError( - f"Final account directory {final_dir} already exists. Run " - f"`mp account remove {final_name}` first or pass --name." - ) - os.rename(placeholder_dir, final_dir) - placeholder_dir = final_dir - - # IMPORTANT: only the inner add() + cache write are allowed - # between this rename and the inner try-except. Anything inserted - # here would silently lose rollback, because the outer except's - # ``startswith(".tmp-")`` guard no longer matches ``final_dir`` - # (we just rebound ``placeholder_dir`` to it) — only the inner - # except's ``_safe_rmtree_warn(final_dir)`` covers post-rename - # failures. New post-rename steps must add their own rollback. - - # Persist the account record. If add() raises (a race added the - # same name between list_accounts() and now, the TOML write - # failed, etc.), roll back the on-disk publish so the user is - # not left with tokens at the user-visible name and no - # [accounts.NAME] block — that combination breaks - # `mp account remove` and blocks the next `mp login`. - try: - summary = add( - final_name, - type="oauth_browser", - region=auth_region, - default_project=chosen_project, + f"Placeholder {placeholder_dir} has invalid region " + f"{meta_region!r} in meta.json. Repair meta.json by hand " + f"or re-run `mp login --start --region `." ) - # Persist /me cache so the orchestrator's contract holds: - # the next MeService call returns instantly from disk - # instead of paying another /me round-trip. Inside the same - # try so a cache-write failure also rolls back the publish. - _persist_me_cache(final_name, me_resp) - return _summary_with_me(summary, me_resp=me_resp, project_id=chosen_project) - except Exception: - _safe_rmtree_warn(final_dir) - raise - except Exception: - # Failure before the rename — placeholder still has the - # ``.tmp-`` prefix. (The post-rename rollback above handles - # add() failure inline; this branch only fires when something - # earlier in the try block raised.) - if placeholder_dir.name.startswith(".tmp-"): - _safe_rmtree_warn(placeholder_dir) + cached_me = load_cached_me_from_placeholder(placeholder_dir) + + cm = _config() + try: + result = _publish_account_from_tokens( + cm, + tokens=tokens, + region=region, + placeholder_dir=placeholder_dir, + name=name, + project=project, + project_picker=project_picker, + auto_pick=True, + cached_me=cached_me, # type: ignore[arg-type] # runtime MeResponse cast + progress=progress, + ) + except NeedsRegionSwitchError: + # Same as --finish: cross-region is unrecoverable via --resume. + # Clean up the placeholder so the user can `mp login --start + # --region eu` cleanly. + _safe_rmtree_warn(placeholder_dir) + clear_inflight() + raise + except Exception as exc: + # Tokens still exist on disk; wrap so the slash command can + # render the resume command (same path the user passed in). + if (placeholder_dir / "tokens.json").exists(): + raise LoginFinishPublishError( + placeholder_dir=placeholder_dir, + cause=exc, + ) from exc raise + clear_inflight() + use(result.summary.name) + return result + def _login_unified_new_credential( cm: ConfigManager, @@ -1874,11 +2472,14 @@ def _login_unified_new_credential( with progress(_FETCH_ME_PROGRESS_MESSAGE): me_resp = _fetch_me(temp_account) - chosen_project = _resolve_project( + pick = _resolve_project( me_resp=me_resp, explicit_project=project, project_picker=project_picker, + auto_pick=False, + region=resolved_region, ) + chosen_project = pick.project_id # Resolve final name (--name wins; otherwise derive from /me). final_name: str @@ -1915,69 +2516,324 @@ def _resolve_project( me_resp: MeResponse, explicit_project: str | None, project_picker: ProjectPicker | None, -) -> str | None: + auto_pick: bool = False, + region: Region = "us", +) -> ProjectPickResult: """Apply the project-selection priority chain. + Returns a :class:`ProjectPickResult` carrying the chosen project ID + plus structured metadata (method, primary-org info, funnel counts) so + callers can announce how the choice was made. Replaces the prior + bare ``str | None`` return. + + The selection chain is: + + 1. ``explicit_project`` set → validate visibility, return + ``method="explicit"``. + 2. ``MP_PROJECT_ID`` env set → validate visibility (hard-fail if + stale), return ``method="explicit"``. + 3. Region filter — drop projects whose ``domain`` doesn't match + ``region`` per :data:`DOMAIN_FOR_REGION`. If 0 region-compatible + projects remain, raise :class:`NeedsRegionSwitchError`. + 4. ``project_picker`` set → invoke against the region-compatible + list (interactive users may want a demo or unintegrated project), + return ``method="tty_picker"``. + 5. ``auto_pick=True`` → run the filter-then-sort algorithm: + drop ``is_demo`` and ``has_integrated=False`` projects; pick from + the highest-survivor-count org, lowest project ID within. Falls + back through unintegrated → demos when no project survives the + aggressive filter. + 6. ``auto_pick=False`` → existing E-8 behavior: raise ``ConfigError`` + when the user has multiple region-compatible projects and no picker. + Args: me_resp: Parsed :class:`MeResponse`. explicit_project: ``--project`` argument (priority 1). - project_picker: Picker callback for multi-project case. + project_picker: Picker callback for the TTY case. + auto_pick: Enable the filter-then-sort fallback when no picker + and no explicit pick. ``True`` for ``mp login --finish`` / + ``--resume`` (the headless paths). ``False`` for same-machine + ``mp login`` (preserves today's picker-or-raise behavior). + region: Region the credential authenticated against. Used to + filter ``MeProjectInfo.domain`` and to populate + ``ProjectPickResult.auth_region``. Returns: - The resolved project ID, or ``None`` when the user has zero projects. + :class:`ProjectPickResult` with ``project_id`` set on success + (``None`` when ``method == "no_projects"``). Raises: - ConfigError: ``--project N`` not in /me (E-6), ``MP_PROJECT_ID`` - set in the environment but not visible to this account (the - stale env var would shadow the picker's choice on every - subsequent CLI call via the resolver's env-first priority, - silently breaking the next ``mp query`` — surface it now), - or non-interactive multi-project context with no picker + ProjectNotFoundError: ``explicit_project`` not in ``/me``. + ConfigError: ``MP_PROJECT_ID`` set in env but not visible, or + multi-project context with no picker and ``auto_pick=False`` (E-8). + NeedsRegionSwitchError: Zero region-compatible projects after + region filter. """ projects = me_resp.projects - project_keys = builtins.list(projects.keys()) + accessible_n = len(projects) + + # No projects at all — nothing to pick. Caller leaves default_project + # unset; the user runs ``mp project use ID`` once one becomes accessible. + if accessible_n == 0: + return ProjectPickResult( + project_id=None, + method="no_projects", + auth_region=region, + accessible_project_count=0, + ) - # Priority 1: explicit --project. + # Priority 1: explicit --project. Skips all filtering — the user knows + # what they want; ``_assert_project_region_matches`` catches cross-region + # picks downstream with a more actionable error. if explicit_project is not None: - if explicit_project in projects: - return explicit_project - # Use ProjectNotFoundError so the CLI can map it to exit code 4 - # (NOT_FOUND) and downstream callers can pattern-match by - # class. The structured `available_projects` field carries the - # same information that was previously embedded in the message. - raise ProjectNotFoundError(explicit_project, available_projects=project_keys) - - # Priority 2: MP_PROJECT_ID env. Either it resolves now, or we - # hard-fail — falling through silently just defers the failure to - # the user's next `mp query` (the resolver reads MP_PROJECT_ID - # ahead of the persisted default_project, so the picker's choice - # would be shadowed). Better to flag the stale env var here. + if explicit_project not in projects: + raise ProjectNotFoundError( + explicit_project, + available_projects=builtins.list(projects.keys()), + ) + return ProjectPickResult( + project_id=explicit_project, + method="explicit", + auth_region=region, + accessible_project_count=accessible_n, + region_compatible_count=accessible_n, + filtered_count=accessible_n, + ) + + # Priority 2: MP_PROJECT_ID env. Treated as user-explicit — hard-fail + # if not visible (the resolver reads MP_PROJECT_ID first on every + # subsequent CLI call, so a stale value would silently shadow the + # picker's choice). env_project = os.environ.get("MP_PROJECT_ID") if env_project: - if env_project in projects: - return env_project - accessible_lines = "\n".join( - f" - {pid} : {info.name} ({info.domain or '(no domain)'})" + if env_project not in projects: + accessible_lines = "\n".join( + f" - {pid} : {info.name} ({info.domain or '(no domain)'})" + for pid, info in projects.items() + ) + raise ConfigError( + f"MP_PROJECT_ID={env_project} is not visible to this account.\n\n" + f"Accessible projects:\n{accessible_lines}\n\n" + f"Either unset MP_PROJECT_ID, or pass --project ID with a " + f"visible value. (Subsequent `mp` calls would otherwise read " + f"MP_PROJECT_ID first and silently fail with auth errors.)" + ) + return ProjectPickResult( + project_id=env_project, + method="explicit", + auth_region=region, + accessible_project_count=accessible_n, + region_compatible_count=accessible_n, + filtered_count=accessible_n, + ) + + # Legacy path: when auto_pick=False, preserve the existing + # same-machine behavior precisely. No region pre-filter (the picker + # sees all projects; ``_assert_project_region_matches`` catches + # wrong-region picks after the fact). This keeps the behavior + # documented as "TTY picker fires for multi-project, E-8 raises for + # non-TTY without picker" intact for ``mp login`` (no --finish flag). + if not auto_pick: + return _resolve_project_legacy( + me_resp=me_resp, + project_picker=project_picker, + region=region, + accessible_n=accessible_n, + ) + + # Auto-pick path (--finish / --resume). Region-filter first, then + # apply the aggressive filter cascade with primary-org / lowest-id + # tiebreak. TTY picker (if present) preempts auto-pick; same-machine + # SSH-into-Cowork users still get to choose interactively. + from mixpanel_headless._internal.auth.client_registration import DOMAIN_FOR_REGION + + auth_domain = DOMAIN_FOR_REGION.get(region) + region_compat: dict[str, MeProjectInfo] = {} + if auth_domain is None: + # Unknown region (shouldn't happen under the Region literal) — + # don't filter, treat all as region-compat. + region_compat = dict(projects) + else: + for pid, info in projects.items(): + # Projects with no ``domain`` (older payloads) are kept — + # better to surface them than to silently exclude on missing data. + if info.domain is None or info.domain == auth_domain: + region_compat[pid] = info + region_n = len(region_compat) + + if region_n == 0: + # All projects in a different cluster. Refuse to publish a + # wrong-region account; surface alternatives. + cross = [ + (pid, info.name, info.domain or "(no domain)") for pid, info in projects.items() + ] + per_region: dict[str, int] = {} + for _pid, _name, domain in cross: + r = _domain_to_region(domain) or "(unknown)" + per_region[r] = per_region.get(r, 0) + 1 + breakdown = ", ".join( + f"{count} in {r}" for r, count in sorted(per_region.items()) ) - raise ConfigError( - f"MP_PROJECT_ID={env_project} is not visible to this account.\n\n" - f"Accessible projects:\n{accessible_lines}\n\n" - f"Either unset MP_PROJECT_ID, or pass --project ID with a " - f"visible value. (Subsequent `mp` calls would otherwise read " - f"MP_PROJECT_ID first and silently fail with auth errors.)" + suggestion = "" + for r in ("eu", "in", "us"): + if r != region and per_region.get(r): + suggestion = ( + f"\n\nRun `mp login --start --region {r}` to create a " + f"separate account for that region." + ) + break + pick = ProjectPickResult( + project_id=None, + method="cross_region_only", + auth_region=region, + accessible_project_count=accessible_n, + region_compatible_count=0, + filtered_count=0, + cross_region_projects=cross, + ) + raise NeedsRegionSwitchError( + f"No projects in region {region} accessible to this account " + f"({breakdown}).{suggestion}", + pick=pick, ) - # Priority 3: single-project auto-pick. - if len(projects) == 1: - return project_keys[0] - if not projects: - # No projects at all → leave default_project unset; caller can set - # it later via `mp project use ID` once one becomes accessible. - return None + # Aggressive filter chain: drop demos, then drop unintegrated. Both + # fields are undeclared on MeProjectInfo (live API returns them via + # model_extra). + def _is_demo(info: MeProjectInfo) -> bool: + """True if ``info`` is a Mixpanel demo project. + + Read from the live ``/me`` payload's ``is_demo`` field via + ``model_extra`` (the field is not declared on + :class:`MeProjectInfo` so it travels through pydantic's extras). + Demo projects are excluded from auto-pick unless every + region-compatible project is a demo (the + ``fallback_with_demos`` last resort). + """ + return bool((info.model_extra or {}).get("is_demo")) + + def _is_integrated(info: MeProjectInfo) -> bool: + """True if ``info`` has received events at some point. + + ``has_integrated == True`` means the project has data; + anything else (missing, ``False``, ``None``) means + "abandoned / never-shipped" for auto-pick purposes. Same + ``model_extra`` story as :func:`_is_demo`. Unintegrated + projects are excluded unless every non-demo project is + unintegrated (the ``fallback_with_unintegrated`` cascade + step). + """ + return (info.model_extra or {}).get("has_integrated") is True + + no_demos = {pid: info for pid, info in region_compat.items() if not _is_demo(info)} + demo_excluded = region_n - len(no_demos) + filtered = {pid: info for pid, info in no_demos.items() if _is_integrated(info)} + unintegrated_excluded = len(no_demos) - len(filtered) + + # Sole-survivor short-circuit at the region-compat level. + if region_n == 1: + only = next(iter(region_compat)) + only_info = region_compat[only] + org = me_resp.organizations.get(str(only_info.organization_id)) + return ProjectPickResult( + project_id=only, + method="sole_survivor", + auth_region=region, + primary_org_id=str(only_info.organization_id), + primary_org_name=org.name if org is not None else None, + primary_org_survivor_count=1, + accessible_project_count=accessible_n, + region_compatible_count=region_n, + filtered_count=len(filtered), + demo_excluded=demo_excluded, + unintegrated_excluded=unintegrated_excluded, + ) + + # TTY picker preempts auto-pick. Operates on region-compat (NOT the + # aggressive filter) so interactive users can still pick a demo or + # unintegrated project if they want. + if project_picker is not None: + + def _sort_key(item: tuple[str, MeProjectInfo]) -> tuple[str, str]: + """Build (org_name, project_name) sort key, both case-folded.""" + _pid, info = item + org = me_resp.organizations.get(str(info.organization_id)) + org_name = org.name if org is not None else f"~org {info.organization_id}" + return (org_name.lower(), info.name.lower()) + + sorted_projects = sorted(region_compat.items(), key=_sort_key) + chosen = project_picker(me_resp, sorted_projects) + return ProjectPickResult( + project_id=chosen, + method="tty_picker", + auth_region=region, + accessible_project_count=accessible_n, + region_compatible_count=region_n, + filtered_count=len(filtered), + demo_excluded=demo_excluded, + unintegrated_excluded=unintegrated_excluded, + ) + + # No picker, no explicit, auto_pick=True → run the algorithm. + return _auto_pick_from_filtered( + me_resp=me_resp, + region=region, + accessible_n=accessible_n, + region_compat=region_compat, + no_demos=no_demos, + filtered=filtered, + demo_excluded=demo_excluded, + unintegrated_excluded=unintegrated_excluded, + ) + + +def _resolve_project_legacy( + *, + me_resp: MeResponse, + project_picker: ProjectPicker | None, + region: Region, + accessible_n: int, +) -> ProjectPickResult: + """Legacy ``auto_pick=False`` path — preserves pre-043 same-machine behavior. + + No region filter (the picker sees all projects; the post-pick + ``_assert_project_region_matches`` cross-check raises E-2 on wrong + region). Single-project auto-pick when there's exactly one project. + Picker fires for multi-project. Raises ``ConfigError`` (E-8) when + multi-project and no picker. + + Args: + me_resp: Parsed ``/me``. + project_picker: TTY picker callback. + region: Auth region (carried into the result for the envelope). + accessible_n: Total project count (already computed by caller). + + Returns: + :class:`ProjectPickResult` with method ``"explicit"`` (single + project) or ``"tty_picker"`` (after picker fires). + + Raises: + ConfigError: Multi-project context with no picker (E-8). + """ + projects = me_resp.projects + if accessible_n == 1: + only = next(iter(projects)) + only_info = projects[only] + org = me_resp.organizations.get(str(only_info.organization_id)) + return ProjectPickResult( + project_id=only, + method="sole_survivor", + auth_region=region, + primary_org_id=str(only_info.organization_id), + primary_org_name=org.name if org is not None else None, + primary_org_survivor_count=1, + accessible_project_count=accessible_n, + region_compatible_count=accessible_n, + filtered_count=accessible_n, + ) - # Priority 4: picker callback (raises E-8 if absent). if project_picker is None: accessible_lines = "\n".join( f" - {pid} : {info.name} ({info.domain or '(no domain)'})" @@ -1990,16 +2846,6 @@ def _resolve_project( f"Pass --project ID to select one explicitly, or set MP_PROJECT_ID." ) - # Group projects by org first, then alphabetize within. With many - # projects spread across multiple orgs, a name-only sort interleaves - # orgs and forces the user to scan the whole list. The (org, project) - # key produces contiguous per-org blocks. Both axes lowercased so - # mixed-case names ("Demo Projects" vs "demo team") collate - # intuitively instead of by raw byte order. Unknown org IDs (a - # project tied to an org missing from /me.organizations) fall back - # to a synthetic ``"~org {id}"`` key — the leading tilde sinks them - # to the bottom rather than the chaotic position they would land at - # if we used the raw integer-as-string. def _sort_key(item: tuple[str, MeProjectInfo]) -> tuple[str, str]: """Build (org_name, project_name) sort key, both case-folded.""" _pid, info = item @@ -2008,15 +2854,169 @@ def _sort_key(item: tuple[str, MeProjectInfo]) -> tuple[str, str]: return (org_name.lower(), info.name.lower()) sorted_projects = sorted(projects.items(), key=_sort_key) - return project_picker(me_resp, sorted_projects) + chosen = project_picker(me_resp, sorted_projects) + return ProjectPickResult( + project_id=chosen, + method="tty_picker", + auth_region=region, + accessible_project_count=accessible_n, + region_compatible_count=accessible_n, + filtered_count=accessible_n, + ) + + +def _auto_pick_from_filtered( + *, + me_resp: MeResponse, + region: Region, + accessible_n: int, + region_compat: dict[str, MeProjectInfo], + no_demos: dict[str, MeProjectInfo], + filtered: dict[str, MeProjectInfo], + demo_excluded: int, + unintegrated_excluded: int, +) -> ProjectPickResult: + """Apply the auto-pick filter cascade and return the structured result. + + Tries the aggressively-filtered set (region + non-demo + integrated) + first. Falls back to ``no_demos`` (re-include unintegrated), then to + ``region_compat`` (re-include demos). The method enum tells the CLI + which fallback fired so it can surface the appropriate caveat. + + Args: + me_resp: Parsed ``/me`` response (used to look up org names). + region: Region the credential authenticated against. + accessible_n: Total ``len(me_resp.projects)`` (for the funnel). + region_compat: Projects after region filter (the broadest pool). + no_demos: Projects after region + ``is_demo=False``. + filtered: Projects after region + ``is_demo=False`` + + ``has_integrated=True``. + demo_excluded: Count of region-compat projects dropped for is_demo. + unintegrated_excluded: Count of non-demo projects dropped for + ``has_integrated`` not being True. + + Returns: + The :class:`ProjectPickResult` carrying the pick + funnel counts. + """ + region_n = len(region_compat) + filtered_n = len(filtered) + + if filtered: + chosen, primary_org_id, survivor_count = _pick_from_primary_org( + filtered, me_resp.organizations + ) + org = me_resp.organizations.get(str(primary_org_id)) + return ProjectPickResult( + project_id=chosen, + method=( + "primary_org_lowest_id" if filtered_n > 1 else "sole_survivor_filtered" + ), + auth_region=region, + primary_org_id=str(primary_org_id), + primary_org_name=org.name if org is not None else None, + primary_org_survivor_count=survivor_count, + accessible_project_count=accessible_n, + region_compatible_count=region_n, + filtered_count=filtered_n, + demo_excluded=demo_excluded, + unintegrated_excluded=unintegrated_excluded, + ) + + if no_demos: + chosen, primary_org_id, survivor_count = _pick_from_primary_org( + no_demos, me_resp.organizations + ) + org = me_resp.organizations.get(str(primary_org_id)) + return ProjectPickResult( + project_id=chosen, + method="fallback_with_unintegrated", + auth_region=region, + primary_org_id=str(primary_org_id), + primary_org_name=org.name if org is not None else None, + primary_org_survivor_count=survivor_count, + accessible_project_count=accessible_n, + region_compatible_count=region_n, + filtered_count=filtered_n, + demo_excluded=demo_excluded, + unintegrated_excluded=unintegrated_excluded, + ) + + # Last resort: re-include demos. + chosen, primary_org_id, survivor_count = _pick_from_primary_org( + region_compat, me_resp.organizations + ) + org = me_resp.organizations.get(str(primary_org_id)) + return ProjectPickResult( + project_id=chosen, + method="fallback_with_demos", + auth_region=region, + primary_org_id=str(primary_org_id), + primary_org_name=org.name if org is not None else None, + primary_org_survivor_count=survivor_count, + accessible_project_count=accessible_n, + region_compatible_count=region_n, + filtered_count=filtered_n, + demo_excluded=demo_excluded, + unintegrated_excluded=unintegrated_excluded, + ) + + +def _pick_from_primary_org( + pool: dict[str, MeProjectInfo], + orgs: dict[str, Any], +) -> tuple[str, int, int]: + """Pick the org with the most ``pool`` survivors, then lowest project ID. + + Tiebreak across orgs: lowest org ID (deterministic; orgs with equal + survivor counts get sorted by integer ID). Tiebreak within an org: + lowest project ID (numeric). Both rules are stable across runs so a + re-run on the same ``/me`` produces the same auto-pick. + + Returns ``(project_id, primary_org_id, survivor_count)``. + + Args: + pool: Project subset to pick from. Must be non-empty. + orgs: ``me_resp.organizations``. Currently unconsulted by the + tiebreak rules (which use IDs only); accepted as a parameter + for symmetry with the caller and so the algorithm can be + extended with name-based heuristics without breaking the call + site. + + Returns: + Tuple of (chosen project ID, the primary org ID, survivor count + in that org). + """ + del orgs # unused — see docstring + counts: dict[int, int] = {} + for info in pool.values(): + counts[info.organization_id] = counts.get(info.organization_id, 0) + 1 + # Highest count wins; tiebreak by lowest org ID. + primary_org_id = sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0] + survivor_count = counts[primary_org_id] + in_primary = [ + pid for pid, info in pool.items() if info.organization_id == primary_org_id + ] + # Lowest project ID (numeric) wins. Numeric IDs are guaranteed by the + # /me response shape; non-numeric IDs would TypeError here, which is + # the right surface signal — silent fallback to lex-sort would mask a + # schema regression. + chosen = min(in_primary, key=lambda pid: int(pid)) + return chosen, primary_org_id, survivor_count __all__ = [ + "LoginFinishResult", + "LoginStartResult", + "PickMethod", + "ProjectPickResult", "add", "export_bridge", "list", "login", "login_unified", + "login_unified_finish", + "login_unified_resume", + "login_unified_start", "logout", "remove", "remove_bridge", diff --git a/src/mixpanel_headless/cli/commands/login.py b/src/mixpanel_headless/cli/commands/login.py index 641f1f7..32ed352 100644 --- a/src/mixpanel_headless/cli/commands/login.py +++ b/src/mixpanel_headless/cli/commands/login.py @@ -1,18 +1,22 @@ -"""``mp login`` Typer command (043 / AIE-117). +"""``mp login`` Typer command. Thin wrapper over :func:`accounts.login_unified` that adds the TTY-aware project / org pickers and the structured stdout success line. -All other orchestration (auth-type detection, region probe, name -derivation, atomic publish, re-login state machine) lives in the -library. +The two-shot ``--start`` / ``--finish`` / ``--resume`` flow added on top +emits machine-parseable JSON envelopes on stdout (rather than the +single-line success summary) so the slash command and other programmatic +consumers can drive it without parsing prose. -Reference: ``specs/043-frictionless-auth/contracts/cli-commands.md`` §1. +Reference: ``specs/043-frictionless-auth/contracts/cli-commands.md`` §1 +plus the two-shot extension in the implementation plan. """ from __future__ import annotations +import json import sys -from typing import TYPE_CHECKING, Annotated +from pathlib import Path +from typing import TYPE_CHECKING, Annotated, Any import typer @@ -24,6 +28,11 @@ handle_errors, status_spinner, ) +from mixpanel_headless.exceptions import ( + InvalidArgumentError, + LoginFinishPublishError, + NeedsRegionSwitchError, +) if TYPE_CHECKING: from mixpanel_headless._internal.me import MeProjectInfo, MeResponse @@ -118,6 +127,270 @@ def _project_picker_tty( raise ConfigError("Could not pick a project after 3 attempts. Aborting.") +_SCHEMA_VERSION = 1 +"""Response envelope schema version for ``--start`` / ``--finish`` / ``--resume``. + +Bumped only on non-additive changes that would break a slash command +parser compiled against the older shape. Adding fields is fine; removing +or renaming requires a bump.""" + +_PROJECT_NEXT = [ + {"command": "mp project list", "label": "See all accessible projects"}, + {"command": "mp project use ", "label": "Switch to a different project"}, +] +"""Verbatim mirror of ``auth_manager.py:_PROJECT_NEXT`` so the slash +command's existing renderer works against the success envelope without +new branches.""" + + +def _build_start_envelope(start: accounts_ns.LoginStartResult) -> dict[str, Any]: + """Build the ``mp login --start`` JSON envelope. + + Args: + start: The :class:`accounts_ns.LoginStartResult` from + :func:`accounts.login_unified_start`. + + Returns: + Dict serializable as the documented ``state: ok`` start envelope. + """ + from mixpanel_headless._internal.auth.inflight import inflight_path + + inflight = start.inflight + return { + "schema_version": _SCHEMA_VERSION, + "state": "ok", + "authorize_url": start.authorize_url, + "redirect_uri": inflight.redirect_uri, + "expires_at": inflight.expires_at, + "region": inflight.region, + "inflight_path": str(inflight_path()), + } + + +def _build_finish_envelope(result: accounts_ns.LoginFinishResult) -> dict[str, Any]: + """Build the ``state: ok`` envelope for ``--finish`` / ``--resume``. + + Args: + result: :class:`accounts.LoginFinishResult` from + :func:`accounts.login_unified_finish` or + :func:`accounts.login_unified_resume`. + + Returns: + Dict matching the §9 ``state: ok`` envelope shape. + """ + summary = result.summary + pick = result.pick + project_block: dict[str, Any] | None = None + if summary.project_id is not None: + project_block = { + "id": summary.project_id, + "name": summary.project_name, + } + user_block: dict[str, Any] | None = None + if summary.user_email is not None: + user_block = {"email": summary.user_email} + return { + "schema_version": _SCHEMA_VERSION, + "state": "ok", + "account": { + "name": summary.name, + "type": summary.type, + "region": summary.region, + }, + "user": user_block, + "project": project_block, + "project_pick": { + "auto_picked": pick.method not in ("explicit", "tty_picker"), + "method": pick.method, + "primary_org_id": pick.primary_org_id, + "primary_org_name": pick.primary_org_name, + "primary_org_survivor_count": pick.primary_org_survivor_count, + "accessible_project_count": pick.accessible_project_count, + "region_compatible_count": pick.region_compatible_count, + "filtered_count": pick.filtered_count, + "demo_excluded": pick.demo_excluded, + "unintegrated_excluded": pick.unintegrated_excluded, + }, + "next": _PROJECT_NEXT, + } + + +def _build_publish_failure_envelope(exc: LoginFinishPublishError) -> dict[str, Any]: + """Build the ``state: error code: LOGIN_FINISH_PUBLISH_FAILED`` envelope. + + Surfaces the placeholder path + resume command so the slash command + can tell the user exactly how to recover from a post-exchange + publish failure (bad ``--name``, project not visible, name + collision, transient ``/me`` failure). Re-running ``--finish`` won't + work because the OAuth code has already been consumed. + + Args: + exc: The :class:`LoginFinishPublishError` carrying placeholder + path + original cause. + + Returns: + Dict matching the documented post-exchange-failure envelope. + """ + resume_cmd = f"mp login --resume {exc.placeholder_dir}" + return { + "schema_version": _SCHEMA_VERSION, + "state": "error", + "error": { + "code": "LOGIN_FINISH_PUBLISH_FAILED", + "message": exc.message, + "actionable": True, + "details": { + "placeholder_dir": str(exc.placeholder_dir), + "original_code": exc.original_code, + "original_message": exc.original_message, + }, + }, + "resume_hint": { + "command": resume_cmd, + "label": ( + "Recover with `mp login --resume` — re-running `--finish` " + "won't work because the OAuth code is already consumed." + ), + }, + "next": [ + {"command": resume_cmd, "label": "Resume with corrected args"}, + ], + } + + +def _build_region_switch_envelope(exc: NeedsRegionSwitchError) -> dict[str, Any]: + """Build the ``state: error code: NEEDS_REGION_SWITCH`` envelope. + + Args: + exc: The :class:`NeedsRegionSwitchError` raised by the auto-pick + algorithm when 0 region-compatible projects exist. + + Returns: + Dict matching the §9 ``state: error`` envelope shape. + """ + pick = exc.pick + cross = pick.cross_region_projects or [] + return { + "schema_version": _SCHEMA_VERSION, + "state": "error", + "error": { + "code": "NEEDS_REGION_SWITCH", + "message": exc.message, + "actionable": True, + "details": { + "auth_region": pick.auth_region, + "cross_region_projects": [ + {"id": pid, "name": name, "domain": domain} + for pid, name, domain in cross + ], + }, + }, + } + + +def _emit_json(payload: dict[str, Any]) -> None: + """Print ``payload`` as a single line of compact JSON to stdout. + + Single-line so the slash command's ``json.loads(stdout)`` works + without scanning for object boundaries. The encoder explicitly + accepts ``datetime`` and :class:`Path` (the only non-JSON-native + types intentionally used in the §9 envelopes); anything else + raises ``TypeError`` so a future refactor that drops a token, a + Pydantic ``SecretStr``, or some other surprise into an envelope + fails loudly during testing rather than silently stringifying it. + + Args: + payload: The envelope dict to serialize. + + Raises: + TypeError: ``payload`` contains an object whose type is neither + JSON-native nor :class:`datetime` / :class:`Path`. + """ + from datetime import datetime + + def _encoder(obj: object) -> str: + """Stringify the only two non-JSON-native types we permit.""" + if isinstance(obj, (datetime, Path)): + return str(obj) + raise TypeError( + f"Envelope contains non-serializable {type(obj).__name__}; " + f"add explicit handling in _emit_json or pre-serialize the value." + ) + + print(json.dumps(payload, default=_encoder)) + + +def _validate_two_shot_flag_combos( + *, + start: bool, + finish: str | None, + resume: Path | None, + service_account: bool, + token_env: str | None, + secret_stdin: bool, + no_browser: bool, + name: str | None, + project: str | None, +) -> None: + """Reject incompatible flag combinations BEFORE any I/O. + + Mirrors the ``InvalidArgumentError`` patterns the orchestrator already + uses for the legacy paths so the slash command sees the same exit 3 + behavior whether the user mismatches a legacy flag or a new flag. + + Args: + start: ``--start`` flag value. + finish: ``--finish URL`` flag value. + resume: ``--resume PATH`` flag value. + service_account: ``--service-account`` flag value. + token_env: ``--token-env`` flag value. + secret_stdin: ``--secret-stdin`` flag value. + no_browser: ``--no-browser`` flag value. + name: ``--name`` flag value (only invalid for ``--start``). + project: ``--project`` flag value (only invalid for ``--start``). + + Raises: + InvalidArgumentError: Any of the documented invalid combinations. + """ + two_shot_set = sum(1 for f in (start, finish is not None, resume is not None) if f) + if two_shot_set > 1: + raise InvalidArgumentError( + "Pick exactly one of --start, --finish URL, --resume PATH " + "(they are mutually exclusive).", + violation="mutually_exclusive", + ) + in_two_shot_mode = two_shot_set == 1 + if not in_two_shot_mode: + return + + # Two-shot is oauth_browser-only — none of the legacy auth-type flags apply. + bad: list[str] = [] + if service_account: + bad.append("--service-account") + if token_env is not None: + bad.append("--token-env") + if secret_stdin: + bad.append("--secret-stdin") + if no_browser: + bad.append("--no-browser") + if bad: + raise InvalidArgumentError( + f"--start / --finish / --resume are oauth_browser-only and " + f"incompatible with: {', '.join(bad)}.", + violation="mutually_exclusive", + detected_auth_type="oauth_browser", + ) + + if start and (name is not None or project is not None): + # --start emits the authorize URL only; name + project decisions + # happen at --finish time when /me is available. + raise InvalidArgumentError( + "--start cannot accept --name or --project (those decisions " + "happen at --finish time, after /me runs).", + violation="mutually_exclusive", + ) + + @handle_errors def login( ctx: typer.Context, @@ -175,6 +448,45 @@ def login( help="For service_account, read the secret from stdin.", ), ] = False, + start: Annotated[ + bool, + typer.Option( + "--start", + help=( + "Two-shot flow: emit authorize URL + persist PKCE verifier, " + "then exit. Run `mp login --finish URL` after the user " + "authorizes in their browser. For Cowork / CI / " + "non-TTY environments where the loopback callback can't " + "reach the user's host browser." + ), + ), + ] = False, + finish: Annotated[ + str | None, + typer.Option( + "--finish", + metavar="URL", + help=( + "Two-shot flow: complete login using the redirect URL " + "pasted from the user's browser. Reads the inflight " + "session, exchanges the code, runs /me + auto-pick + " + "publish." + ), + ), + ] = None, + resume: Annotated[ + Path | None, + typer.Option( + "--resume", + metavar="PATH", + help=( + "Two-shot flow: post-publish-failure recovery. PATH is " + "the .tmp-* placeholder dir left in ~/.mp/accounts/ by " + "a failed `mp login --finish`. Re-runs only the publish " + "tail (no PKCE, no code exchange)." + ), + ), + ] = None, ) -> None: """Add a Mixpanel account with guided region / project / name resolution. @@ -183,6 +495,13 @@ def login( conversational command. All progress narration goes to stderr; the success summary goes to stdout as a single line. + The two-shot ``--start`` / ``--finish`` / ``--resume`` flow extends + this for headless environments (Cowork sandboxes, CI runners, + devcontainers, browserless SSH) where the loopback OAuth callback + can't reach the user's host browser. The two-shot path emits machine- + parseable JSON envelopes on stdout instead of the single-line + success summary so slash commands and scripted wrappers can drive it. + Examples: mp login # browser, single project @@ -190,7 +509,10 @@ def login( cat secret | mp login --service-account --secret-stdin --name prod-sa MY_TOKEN=eyJ... mp login --token-env MY_TOKEN mp login --project 3713224 # browser, skip prompt - mp login --no-browser # headless oauth flow + mp login --no-browser # same-machine SSH paste-back + mp login --start --region eu # Cowork: print URL, exit + mp login --finish 'http://localhost:.../callback?code=...&state=...' + mp login --resume ~/.mp/accounts/.tmp-abc12345 Args: ctx: Typer context. Used to wire ``--quiet`` into the spinner @@ -202,6 +524,9 @@ def login( token_env: Env-var name carrying the bearer (oauth_token). no_browser: For oauth_browser, print the URL instead of launching. secret_stdin: For service_account, read the secret from stdin. + start: Two-shot flow: emit authorize URL + persist verifier, exit. + finish: Two-shot flow: complete using the pasted redirect URL. + resume: Two-shot flow: recover from a post-publish failure. """ # Region is the only flag the orchestrator can't validate (it # accepts a Region literal, not an arbitrary string). Reject early @@ -212,19 +537,60 @@ def login( ) raise typer.Exit(ExitCode.INVALID_ARGS) - # All other flag-combination validation lives in - # ``accounts.login_unified`` and surfaces as - # ``InvalidArgumentError`` (subclass of ``ConfigError``) with a - # ``violation`` discriminator the @handle_errors decorator maps to - # exit 3. Keeping the rules in the orchestrator means non-CLI - # callers (Cowork, JSON consumers) get the same protection without - # having to re-implement the matrix. - # Wrap the orchestrator's /me fetch in a Rich spinner. /me is the - # slowest single call in the login flow (multi-second when an - # account spans many projects across orgs); without this hook the - # terminal looks hung between region-probe narration and the - # project picker. ``status_spinner`` honors --quiet and falls back - # to a no-op in non-TTY contexts so piped invocations stay clean. + _validate_two_shot_flag_combos( + start=start, + finish=finish, + resume=resume, + service_account=service_account, + token_env=token_env, + secret_stdin=secret_stdin, + no_browser=no_browser, + name=name, + project=project, + ) + + if start: + start_result = accounts_ns.login_unified_start(region=region) # type: ignore[arg-type] + _emit_json(_build_start_envelope(start_result)) + return + + if finish is not None: + try: + result = accounts_ns.login_unified_finish( + pasted_url=finish, + name=name, + project=project, + project_picker=_project_picker_tty if sys.stdin.isatty() else None, + progress=lambda msg: status_spinner(ctx, msg), + ) + except NeedsRegionSwitchError as exc: + _emit_json(_build_region_switch_envelope(exc)) + raise typer.Exit(ExitCode.NEEDS_SELECTION) from None + except LoginFinishPublishError as exc: + _emit_json(_build_publish_failure_envelope(exc)) + raise typer.Exit(ExitCode.GENERAL_ERROR) from None + _emit_json(_build_finish_envelope(result)) + return + + if resume is not None: + try: + result = accounts_ns.login_unified_resume( + placeholder_dir=resume, + name=name, + project=project, + project_picker=_project_picker_tty if sys.stdin.isatty() else None, + progress=lambda msg: status_spinner(ctx, msg), + ) + except NeedsRegionSwitchError as exc: + _emit_json(_build_region_switch_envelope(exc)) + raise typer.Exit(ExitCode.NEEDS_SELECTION) from None + except LoginFinishPublishError as exc: + _emit_json(_build_publish_failure_envelope(exc)) + raise typer.Exit(ExitCode.GENERAL_ERROR) from None + _emit_json(_build_finish_envelope(result)) + return + + # Legacy single-shot path — unchanged behavior. summary = accounts_ns.login_unified( name=name, region=region, # type: ignore[arg-type] # validated above @@ -237,11 +603,6 @@ def login( progress=lambda msg: status_spinner(ctx, msg), ) - # Stdout success line (single line, structured for `mp login | tee log.txt`). - # cli-commands.md §1.4 fixes the format as - # ``Logged in as {user_email} → {account_name} · {project_name}``. - # Each /me-derived field falls back to a literal "(unknown ...)" when - # absent so the line stays parseable even on partial /me payloads. user_email = summary.user_email or "(unknown user)" project_label = summary.project_name or "(no project)" console.print(f"Logged in as {user_email} → {summary.name} · {project_label}") diff --git a/src/mixpanel_headless/cli/utils.py b/src/mixpanel_headless/cli/utils.py index 739bc68..a277672 100644 --- a/src/mixpanel_headless/cli/utils.py +++ b/src/mixpanel_headless/cli/utils.py @@ -36,6 +36,7 @@ InvalidArgumentError, JQLSyntaxError, MixpanelHeadlessError, + NeedsRegionSwitchError, OAuthError, ProjectNotFoundError, QueryError, @@ -80,6 +81,7 @@ class ExitCode(IntEnum): INVALID_ARGS = 3 NOT_FOUND = 4 RATE_LIMIT = 5 + NEEDS_SELECTION = 6 INTERRUPTED = 130 @@ -339,6 +341,27 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: "secret are correct." ) raise typer.Exit(ExitCode.AUTH_ERROR) from None + except NeedsRegionSwitchError as e: + # Subclass of OAuthError — must come BEFORE the generic OAuthError + # handler so the cross-region message + alternatives surface + # instead of a generic "OAuth error" line. Maps to exit 6 + # (NEEDS_SELECTION) so shell scripts can distinguish "user must + # do something specific" from "auth genuinely broken". + err_console.print(f"[red]ERROR:[/red] {rich_escape(e.message)}") + cross = e.pick.cross_region_projects or [] + if cross: + err_console.print("") + err_console.print("Cross-region projects (sample):") + for pid, name, domain in cross[:10]: + err_console.print( + f" {rich_escape(pid)} {rich_escape(name)} " + f"({rich_escape(domain)})" + ) + if len(cross) > 10: + err_console.print( + f" … and {len(cross) - 10} more (see JSON output for full list)." + ) + raise typer.Exit(ExitCode.NEEDS_SELECTION) from None except OAuthError as e: err_console.print(f"[red]OAuth error:[/red] {rich_escape(e.message)}") if e.code: @@ -613,14 +636,25 @@ def output_result( print(output) +#: Cadence at which the non-TTY heartbeat thread emits "still running" +#: lines. 15s is short enough that a Cowork user sees activity within the +#: first 30s of a 67s ``/me`` call, long enough that a 1s spinner won't +#: clutter the log with redundant lines. +_NON_TTY_HEARTBEAT_INTERVAL_SECONDS = 15.0 + + @contextmanager def status_spinner(ctx: typer.Context, message: str) -> Generator[None, None, None]: """Context manager to show a spinner for long-running operations. Shows an animated spinner on stderr while the wrapped operation runs. - Respects the --quiet flag to suppress the spinner. Also skips the - spinner in non-TTY environments (e.g., CI/CD pipelines) to avoid - cluttering logs with escape sequences. + Respects the --quiet flag to suppress the spinner. In non-TTY + environments (CI, Cowork, pipes), instead of running a no-op, the + spinner emits a single-line stderr heartbeat every + ``_NON_TTY_HEARTBEAT_INTERVAL_SECONDS`` so callers can see the process + is still working. Without this, slow ``/me`` calls (67-74s on busy + accounts) leave Cowork users staring at silence and killing the + process before it completes. Args: ctx: Typer context with global options in obj dict. @@ -634,12 +668,44 @@ def status_spinner(ctx: typer.Context, message: str) -> Generator[None, None, No bookmarks = workspace.list_bookmarks() """ import sys + import threading + import time as _time quiet = ctx.obj.get("quiet", False) if ctx.obj else False - # Skip spinner if quiet mode or non-interactive (e.g., CI/CD, pipes) - if quiet or not sys.stderr.isatty(): + if quiet: yield - else: + return + + if sys.stderr.isatty(): with err_console.status(message): yield + return + + # Non-TTY: heartbeat thread. Daemon so a hard exit doesn't wait on it; + # event-driven wakeup so __exit__ stops the cadence within 50 ms. + stop = threading.Event() + started = _time.monotonic() + + def _heartbeat() -> None: + # Print a starting line so callers see something within the first + # turn of the loop (the user reads the prompt-message even before + # the first 15s tick). + print(f"[mp] {message}", file=sys.stderr, flush=True) + while not stop.wait(_NON_TTY_HEARTBEAT_INTERVAL_SECONDS): + elapsed = int(_time.monotonic() - started) + print( + f"[mp] {message} ({elapsed}s elapsed…)", + file=sys.stderr, + flush=True, + ) + + hb = threading.Thread(target=_heartbeat, daemon=True) + hb.start() + try: + yield + finally: + stop.set() + # Don't join — the thread is daemon, sleeps on the event, and exits + # promptly. Joining adds 0–50 ms latency to every CLI command for no + # observable correctness gain. diff --git a/src/mixpanel_headless/exceptions.py b/src/mixpanel_headless/exceptions.py index 03bab4e..6495d38 100644 --- a/src/mixpanel_headless/exceptions.py +++ b/src/mixpanel_headless/exceptions.py @@ -19,7 +19,10 @@ from typing import TYPE_CHECKING, Any, Literal if TYPE_CHECKING: + from pathlib import Path + from mixpanel_headless._internal.auth.account import Region + from mixpanel_headless.accounts import ProjectPickResult class MixpanelHeadlessError(Exception): @@ -1033,10 +1036,21 @@ class OAuthError(MixpanelHeadlessError): Error codes: - OAUTH_TOKEN_ERROR: Token exchange or validation failed - OAUTH_REFRESH_ERROR: Token refresh failed + - OAUTH_REFRESH_REVOKED: Refresh token rejected by IdP (re-login required) - OAUTH_REGISTRATION_ERROR: Dynamic Client Registration failed - OAUTH_TIMEOUT: Callback server timed out waiting for authorization - OAUTH_PORT_ERROR: All callback ports are occupied - OAUTH_BROWSER_ERROR: Could not open browser for authorization + - OAUTH_PASTE_ERROR: Pasted redirect URL was empty / unparseable / missing + ``code`` or ``state`` + - OAUTH_STATE_MISMATCH: Pasted ``state`` did not match the in-flight session + - OAUTH_AUTH_DENIED: OAuth provider returned an ``error=`` parameter + - OAUTH_CONFIG_ERROR: Invalid region or other static config rejected at construction + - OAUTH_INFLIGHT_MISSING: ``mp login --finish`` / ``--resume`` invoked with + no inflight file on disk (run ``mp login --start`` first) + - OAUTH_INFLIGHT_EXPIRED: Inflight file present but ``expires_at < now()`` + (the 10-minute TTL expired; re-run ``mp login --start``) + - OAUTH_INFLIGHT_CORRUPT: Inflight JSON failed to parse / is missing required keys Example: ```python @@ -1178,6 +1192,156 @@ def __init__( ) +class NeedsRegionSwitchError(OAuthError): + """``/me`` returned 0 region-compatible projects after auto-pick filtering. + + Raised by :func:`mixpanel_headless.accounts._resolve_project` when the + user authenticated against region X but every visible project lives in + a different cluster. The auto-pick algorithm refuses to publish a + wrong-region account because every subsequent request would 401. + + Carries the full :class:`ProjectPickResult` so the CLI can render the + cross-region alternatives — typically a one-line "you have N projects in + eu and M in in; run ``mp login --start --region eu``" hint plus a + structured ``cross_region_projects`` list for programmatic consumers. + + Maps to ``ExitCode.NEEDS_SELECTION`` (6) via the ``@handle_errors`` + decorator. Code is ``NEEDS_REGION_SWITCH``. + + Example: + ```python + try: + pick = _resolve_project(me_resp=me, region="us", auto_pick=True, ...) + except NeedsRegionSwitchError as exc: + for pid, name, domain in exc.pick.cross_region_projects or []: + print(f"{pid} ({name}) lives at {domain}") + ``` + """ + + def __init__(self, message: str, *, pick: ProjectPickResult) -> None: + """Initialize NeedsRegionSwitchError. + + Args: + message: Human-readable summary of the cross-region situation + (e.g., counts per region + suggested ``mp login --start + --region`` command). + pick: The :class:`ProjectPickResult` carrying the + cross-region project list (``method == "cross_region_only"``) + so the CLI can render the structured envelope without + re-walking ``/me``. + """ + self._pick = pick + super().__init__( + message, + code="NEEDS_REGION_SWITCH", + details={ + "auth_region": pick.auth_region, + "cross_region_projects": [ + {"id": pid, "name": name, "domain": domain} + for pid, name, domain in (pick.cross_region_projects or []) + ], + }, + ) + + @property + def pick(self) -> ProjectPickResult: + """The full :class:`ProjectPickResult` carrying cross-region data.""" + return self._pick + + +class LoginFinishPublishError(MixpanelHeadlessError): + """Token exchange succeeded but the publish step failed. + + Raised by :func:`mixpanel_headless.accounts.login_unified_finish` (and + :func:`login_unified_resume`) when the OAuth code has already been + exchanged for tokens but the subsequent ``/me`` probe / project + resolution / account write failed in a recoverable way (bad + ``--name``, project not visible, name collision, transient ``/me`` + failure, etc.). Re-running ``mp login --finish`` would fail at the + token exchange step because the IdP one-time-uses authorization + codes; the user must run ``mp login --resume `` + with corrected args to recover. + + Carries ``placeholder_dir`` so the CLI / slash command can surface + the exact resume command without the user having to ``ls + ~/.mp/accounts/`` for a ``.tmp-*`` directory. The original error's + code, message, and details are preserved so consumers can still + branch on the underlying cause. + + Maps to ``ExitCode.GENERAL_ERROR`` (1) by default; the CLI's + ``--finish`` / ``--resume`` paths catch it explicitly and emit a + structured JSON error envelope on stdout before exit. + + Example: + ```python + try: + result = login_unified_finish(pasted_url=url) + except LoginFinishPublishError as exc: + print(f"Resume with: mp login --resume {exc.placeholder_dir}") + print(f"Original cause: {exc.original_code}: {exc.original_message}") + ``` + """ + + def __init__( + self, + *, + placeholder_dir: Path, + cause: BaseException, + ) -> None: + """Initialize LoginFinishPublishError. + + Args: + placeholder_dir: The ``.tmp-{nonce}`` directory still on disk + with valid ``tokens.json``. The user passes this to + ``mp login --resume`` to recover. + cause: The underlying exception that triggered the publish + failure. ``code``, ``message``, and ``details`` are + copied into this wrapper for slash-command rendering. + """ + self._placeholder_dir = placeholder_dir + original_code = str(getattr(cause, "code", None) or type(cause).__name__) + original_message = str(cause) + original_details = ( + dict(getattr(cause, "details", {}) or {}) + if hasattr(cause, "details") + else {} + ) + self._original_code = original_code + self._original_message = original_message + message = ( + f"Token exchange succeeded but publish failed: " + f"{original_code}: {original_message}\n\n" + f"Recover with: mp login --resume {placeholder_dir}\n" + f"(Re-running --finish won't work — the OAuth code has already " + f"been consumed.)" + ) + super().__init__( + message, + code="LOGIN_FINISH_PUBLISH_FAILED", + details={ + "placeholder_dir": str(placeholder_dir), + "original_code": original_code, + "original_message": original_message, + "original_details": original_details, + }, + ) + + @property + def placeholder_dir(self) -> Path: + """The ``.tmp-{nonce}`` placeholder directory ready for ``--resume``.""" + return self._placeholder_dir + + @property + def original_code(self) -> str: + """Machine-readable code of the underlying publish failure.""" + return self._original_code + + @property + def original_message(self) -> str: + """Human-readable message of the underlying publish failure.""" + return self._original_message + + class WorkspaceScopeError(MixpanelHeadlessError): """Scope resolution error (workspace or organization). diff --git a/tests/unit/cli/test_login_cli.py b/tests/unit/cli/test_login_cli.py index bc16618..ffc19f9 100644 --- a/tests/unit/cli/test_login_cli.py +++ b/tests/unit/cli/test_login_cli.py @@ -731,13 +731,24 @@ def _fake_pkce( assert result.exit_code != 0, result.output # No tokens should have escaped above the accounts tree. assert not (tmp_path / ".mp" / "escape").exists() - # And the placeholder dir should have been cleaned up. + # New contract (two-shot flow): the placeholder is PRESERVED when + # ``tokens.json`` exists so the user can ``mp login --resume + # --name valid-name`` to recover without re-running PKCE. Verify + # the placeholder remains AND contains valid tokens.json (the + # critical security property — tokens stay inside ~/.mp/accounts/, + # never under ~/.mp/escape/). accounts_dir = tmp_path / ".mp" / "accounts" if accounts_dir.exists(): leftovers = [ p for p in accounts_dir.iterdir() if p.name.startswith(".tmp-") ] - assert leftovers == [], f"placeholder leak: {leftovers}" + # Either no placeholder (publish succeeded then bailed before + # writing tokens) or exactly one placeholder with tokens.json. + for placeholder in leftovers: + tokens_path = placeholder / "tokens.json" + assert tokens_path.exists(), ( + f"placeholder kept without tokens.json: {placeholder}" + ) class TestMpLoginPostRenameRollback: diff --git a/tests/unit/cli/test_login_two_shot.py b/tests/unit/cli/test_login_two_shot.py new file mode 100644 index 0000000..bf352dd --- /dev/null +++ b/tests/unit/cli/test_login_two_shot.py @@ -0,0 +1,585 @@ +"""CLI tests for the ``mp login --start`` / ``--finish`` / ``--resume`` flow. + +Mirrors the patterns in ``tests/unit/cli/test_login_cli.py`` (CliRunner +with ``--isolated_home``, ``MixpanelAPIClient.me`` monkeypatched, no +respx). Covers the §9 envelope shapes, the mutual-exclusion rules, the +``NeedsRegionSwitchError`` exit-6 path, and a happy-path two-shot +sequence (start → finish) with mocked OAuth. +""" + +from __future__ import annotations + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import Any +from urllib.parse import urlparse + +import pytest +from pydantic import SecretStr +from typer.testing import CliRunner + +from mixpanel_headless._internal.auth.token import OAuthTokens +from mixpanel_headless.cli.main import app + + +@pytest.fixture(autouse=True) +def _isolated_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Pin $HOME, MP_CONFIG_PATH, MP_OAUTH_STORAGE_DIR for hermetic tests.""" + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("MP_CONFIG_PATH", str(tmp_path / ".mp" / "config.toml")) + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path / ".mp")) + + +@pytest.fixture +def runner() -> CliRunner: + """A Typer CliRunner.""" + return CliRunner() + + +def _stub_dcr( + monkeypatch: pytest.MonkeyPatch, *, client_id: str = "test_client_xyz" +) -> None: + """Patch ``ensure_client_registered`` to return a canned client info.""" + from mixpanel_headless._internal.auth import client_registration + from mixpanel_headless._internal.auth.token import OAuthClientInfo + + def _fake( + http_client: Any, region: str, redirect_uri: str, storage: Any + ) -> OAuthClientInfo: + """Inner stub returning a constant client info.""" + del http_client, storage + return OAuthClientInfo( + client_id=client_id, + region=region, + redirect_uri=redirect_uri, + scope="read", + created_at=datetime.now(timezone.utc), + ) + + monkeypatch.setattr(client_registration, "ensure_client_registered", _fake) + # OAuthFlow imports the same symbol from client_registration; patch + # the rebound name there too so test imports don't bypass the stub. + from mixpanel_headless._internal.auth import flow as flow_mod + + monkeypatch.setattr(flow_mod, "ensure_client_registered", _fake, raising=False) + + +def _stub_me(monkeypatch: pytest.MonkeyPatch, payload: dict[str, object]) -> None: + """Patch ``MixpanelAPIClient.me`` to return a canned /me payload.""" + from mixpanel_headless._internal import api_client as api_client_mod + + def _fake_me(self: object) -> dict[str, object]: + """Inner stub returning the canned /me payload.""" + return payload + + monkeypatch.setattr(api_client_mod.MixpanelAPIClient, "me", _fake_me) + + +def _stub_exchange(monkeypatch: pytest.MonkeyPatch) -> None: + """Patch ``OAuthFlow.exchange_code`` to return a canned token set.""" + from mixpanel_headless._internal.auth import flow as flow_mod + + def _fake( + self: Any, + *, + code: str, + verifier: str, + client_id: str, + redirect_uri: str, + ) -> OAuthTokens: + """Inner stub returning constant tokens (long-lived expires_at).""" + del self, code, verifier, client_id, redirect_uri + return OAuthTokens( + access_token=SecretStr("access_xyz"), + refresh_token=SecretStr("refresh_abc"), + expires_at=datetime(2099, 1, 1, tzinfo=timezone.utc), + scope="read", + token_type="Bearer", + ) + + # exchange_code accepts kwargs in the orchestrator; tests pass them positionally + # via the OAuthFlow.exchange_code signature. Patch as a method. + monkeypatch.setattr(flow_mod.OAuthFlow, "exchange_code", _fake) + + +def _me_payload(projects: dict[str, dict[str, Any]]) -> dict[str, Any]: + """Build a /me JSON payload with the user / org / project shape.""" + org_ids = {p["organization_id"] for p in projects.values()} + return { + "user_id": 42, + "user_email": "test@example.com", + "user_name": "Test User", + "organizations": { + str(oid): {"id": oid, "name": f"Org {oid}", "role": "admin"} + for oid in org_ids + }, + "projects": projects, + "workspaces": {}, + } + + +class TestStartFlag: + """``mp login --start`` emits the documented JSON envelope.""" + + def test_start_emits_json_envelope( + self, runner: CliRunner, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Single-line JSON envelope on stdout, exit 0.""" + _stub_dcr(monkeypatch) + result = runner.invoke(app, ["login", "--start"]) + assert result.exit_code == 0, result.output + payload = json.loads(result.stdout) + assert payload["schema_version"] == 1 + assert payload["state"] == "ok" + assert payload["region"] == "us" + assert "authorize_url" in payload + assert payload["authorize_url"].startswith( + "https://mixpanel.com/oauth/authorize/" + ) + assert "redirect_uri" in payload + assert "expires_at" in payload + assert payload["expires_at"] > 0 + assert "inflight_path" in payload + + def test_start_with_eu_region( + self, runner: CliRunner, monkeypatch: pytest.MonkeyPatch + ) -> None: + """``--region eu`` produces an eu-cluster authorize URL.""" + _stub_dcr(monkeypatch) + result = runner.invoke(app, ["login", "--start", "--region", "eu"]) + assert result.exit_code == 0, result.output + payload = json.loads(result.stdout) + assert payload["region"] == "eu" + # Compare hostname rather than substring so a future bug that + # builds e.g. ``https://eu.mixpanel.com.attacker.example/`` would + # still trip this test (also keeps CodeQL's + # ``incomplete-url-substring-sanitization`` rule quiet). + assert urlparse(payload["authorize_url"]).hostname == "eu.mixpanel.com" + + +class TestFinishHappyPath: + """``mp login --finish URL`` against a single-project /me publishes.""" + + def test_finish_publishes_account_with_auto_pick( + self, + runner: CliRunner, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + ) -> None: + """Two-shot --start → --finish writes a usable account + emits envelope.""" + _stub_dcr(monkeypatch) + _stub_exchange(monkeypatch) + _stub_me( + monkeypatch, + _me_payload( + { + "100": { + "name": "Test Project", + "organization_id": 1, + "domain": "mixpanel.com", + "is_demo": False, + "has_integrated": True, + }, + } + ), + ) + + # First, --start to create the inflight. + result_start = runner.invoke(app, ["login", "--start"]) + assert result_start.exit_code == 0, result_start.output + start_payload = json.loads(result_start.stdout) + state = start_payload["authorize_url"].split("state=")[1].split("&")[0] + + # Build the redirect URL that --finish will accept. + redirect_url = ( + f"http://localhost:19284/callback?code=auth_code_xyz&state={state}" + ) + + # --finish completes the flow. + result_finish = runner.invoke(app, ["login", "--finish", redirect_url]) + assert result_finish.exit_code == 0, result_finish.output + payload = json.loads(result_finish.stdout) + assert payload["state"] == "ok" + assert payload["account"]["type"] == "oauth_browser" + assert payload["project"]["id"] == "100" + assert payload["project_pick"]["method"] == "sole_survivor" + # auto_picked is True because there's no explicit project AND no picker. + assert payload["project_pick"]["auto_picked"] is True + # next[] reuses _PROJECT_NEXT verbatim. + assert payload["next"][0]["command"] == "mp project list" + + +class TestFinishStateMismatch: + """A pasted URL with the wrong state → OAUTH_STATE_MISMATCH, exit 2.""" + + def test_state_mismatch_raises( + self, runner: CliRunner, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Pasted URL with a forged state → AUTH_ERROR exit 2.""" + _stub_dcr(monkeypatch) + result_start = runner.invoke(app, ["login", "--start"]) + assert result_start.exit_code == 0, result_start.output + + # Pass a redirect URL with a state that doesn't match the inflight. + bad_url = "http://localhost:19284/callback?code=foo&state=wrong_state" + result = runner.invoke(app, ["login", "--finish", bad_url]) + assert result.exit_code == 2 # AUTH_ERROR + assert ( + "State mismatch" in result.output or "OAUTH_STATE_MISMATCH" in result.output + ) + + +class TestFinishMissingInflight: + """``--finish`` without prior ``--start`` → OAUTH_INFLIGHT_MISSING.""" + + def test_no_inflight_raises(self, runner: CliRunner) -> None: + """No prior --start → OAUTH_INFLIGHT_MISSING, exit 2.""" + result = runner.invoke( + app, + ["login", "--finish", "http://localhost:19284/callback?code=x&state=y"], + ) + assert result.exit_code == 2 # AUTH_ERROR + assert ( + "OAUTH_INFLIGHT_MISSING" in result.output + or "inflight" in result.output.lower() + ) + + +class TestCrossRegionExit6: + """``/me`` returns only-EU projects with US auth → exit 6 + JSON envelope.""" + + def test_cross_region_exits_6_with_envelope( + self, + runner: CliRunner, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """US auth + only-EU /me → exit 6 + state:error envelope.""" + _stub_dcr(monkeypatch) + _stub_exchange(monkeypatch) + _stub_me( + monkeypatch, + _me_payload( + { + "100": { + "name": "EU Project", + "organization_id": 1, + "domain": "eu.mixpanel.com", + "is_demo": False, + "has_integrated": True, + }, + } + ), + ) + + result_start = runner.invoke(app, ["login", "--start"]) + assert result_start.exit_code == 0 + state = ( + json.loads(result_start.stdout)["authorize_url"] + .split("state=")[1] + .split("&")[0] + ) + redirect = f"http://localhost:19284/callback?code=foo&state={state}" + + result = runner.invoke(app, ["login", "--finish", redirect]) + assert result.exit_code == 6 # NEEDS_SELECTION + payload = json.loads(result.stdout) + assert payload["state"] == "error" + assert payload["error"]["code"] == "NEEDS_REGION_SWITCH" + assert payload["error"]["actionable"] is True + assert payload["error"]["details"]["auth_region"] == "us" + cross = payload["error"]["details"]["cross_region_projects"] + assert any(p["domain"] == "eu.mixpanel.com" for p in cross) + + +class TestFlagMutex: + """Mutually-exclusive flag combinations exit 3 (INVALID_ARGS).""" + + def test_start_and_finish_together(self, runner: CliRunner) -> None: + """``--start`` + ``--finish`` rejected as mutually exclusive.""" + result = runner.invoke( + app, + ["login", "--start", "--finish", "http://x?code=a&state=b"], + ) + assert result.exit_code == 3 + assert "mutually exclusive" in result.output + + def test_start_and_service_account(self, runner: CliRunner) -> None: + """Two-shot flags are oauth_browser-only — SA flag rejected.""" + result = runner.invoke(app, ["login", "--start", "--service-account"]) + assert result.exit_code == 3 + # Rich wraps long lines; assert on a fragment that survives wrapping. + assert "--service-account" in result.output + assert "oauth_browser-only" in result.output + + def test_start_and_name(self, runner: CliRunner) -> None: + """``--start`` + ``--name`` rejected (name is a finish-time decision).""" + result = runner.invoke(app, ["login", "--start", "--name", "foo"]) + assert result.exit_code == 3 + assert "--start cannot accept" in result.output + + +class TestResumeMissing: + """``--resume`` on a non-existent placeholder → ConfigError, exit 1.""" + + def test_resume_missing_path_raises( + self, runner: CliRunner, tmp_path: Path + ) -> None: + """Bogus placeholder path under accounts_root → ConfigError, exit 1.""" + # Place the bogus path under accounts_root so the path-traversal + # guard does not fire before the existence check. + from mixpanel_headless._internal.auth.storage import accounts_root + + accounts_root().mkdir(parents=True, exist_ok=True, mode=0o700) + bogus = accounts_root() / ".tmp-deadbeef" + result = runner.invoke(app, ["login", "--resume", str(bogus)]) + assert result.exit_code == 1 + # Rich wraps long error lines, so collapse whitespace before + # substring matching to avoid coupling the test to terminal width. + normalized = " ".join(result.output.split()) + assert "does not exist" in normalized + + def test_resume_path_outside_accounts_root_raises( + self, runner: CliRunner, tmp_path: Path + ) -> None: + """``--resume /tmp/.tmp-foo`` (outside accounts_root) → ConfigError. + + Path-traversal guard: ``--resume`` will rename the placeholder + into ``accounts_root()``. If a caller hands us an arbitrary + on-disk path, we'd silently exfiltrate whoever's tokens are + sitting there. Reject before reading anything. + """ + # tmp_path/.tmp-foo lives outside ``accounts_root()`` (which is + # tmp_path/.mp/accounts under the isolated_home fixture). + outside = tmp_path / ".tmp-foo-outside" + outside.mkdir(mode=0o700) + (outside / "tokens.json").write_text("{}", encoding="utf-8") + result = runner.invoke(app, ["login", "--resume", str(outside)]) + assert result.exit_code == 1 + normalized = " ".join(result.output.split()) + assert "must live under" in normalized + + def test_resume_corrupt_meta_does_not_delete_placeholder( + self, + runner: CliRunner, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Corrupt meta.json → loud error, placeholder preserved. + + Previously the silent ``us`` fallback could hand an EU + placeholder to ``_publish_account_from_tokens``, raise + ``NeedsRegionSwitchError``, and delete the recoverable + tokens. Now: corrupt meta raises before any cleanup runs, + so the user can repair meta.json and retry. + """ + from mixpanel_headless._internal.auth.inflight import ( + new_placeholder_dir, + ) + from mixpanel_headless._internal.auth.storage import accounts_root + + accounts_root().mkdir(parents=True, exist_ok=True, mode=0o700) + placeholder = new_placeholder_dir(accounts_root()) + (placeholder / "tokens.json").write_text( + json.dumps( + { + "access_token": "ya29.access", + "refresh_token": "1//refresh", + "expires_at": "2099-01-01T00:00:00+00:00", + "scope": "read write", + "token_type": "Bearer", + } + ), + encoding="utf-8", + ) + (placeholder / "meta.json").write_text("not json", encoding="utf-8") + + result = runner.invoke(app, ["login", "--resume", str(placeholder)]) + assert result.exit_code != 0 + # Placeholder must remain on disk so the user can fix meta.json + # by hand and retry without losing their tokens. + assert placeholder.exists() + assert (placeholder / "tokens.json").exists() + + +class TestPostExchangeFailureSurfacesPlaceholder: + """Token exchange succeeds + publish fails → envelope carries placeholder. + + Codex review [P2]: when post-exchange publish fails (bad --name, + project not visible, /me parse error), the OAuth code is consumed + and re-running --finish would fail at exchange. The user must + `mp login --resume ` instead — but they need the path. The + LoginFinishPublishError wrap surfaces it through a structured JSON + envelope. + """ + + def test_invalid_name_emits_publish_failure_envelope( + self, + runner: CliRunner, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + ) -> None: + """Bad --name passes the regex but trips the path-traversal guard.""" + _stub_dcr(monkeypatch) + _stub_exchange(monkeypatch) + _stub_me( + monkeypatch, + _me_payload( + { + "100": { + "name": "Test Project", + "organization_id": 1, + "domain": "mixpanel.com", + "is_demo": False, + "has_integrated": True, + }, + } + ), + ) + + result_start = runner.invoke(app, ["login", "--start"]) + assert result_start.exit_code == 0 + state = ( + json.loads(result_start.stdout)["authorize_url"] + .split("state=")[1] + .split("&")[0] + ) + redirect = f"http://localhost:19284/callback?code=foo&state={state}" + + # --name "../escape" fails the account_dir regex AFTER tokens.json + # is written to the placeholder. The wrap should surface the + # placeholder path. + result = runner.invoke( + app, + ["login", "--finish", redirect, "--name", "../escape"], + ) + assert result.exit_code == 1, result.output + payload = json.loads(result.stdout) + assert payload["state"] == "error" + assert payload["error"]["code"] == "LOGIN_FINISH_PUBLISH_FAILED" + assert payload["error"]["actionable"] is True + assert payload["error"]["details"]["placeholder_dir"].endswith( + tuple( + f"{name}" + for name in payload["error"]["details"]["placeholder_dir"].split("/")[ + -1: + ] + ) + ) + # Original cause is preserved so consumers can still branch on it. + assert payload["error"]["details"]["original_code"] == "CONFIG_ERROR" + # Resume hint with exact command. + resume_cmd = payload["resume_hint"]["command"] + assert resume_cmd.startswith("mp login --resume ") + assert ".tmp-" in resume_cmd + + # Placeholder dir must still exist on disk so --resume actually works. + placeholder_path = Path(payload["error"]["details"]["placeholder_dir"]) + assert placeholder_path.exists() + assert (placeholder_path / "tokens.json").exists() + + # Inflight should be cleared (code is consumed; re-running --finish + # with the same paste would fail at exchange anyway). + from mixpanel_headless._internal.auth.inflight import inflight_path + + assert not inflight_path().exists() + + def test_project_not_visible_emits_publish_failure_envelope( + self, + runner: CliRunner, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """--project ID not in /me → wrapped as LoginFinishPublishError.""" + _stub_dcr(monkeypatch) + _stub_exchange(monkeypatch) + _stub_me( + monkeypatch, + _me_payload( + { + "100": { + "name": "Visible", + "organization_id": 1, + "domain": "mixpanel.com", + "is_demo": False, + "has_integrated": True, + }, + } + ), + ) + + result_start = runner.invoke(app, ["login", "--start"]) + assert result_start.exit_code == 0 + state = ( + json.loads(result_start.stdout)["authorize_url"] + .split("state=")[1] + .split("&")[0] + ) + redirect = f"http://localhost:19284/callback?code=foo&state={state}" + + result = runner.invoke( + app, + ["login", "--finish", redirect, "--project", "999"], + ) + assert result.exit_code == 1, result.output + payload = json.loads(result.stdout) + assert payload["state"] == "error" + assert payload["error"]["code"] == "LOGIN_FINISH_PUBLISH_FAILED" + # Underlying ProjectNotFoundError code is preserved. + assert payload["error"]["details"]["original_code"] == "PROJECT_NOT_FOUND" + assert "mp login --resume" in payload["resume_hint"]["command"] + + +class TestCrossRegionCleansUpPlaceholder: + """NeedsRegionSwitchError must NOT leave the placeholder on disk. + + Cross-region is fundamental, not transient — re-running --resume + against the same placeholder hits the exact same error. The CLI + should clean up the placeholder so the user can `mp login --start + --region eu` cleanly without orphan dirs. + """ + + def test_cross_region_removes_placeholder( + self, + runner: CliRunner, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + ) -> None: + """After NEEDS_REGION_SWITCH, ~/.mp/accounts/.tmp-* is gone.""" + _stub_dcr(monkeypatch) + _stub_exchange(monkeypatch) + _stub_me( + monkeypatch, + _me_payload( + { + "100": { + "name": "EU Project", + "organization_id": 1, + "domain": "eu.mixpanel.com", + "is_demo": False, + "has_integrated": True, + }, + } + ), + ) + + result_start = runner.invoke(app, ["login", "--start"]) + assert result_start.exit_code == 0 + state = ( + json.loads(result_start.stdout)["authorize_url"] + .split("state=")[1] + .split("&")[0] + ) + redirect = f"http://localhost:19284/callback?code=foo&state={state}" + + result = runner.invoke(app, ["login", "--finish", redirect]) + assert result.exit_code == 6 # NEEDS_SELECTION + + # No orphan placeholder dirs. + accounts_dir = tmp_path / ".mp" / "accounts" + if accounts_dir.exists(): + leftovers = [ + p for p in accounts_dir.iterdir() if p.name.startswith(".tmp-") + ] + assert leftovers == [], ( + f"Cross-region failure left orphan placeholder(s): {leftovers}" + ) diff --git a/tests/unit/test_auth_flow.py b/tests/unit/test_auth_flow.py index e5d28f1..3b2c51a 100644 --- a/tests/unit/test_auth_flow.py +++ b/tests/unit/test_auth_flow.py @@ -342,7 +342,16 @@ def _capturing_build(self: object, **kw: object) -> str: # The paste reader thread reads via sys.stdin.readline(), so we need # to stub sys.stdin in the flow module. class _DeferredStdin: - """Stdin that blocks until state is captured, then emits the URL.""" + """Stdin that blocks until state is captured, then emits the URL. + + ``isatty`` returns True so the paste reader thread starts — + the production code skips the paste reader for non-TTY stdin + (Cowork pipes, CI runners) since there's no in-band way to + deliver paste input. + """ + + def isatty(self) -> bool: + return True def readline(self) -> str: # Wait for the authorize URL build (which captures state). diff --git a/tests/unit/test_flow_fixes.py b/tests/unit/test_flow_fixes.py new file mode 100644 index 0000000..b8ca5a9 --- /dev/null +++ b/tests/unit/test_flow_fixes.py @@ -0,0 +1,199 @@ +"""Regression tests for the two ``flow.py`` fixes shipped with the two-shot flow. + +1. Default ``OAuthFlow().http_client`` timeout is bumped above httpx's 5s + default so cold SOCKS5h handshakes (Cowork) succeed on first call. +2. Paste-reader event-queue race is fixed via ``threading.Event`` — + errors from the paste reader propagate within milliseconds instead + of waiting 310s for the callback timeout. +""" + +from __future__ import annotations + +import io +import sys +import time +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import httpx +import pytest + +from mixpanel_headless._internal.auth.flow import OAuthFlow +from mixpanel_headless._internal.auth.storage import OAuthStorage +from mixpanel_headless._internal.auth.token import OAuthClientInfo +from mixpanel_headless.exceptions import OAuthError + + +def _fake_dcr( + http_client: Any, region: str, redirect_uri: str, storage: Any +) -> OAuthClientInfo: + """Stub for ``ensure_client_registered`` — returns a canned client info.""" + del http_client, storage + return OAuthClientInfo( + client_id="test_client", + region=region, + redirect_uri=redirect_uri, + scope="read", + created_at=datetime.now(timezone.utc), + ) + + +class TestDefaultTimeoutBumped: + """``OAuthFlow().http_client.timeout`` must exceed httpx 5s default.""" + + def test_default_read_timeout_at_least_10s( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Read timeout must be >= 10s — accommodates SOCKS handshake + slow /me.""" + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + with OAuthFlow(region="us", storage=OAuthStorage()) as flow: + timeout = flow.http_client.timeout + assert timeout.read is not None + assert timeout.read >= 10.0, f"Read timeout {timeout.read}s too short" + + def test_default_connect_timeout_at_least_5s( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Connect timeout must be >= 5s — DNS + TCP through SOCKS proxy.""" + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + with OAuthFlow(region="us", storage=OAuthStorage()) as flow: + timeout = flow.http_client.timeout + assert timeout.connect is not None + assert timeout.connect >= 5.0, ( + f"Connect timeout {timeout.connect}s too short" + ) + + def test_explicit_http_client_passes_through( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Caller-supplied http_client is NOT replaced by the bumped default.""" + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + custom = httpx.Client(timeout=httpx.Timeout(2.0)) + try: + flow = OAuthFlow(region="us", storage=OAuthStorage(), http_client=custom) + assert flow.http_client is custom + finally: + custom.close() + + +class TestLifecycle: + """OAuthFlow exposes its dependencies and closes the default httpx client.""" + + def test_http_client_property_returns_underlying_client( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """``flow.http_client`` is the same instance the constructor created.""" + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + with OAuthFlow(region="us", storage=OAuthStorage()) as flow: + assert flow.http_client is flow.http_client # idempotent property + assert isinstance(flow.http_client, httpx.Client) + + def test_storage_property_returns_underlying_storage( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """``flow.storage`` is the OAuthStorage passed to the constructor.""" + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + storage = OAuthStorage() + with OAuthFlow(region="us", storage=storage) as flow: + assert flow.storage is storage + + def test_context_manager_closes_default_http_client( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Exiting ``with OAuthFlow(...)`` closes the default httpx.Client.""" + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + with OAuthFlow(region="us", storage=OAuthStorage()) as flow: + client = flow.http_client + assert not client.is_closed + assert client.is_closed + + def test_close_is_idempotent( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Calling ``close()`` twice does not raise.""" + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + flow = OAuthFlow(region="us", storage=OAuthStorage()) + flow.close() + flow.close() # second call must be safe (httpx.Client.close is idempotent) + + +class TestPasteReaderRaceFix: + """Paste-reader errors must propagate fast, not wait 310s for callback.""" + + def test_empty_stdin_with_no_browser_raises_fast( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """``open_browser=False`` + empty stdin → fast OAUTH_PASTE_ERROR. + + Pre-fix: paste reader put OAUTH_PASTE_ERROR into ``error_q``, + but the main thread was blocked on ``result_q.get(timeout=310.0)`` + — so the user waited 5 minutes for a synchronous error to surface. + Post-fix: ``threading.Event`` wakes the main thread within ms. + """ + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + + # Force isatty() to return True so the paste reader thread starts. + fake_stdin = io.StringIO("") + fake_stdin.isatty = lambda: True # type: ignore[method-assign] + monkeypatch.setattr(sys, "stdin", fake_stdin) + + from mixpanel_headless._internal.auth import flow as flow_mod + + def _blocking_callback_server(*args: Any, **kwargs: Any) -> Any: + """Block past the test budget — forces paste reader to win.""" + del args, kwargs + time.sleep(60) + raise RuntimeError("unreachable") + + monkeypatch.setattr( + flow_mod, "start_callback_server", _blocking_callback_server + ) + monkeypatch.setattr(flow_mod, "ensure_client_registered", _fake_dcr) + + with OAuthFlow(region="us", storage=OAuthStorage()) as flow: + started = time.monotonic() + with pytest.raises(OAuthError) as exc: + flow.login(open_browser=False) + elapsed = time.monotonic() - started + + assert elapsed < 5.0, f"Took {elapsed:.2f}s — race-fix regression?" + assert exc.value.code in ( + "OAUTH_PASTE_ERROR", + "OAUTH_TOKEN_ERROR", + ), f"Got unexpected code: {exc.value.code}" + + def test_non_tty_stdin_skips_paste_reader( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """Non-TTY stdin + ``open_browser=False`` skips the paste reader entirely.""" + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + + fake_stdin = io.StringIO("") + fake_stdin.isatty = lambda: False # type: ignore[method-assign] + monkeypatch.setattr(sys, "stdin", fake_stdin) + + from mixpanel_headless._internal.auth import flow as flow_mod + + callback_called = {"hit": False} + + def _quick_timeout_server(*args: Any, **kwargs: Any) -> Any: + """Record being called, then raise quickly so the test resolves.""" + del args, kwargs + callback_called["hit"] = True + time.sleep(0.5) + raise OAuthError("test timeout", code="OAUTH_TIMEOUT") + + monkeypatch.setattr(flow_mod, "start_callback_server", _quick_timeout_server) + monkeypatch.setattr(flow_mod, "ensure_client_registered", _fake_dcr) + + with ( + OAuthFlow(region="us", storage=OAuthStorage()) as flow, + pytest.raises(OAuthError), + ): + flow.login(open_browser=False) + assert callback_called["hit"] diff --git a/tests/unit/test_inflight.py b/tests/unit/test_inflight.py new file mode 100644 index 0000000..b8bcc1b --- /dev/null +++ b/tests/unit/test_inflight.py @@ -0,0 +1,348 @@ +"""Tests for ``_internal/auth/inflight.py``. + +Covers the on-disk inflight session lifecycle, placeholder helpers, and +the boundaries between fresh / expired / corrupt state. Mirrors the +existing test patterns in :mod:`tests.unit.test_auth_storage` — +``monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path))`` for +hermetic isolation, no respx, no live network. +""" + +from __future__ import annotations + +import json +import stat +import time +from pathlib import Path + +import pytest + +from mixpanel_headless._internal.auth.inflight import ( + INFLIGHT_SCHEMA_VERSION, + INFLIGHT_TTL_SECONDS, + PLACEHOLDER_META_SCHEMA_VERSION, + InflightSession, + cache_me_in_placeholder, + clear_inflight, + find_available_callback_port, + inflight_path, + load_cached_me_from_placeholder, + load_inflight, + new_placeholder_dir, + read_placeholder_meta, + read_tokens_from_placeholder, + save_inflight, + save_placeholder_meta, +) +from mixpanel_headless._internal.me import MeProjectInfo, MeResponse +from mixpanel_headless.exceptions import OAuthError + + +@pytest.fixture +def isolated_storage(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + """Point storage helpers at ``tmp_path`` for hermetic test isolation.""" + monkeypatch.setenv("MP_OAUTH_STORAGE_DIR", str(tmp_path)) + return tmp_path + + +def _make_session(*, expires_in: int = INFLIGHT_TTL_SECONDS) -> InflightSession: + """Build a fresh InflightSession with realistic field values.""" + now = int(time.time()) + return InflightSession( + schema_version=INFLIGHT_SCHEMA_VERSION, + region="us", + client_id="test_client_abcdef", + redirect_uri="http://localhost:19284/callback", + pkce_verifier="x" * 64, + state="state_1234567890", + created_at=now, + expires_at=now + expires_in, + ) + + +class TestSaveLoadRoundTrip: + """save_inflight + load_inflight produce structurally identical sessions.""" + + def test_round_trip_preserves_all_fields(self, isolated_storage: Path) -> None: + """Every field round-trips byte-for-byte through disk.""" + session = _make_session() + save_inflight(session) + loaded = load_inflight() + assert loaded == session + + def test_inflight_file_mode_is_600(self, isolated_storage: Path) -> None: + """Verifier is a single-use secret — file must be owner-only.""" + save_inflight(_make_session()) + path = inflight_path() + mode = stat.S_IMODE(path.stat().st_mode) + assert mode == 0o600, f"Expected 0o600, got {oct(mode)}" + + def test_parent_dir_mode_is_700(self, isolated_storage: Path) -> None: + """Parent dir gets 0o700 even when the storage root is fresh.""" + save_inflight(_make_session()) + parent = inflight_path().parent + mode = stat.S_IMODE(parent.stat().st_mode) + assert mode == 0o700, f"Expected 0o700, got {oct(mode)}" + + def test_second_start_clobbers_prior_inflight(self, isolated_storage: Path) -> None: + """Second --start silently replaces the prior inflight file.""" + first = _make_session() + save_inflight(first) + second = InflightSession( + schema_version=INFLIGHT_SCHEMA_VERSION, + region="eu", + client_id="different_client", + redirect_uri="http://localhost:19285/callback", + pkce_verifier="y" * 64, + state="different_state", + created_at=first.created_at + 1, + expires_at=first.expires_at + 1, + ) + save_inflight(second) + loaded = load_inflight() + assert loaded == second + assert loaded != first + + +class TestLoadFailures: + """load_inflight raises structured OAuthError for each failure mode.""" + + def test_missing_file_raises_inflight_missing(self, isolated_storage: Path) -> None: + """No file → OAUTH_INFLIGHT_MISSING.""" + with pytest.raises(OAuthError) as exc: + load_inflight() + assert exc.value.code == "OAUTH_INFLIGHT_MISSING" + + def test_expired_file_raises_inflight_expired(self, isolated_storage: Path) -> None: + """expires_at < now() → OAUTH_INFLIGHT_EXPIRED.""" + session = _make_session(expires_in=-1) + save_inflight(session) + with pytest.raises(OAuthError) as exc: + load_inflight() + assert exc.value.code == "OAUTH_INFLIGHT_EXPIRED" + + def test_malformed_json_raises_inflight_corrupt( + self, isolated_storage: Path + ) -> None: + """Non-JSON content → OAUTH_INFLIGHT_CORRUPT.""" + path = inflight_path() + path.parent.mkdir(parents=True, exist_ok=True, mode=0o700) + path.write_text("not json at all", encoding="utf-8") + with pytest.raises(OAuthError) as exc: + load_inflight() + assert exc.value.code == "OAUTH_INFLIGHT_CORRUPT" + + def test_missing_required_keys_raises_inflight_corrupt( + self, isolated_storage: Path + ) -> None: + """JSON object missing required fields → OAUTH_INFLIGHT_CORRUPT.""" + path = inflight_path() + path.parent.mkdir(parents=True, exist_ok=True, mode=0o700) + path.write_text(json.dumps({"region": "us"}), encoding="utf-8") + with pytest.raises(OAuthError) as exc: + load_inflight() + assert exc.value.code == "OAUTH_INFLIGHT_CORRUPT" + + def test_non_object_json_raises_inflight_corrupt( + self, isolated_storage: Path + ) -> None: + """JSON array (not object) → OAUTH_INFLIGHT_CORRUPT.""" + path = inflight_path() + path.parent.mkdir(parents=True, exist_ok=True, mode=0o700) + path.write_text(json.dumps(["wrong", "shape"]), encoding="utf-8") + with pytest.raises(OAuthError) as exc: + load_inflight() + assert exc.value.code == "OAUTH_INFLIGHT_CORRUPT" + + def test_schema_too_new_raises_inflight_schema_too_new( + self, isolated_storage: Path + ) -> None: + """Inflight with ``schema_version > INFLIGHT_SCHEMA_VERSION`` → SCHEMA_TOO_NEW. + + Forward-compat guard: if a future CLI bumps the schema version, + an older CLI reading the new file should fail loudly rather + than silently round-tripping a partially-understood payload. + """ + path = inflight_path() + path.parent.mkdir(parents=True, exist_ok=True, mode=0o700) + future = { + "schema_version": INFLIGHT_SCHEMA_VERSION + 1, + "region": "us", + "client_id": "c", + "redirect_uri": "http://localhost:19284/callback", + "pkce_verifier": "x" * 64, + "state": "s" * 16, + "created_at": int(time.time()), + "expires_at": int(time.time()) + INFLIGHT_TTL_SECONDS, + } + path.write_text(json.dumps(future), encoding="utf-8") + with pytest.raises(OAuthError) as exc: + load_inflight() + assert exc.value.code == "OAUTH_INFLIGHT_SCHEMA_TOO_NEW" + assert exc.value.details is not None + assert exc.value.details["schema_version"] == INFLIGHT_SCHEMA_VERSION + 1 + assert exc.value.details["supported"] == INFLIGHT_SCHEMA_VERSION + + +class TestClearInflight: + """clear_inflight is idempotent.""" + + def test_idempotent_on_missing_file(self, isolated_storage: Path) -> None: + """No file → no-op, no exception.""" + clear_inflight() + clear_inflight() # second call also fine + + def test_removes_existing_file(self, isolated_storage: Path) -> None: + """File exists → unlinked.""" + save_inflight(_make_session()) + assert inflight_path().exists() + clear_inflight() + assert not inflight_path().exists() + + +class TestPlaceholderHelpers: + """new_placeholder_dir + read/write tokens + me cache + meta.""" + + def test_new_placeholder_dir_uses_tmp_prefix(self, isolated_storage: Path) -> None: + """Placeholder dir name starts with .tmp- (for the rollback guard).""" + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + assert placeholder.exists() + assert placeholder.name.startswith(".tmp-") + # Verify mode 0o700. + mode = stat.S_IMODE(placeholder.stat().st_mode) + assert mode == 0o700, f"Expected 0o700, got {oct(mode)}" + + def test_save_and_read_placeholder_meta(self, isolated_storage: Path) -> None: + """meta.json round-trips region for --resume.""" + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + save_placeholder_meta(placeholder, region="eu") + meta = read_placeholder_meta(placeholder) + assert meta is not None + assert meta["region"] == "eu" + assert meta["schema_version"] == PLACEHOLDER_META_SCHEMA_VERSION + + def test_read_placeholder_meta_missing_returns_none( + self, isolated_storage: Path + ) -> None: + """No meta file → None (caller falls back to a default region).""" + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + assert read_placeholder_meta(placeholder) is None + + def test_read_placeholder_meta_corrupt_raises(self, isolated_storage: Path) -> None: + """Malformed JSON in meta.json → OAUTH_PLACEHOLDER_META_CORRUPT. + + The previous behavior silently returned ``None``, which let + ``--resume`` default to ``us`` for an EU/IN placeholder and then + delete the recoverable tokens via the + ``NeedsRegionSwitchError`` cleanup path. Now: corrupt is loud. + """ + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + (placeholder / "meta.json").write_text("not json", encoding="utf-8") + with pytest.raises(OAuthError) as exc: + read_placeholder_meta(placeholder) + assert exc.value.code == "OAUTH_PLACEHOLDER_META_CORRUPT" + + def test_read_placeholder_meta_non_dict_raises( + self, isolated_storage: Path + ) -> None: + """meta.json holding a JSON array → OAUTH_PLACEHOLDER_META_CORRUPT.""" + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + (placeholder / "meta.json").write_text( + json.dumps(["not", "a", "dict"]), encoding="utf-8" + ) + with pytest.raises(OAuthError) as exc: + read_placeholder_meta(placeholder) + assert exc.value.code == "OAUTH_PLACEHOLDER_META_CORRUPT" + + def test_read_tokens_from_placeholder_round_trip( + self, isolated_storage: Path + ) -> None: + """Tokens persisted by token_payload_bytes round-trip via the reader.""" + from datetime import datetime, timezone + + from pydantic import SecretStr + + from mixpanel_headless._internal.auth.token import ( + OAuthTokens, + token_payload_bytes, + ) + from mixpanel_headless._internal.io_utils import atomic_write_bytes + + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + original = OAuthTokens( + access_token=SecretStr("access-abc"), + refresh_token=SecretStr("refresh-xyz"), + expires_at=datetime(2099, 1, 1, tzinfo=timezone.utc), + scope="read write", + token_type="Bearer", + ) + atomic_write_bytes(placeholder / "tokens.json", token_payload_bytes(original)) + loaded = read_tokens_from_placeholder(placeholder) + assert loaded.access_token.get_secret_value() == "access-abc" + assert loaded.refresh_token is not None + assert loaded.refresh_token.get_secret_value() == "refresh-xyz" + assert loaded.expires_at == original.expires_at + assert loaded.scope == "read write" + + def test_read_tokens_missing_file_raises(self, isolated_storage: Path) -> None: + """Missing tokens.json → OAUTH_TOKEN_ERROR.""" + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + with pytest.raises(OAuthError) as exc: + read_tokens_from_placeholder(placeholder) + assert exc.value.code == "OAUTH_TOKEN_ERROR" + + def test_cache_me_round_trip(self, isolated_storage: Path) -> None: + """MeResponse → me.json → MeResponse.""" + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + me = MeResponse( + user_id=42, + user_email="user@example.com", + projects={ + "100": MeProjectInfo( + name="Test", organization_id=1, domain="mixpanel.com" + ), + }, + ) + cache_me_in_placeholder(placeholder, me) + loaded = load_cached_me_from_placeholder(placeholder) + assert loaded is not None + assert isinstance(loaded, MeResponse) + assert loaded.user_id == 42 + assert "100" in loaded.projects + + def test_load_cached_me_missing_returns_none(self, isolated_storage: Path) -> None: + """No me.json → None (caller does a fresh /me fetch).""" + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + assert load_cached_me_from_placeholder(placeholder) is None + + def test_load_cached_me_stale_returns_none(self, isolated_storage: Path) -> None: + """cached_at older than INFLIGHT_TTL_SECONDS → None (treat as stale).""" + accounts_root = isolated_storage / "accounts" + placeholder = new_placeholder_dir(accounts_root) + me = MeResponse(user_id=42) + cache_me_in_placeholder(placeholder, me) + # Rewrite the cached_at field to be older than the TTL. + me_path = placeholder / "me.json" + data = json.loads(me_path.read_text()) + data["cached_at"] = time.time() - INFLIGHT_TTL_SECONDS - 1 + me_path.write_text(json.dumps(data), encoding="utf-8") + assert load_cached_me_from_placeholder(placeholder) is None + + +class TestFindAvailableCallbackPort: + """find_available_callback_port returns one of the registered ports.""" + + def test_returns_a_callback_port(self) -> None: + """Happy path — returns a port in the CALLBACK_PORTS set.""" + from mixpanel_headless._internal.auth.callback_server import CALLBACK_PORTS + + port = find_available_callback_port() + assert port in CALLBACK_PORTS diff --git a/tests/unit/test_loc_budget.py b/tests/unit/test_loc_budget.py index b7ee98d..a9615bb 100644 --- a/tests/unit/test_loc_budget.py +++ b/tests/unit/test_loc_budget.py @@ -63,13 +63,18 @@ def _auth_subsystem_files() -> list[Path]: class TestLocBudget: """FR-067: keep the auth subsystem within a defensible size envelope.""" - FILE_COUNT_CAP = 20 - """Maximum number of auth-subsystem files. A 21st file fails this test.""" + FILE_COUNT_CAP = 21 + """Maximum number of auth-subsystem files. A 22nd file fails this test. - LOC_CAP = 8800 - """Maximum total LoC across the auth subsystem (~7% headroom over current). + Bumped 20 → 21 by the two-shot ``mp login --start/--finish/--resume`` + flow which added ``_internal/auth/inflight.py`` (on-disk PKCE + verifier + placeholder helpers). + """ + + LOC_CAP = 10500 + """Maximum total LoC across the auth subsystem (~5% headroom over current). - Bumped 6500 → 6700 → 8800 by 043 (frictionless-auth): + Bumped 6500 → 6700 → 8800 → 10500: - 6500 → 6700 covered the two new files (``_internal/auth/region_probe.py``, ``_internal/auth/naming.py``) plus the relaxations in ``cli/commands/account.py`` / @@ -77,8 +82,14 @@ class TestLocBudget: - 6700 → 8800 added ``accounts.py`` (1500+ lines after the ``login_unified`` orchestrator landed) to the scope so the orchestrator's growth is also guarded going forward. - See ``specs/043-frictionless-auth/plan.md`` §"Scale/Scope" and the - rescue plan at ``~/.claude/plans/thoroughly-and-rigorously-and-gentle-lemur.md``. + - 8800 → 10500 covers the two-shot ``mp login --start/--finish/--resume`` + flow: ``inflight.py`` (~480 lines) + ~700 lines added to + ``accounts.py`` (``ProjectPickResult``, auto-pick filter cascade, + ``_publish_account_from_tokens``, the three new ``login_unified_*`` + entry points). + See ``specs/043-frictionless-auth/plan.md`` and the contracts under + ``specs/043-frictionless-auth/contracts/`` for the two-shot flow + design that motivated the most recent bump. """ def test_file_count_cap(self) -> None: diff --git a/tests/unit/test_resolve_project_autopick.py b/tests/unit/test_resolve_project_autopick.py new file mode 100644 index 0000000..d1dca27 --- /dev/null +++ b/tests/unit/test_resolve_project_autopick.py @@ -0,0 +1,453 @@ +"""Tests for ``accounts._resolve_project`` — auto-pick algorithm and fallbacks. + +Covers the new ``ProjectPickResult`` shape, the auto-pick filter cascade +(region → drop demos → drop unintegrated → primary-org-lowest-id), the +fallback chain (re-include unintegrated → re-include demos), and the +``cross_region_only`` raise. The existing legacy ``auto_pick=False`` +behavior (TTY picker / E-8 raise) is also exercised so the refactor +doesn't regress same-machine ``mp login``. +""" + +from __future__ import annotations + +import pytest + +from mixpanel_headless._internal.me import MeOrgInfo, MeProjectInfo, MeResponse +from mixpanel_headless.accounts import _resolve_project +from mixpanel_headless.exceptions import ( + ConfigError, + NeedsRegionSwitchError, + ProjectNotFoundError, +) + + +def _proj( + *, + name: str, + org_id: int, + domain: str = "mixpanel.com", + is_demo: bool = False, + has_integrated: bool = True, +) -> MeProjectInfo: + """Build a MeProjectInfo with the auto-pick-relevant fields set. + + ``is_demo`` and ``has_integrated`` are undeclared on the model + (live API returns them via ``model_extra``); pass them as keyword + args here and Pydantic's ``extra="allow"`` shovels them into + ``model_extra``. + """ + return MeProjectInfo( + name=name, + organization_id=org_id, + domain=domain, + is_demo=is_demo, + has_integrated=has_integrated, + ) + + +def _me( + *, + projects: dict[str, MeProjectInfo], + org_names: dict[int, str] | None = None, +) -> MeResponse: + """Build a MeResponse, deriving organizations from project org_ids.""" + org_names = org_names or {} + org_ids = {p.organization_id for p in projects.values()} + organizations = { + str(oid): MeOrgInfo(id=oid, name=org_names.get(oid, f"Org {oid}")) + for oid in org_ids + } + return MeResponse( + user_id=1, + user_email="user@example.com", + projects=projects, + organizations=organizations, + ) + + +class TestExplicitProject: + """``--project ID`` always wins, regardless of filters or auto_pick.""" + + def test_explicit_project_overrides_demo(self) -> None: + """User passed --project even on a demo? They get the demo.""" + me = _me(projects={"3": _proj(name="A demo", org_id=1, is_demo=True)}) + pick = _resolve_project( + me_resp=me, + explicit_project="3", + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.project_id == "3" + assert pick.method == "explicit" + + def test_explicit_project_not_visible_raises(self) -> None: + """--project ID not in /me → ProjectNotFoundError.""" + me = _me(projects={"3": _proj(name="A", org_id=1)}) + with pytest.raises(ProjectNotFoundError): + _resolve_project( + me_resp=me, + explicit_project="999", + project_picker=None, + auto_pick=True, + region="us", + ) + + def test_explicit_skips_region_filter(self) -> None: + """--project on a wrong-region project still resolves (caller's + ``_assert_project_region_matches`` catches the mismatch with E-2).""" + me = _me(projects={"3": _proj(name="EU", org_id=1, domain="eu.mixpanel.com")}) + pick = _resolve_project( + me_resp=me, + explicit_project="3", + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.project_id == "3" + assert pick.method == "explicit" + + +class TestSoleSurvivor: + """When the filter chain leaves exactly one project, no question to ask.""" + + def test_single_region_compat_project(self) -> None: + """One region-compat project → method='sole_survivor'.""" + me = _me(projects={"3": _proj(name="Only", org_id=1)}) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.project_id == "3" + assert pick.method == "sole_survivor" + + def test_single_project_legacy_path(self) -> None: + """auto_pick=False path also short-circuits on 1 project.""" + me = _me(projects={"3": _proj(name="Only", org_id=1)}) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=False, + region="us", + ) + assert pick.project_id == "3" + assert pick.method == "sole_survivor" + + def test_sole_survivor_filtered_when_demos_filtered_out(self) -> None: + """Two region-compat projects, one demo → ``sole_survivor_filtered``. + + With multiple region-compat projects the line 2684 short-circuit + does NOT fire — the auto-pick filter cascade runs and reduces + the survivor set to one. The method should reflect that the + single survivor came out of a *filtered* pool, so user-facing + copy ("the only non-demo, integrated project") is accurate. + """ + me = _me( + projects={ + "10": _proj(name="Real", org_id=1), + "20": _proj(name="Demo", org_id=1, is_demo=True), + }, + ) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.project_id == "10" + assert pick.method == "sole_survivor_filtered" + # Funnel still reflects the broader region-compatible pool. + assert pick.region_compatible_count == 2 + assert pick.filtered_count == 1 + assert pick.demo_excluded == 1 + + +class TestPrimaryOrgLowestId: + """Normal auto-pick: highest-survivor-count org, lowest project ID.""" + + def test_picks_from_higher_count_org(self) -> None: + """Two orgs of unequal survivor counts → pick from the bigger one.""" + me = _me( + projects={ + "10": _proj(name="A", org_id=1), + "20": _proj(name="B", org_id=1), + "30": _proj(name="C", org_id=2), + }, + org_names={1: "Big", 2: "Small"}, + ) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.method == "primary_org_lowest_id" + assert pick.primary_org_id == "1" + assert pick.primary_org_name == "Big" + assert pick.primary_org_survivor_count == 2 + # Lowest project ID in the primary org wins. + assert pick.project_id == "10" + + def test_org_tiebreak_lowest_org_id(self) -> None: + """Orgs with equal survivor counts → lowest org ID wins.""" + me = _me( + projects={ + "100": _proj(name="P1", org_id=2), + "200": _proj(name="P2", org_id=1), + }, + ) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.primary_org_id == "1" + assert pick.project_id == "200" + + +class TestFallbackWithUnintegrated: + """When no integrated projects exist, re-include unintegrated ones.""" + + def test_fallback_when_all_unintegrated(self) -> None: + """All projects unintegrated → method='fallback_with_unintegrated'.""" + me = _me( + projects={ + "10": _proj(name="A", org_id=1, has_integrated=False), + "20": _proj(name="B", org_id=1, has_integrated=False), + }, + ) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.method == "fallback_with_unintegrated" + assert pick.project_id == "10" + + +class TestFallbackWithDemos: + """When all projects are demos AND unintegrated, re-include both.""" + + def test_fallback_when_all_demos(self) -> None: + """All projects are demos → method='fallback_with_demos'.""" + me = _me( + projects={ + "10": _proj(name="A", org_id=1, is_demo=True), + "20": _proj(name="B", org_id=1, is_demo=True), + }, + ) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.method == "fallback_with_demos" + assert pick.project_id == "10" + + +class TestCrossRegionOnly: + """0 region-compat projects → NeedsRegionSwitchError.""" + + def test_eu_user_authed_to_us_raises(self) -> None: + """All projects in eu, auth is us → NeedsRegionSwitchError.""" + me = _me( + projects={ + "10": _proj(name="EU-A", org_id=1, domain="eu.mixpanel.com"), + "20": _proj(name="EU-B", org_id=1, domain="eu.mixpanel.com"), + }, + ) + with pytest.raises(NeedsRegionSwitchError) as exc: + _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert exc.value.pick.method == "cross_region_only" + assert exc.value.pick.cross_region_projects is not None + assert len(exc.value.pick.cross_region_projects) == 2 + + def test_message_suggests_correct_region(self) -> None: + """Error message points the user at the right --region flag.""" + me = _me( + projects={"10": _proj(name="E", org_id=1, domain="eu.mixpanel.com")}, + ) + with pytest.raises(NeedsRegionSwitchError) as exc: + _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert "--region eu" in exc.value.message + + +class TestPickerPriority: + """Picker preempts auto-pick when both are wired.""" + + def test_picker_wins_over_auto_pick(self) -> None: + """auto_pick=True with a picker present → picker fires.""" + me = _me( + projects={ + "10": _proj(name="A", org_id=1), + "20": _proj(name="B", org_id=1), + }, + ) + + def picker(_me, sorted_projects): + """Pick the LAST entry to prove the picker ran.""" + return sorted_projects[-1][0] + + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=picker, + auto_pick=True, + region="us", + ) + assert pick.method == "tty_picker" + assert pick.project_id == "20" + + def test_explicit_skips_picker(self) -> None: + """explicit_project set → picker is NOT called.""" + me = _me( + projects={ + "10": _proj(name="A", org_id=1), + "20": _proj(name="B", org_id=1), + }, + ) + + def picker(_me, _projects): + """Should never be called when explicit_project is set.""" + pytest.fail("picker should not have been called") + + pick = _resolve_project( + me_resp=me, + explicit_project="10", + project_picker=picker, + auto_pick=True, + region="us", + ) + assert pick.method == "explicit" + + +class TestLegacyPathPreservesBehavior: + """auto_pick=False paths preserve the pre-043 same-machine behavior.""" + + def test_multi_project_no_picker_raises_e8(self) -> None: + """auto_pick=False, multi-project, no picker → ConfigError E-8.""" + me = _me( + projects={ + "10": _proj(name="A", org_id=1), + "20": _proj(name="B", org_id=1), + }, + ) + with pytest.raises(ConfigError) as exc: + _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=False, + region="us", + ) + assert "Multiple projects" in exc.value.message + + def test_legacy_path_does_not_filter_by_region(self) -> None: + """auto_pick=False does NOT region-filter (preserves pre-043 behavior). + + Legacy path was: picker sees all projects, post-pick E-2 catches + wrong-region. We preserve that — auto_pick=False with multiple + wrong-region projects raises E-8 (multi-project / no picker) + rather than NeedsRegionSwitchError. + """ + me = _me( + projects={ + "10": _proj(name="EU-A", org_id=1, domain="eu.mixpanel.com"), + "20": _proj(name="EU-B", org_id=1, domain="eu.mixpanel.com"), + }, + ) + with pytest.raises(ConfigError) as exc: + _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=False, + region="us", + ) + # Importantly: NOT NeedsRegionSwitchError. + assert not isinstance(exc.value, NeedsRegionSwitchError) + + +class TestNoProjects: + """Account with zero accessible projects.""" + + def test_empty_me_returns_no_projects(self) -> None: + """me_resp.projects empty → method='no_projects', project_id=None.""" + me = _me(projects={}) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.method == "no_projects" + assert pick.project_id is None + + +class TestFunnelCounts: + """ProjectPickResult carries the right exclusion counts.""" + + def test_demo_excluded_count(self) -> None: + """demo_excluded reflects how many region-compat were dropped.""" + me = _me( + projects={ + "10": _proj(name="A", org_id=1), # kept + "20": _proj(name="B", org_id=1, is_demo=True), # dropped + "30": _proj(name="C", org_id=1, is_demo=True), # dropped + }, + ) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.demo_excluded == 2 + assert pick.unintegrated_excluded == 0 + assert pick.region_compatible_count == 3 + assert pick.filtered_count == 1 + + def test_unintegrated_excluded_count(self) -> None: + """unintegrated_excluded counts non-demo projects without integration.""" + me = _me( + projects={ + "10": _proj(name="A", org_id=1), # kept + "20": _proj(name="B", org_id=1, has_integrated=False), # dropped + }, + ) + pick = _resolve_project( + me_resp=me, + explicit_project=None, + project_picker=None, + auto_pick=True, + region="us", + ) + assert pick.demo_excluded == 0 + assert pick.unintegrated_excluded == 1 + assert pick.filtered_count == 1 diff --git a/uv.lock b/uv.lock index e612a21..81d402c 100644 --- a/uv.lock +++ b/uv.lock @@ -18,7 +18,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-05-07T23:44:29.929192Z" +exclude-newer = "2026-05-08T01:53:24.316593Z" exclude-newer-span = "P7D" [[package]] @@ -668,6 +668,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[package.optional-dependencies] +socks = [ + { name = "socksio" }, +] + [[package]] name = "hypothesis" version = "6.151.13" @@ -1300,7 +1305,7 @@ name = "mixpanel-headless" source = { editable = "." } dependencies = [ { name = "anytree" }, - { name = "httpx" }, + { name = "httpx", extra = ["socks"] }, { name = "jq" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -1350,7 +1355,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "anytree", specifier = ">=2.12" }, - { name = "httpx", specifier = ">=0.27" }, + { name = "httpx", extras = ["socks"], specifier = ">=0.27" }, { name = "hypothesis", extras = ["cli"], marker = "extra == 'dev'", specifier = ">=6.151.13" }, { name = "interrogate", marker = "extra == 'dev'", specifier = ">=1.7" }, { name = "ipykernel", marker = "extra == 'notebook'", specifier = ">=6.0" }, @@ -2825,6 +2830,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "socksio" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/5c/48a7d9495be3d1c651198fd99dbb6ce190e2274d0f28b9051307bdec6b85/socksio-1.0.0.tar.gz", hash = "sha256:f88beb3da5b5c38b9890469de67d0cb0f9d494b78b106ca1845f96c10b91c4ac", size = 19055, upload-time = "2020-04-17T15:50:34.664Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/37/c3/6eeb6034408dac0fa653d126c9204ade96b819c936e136c5e8a6897eee9c/socksio-1.0.0-py3-none-any.whl", hash = "sha256:95dc1f15f9b34e8d7b16f06d74b8ccf48f609af32ab33c608d08761c5dcbb1f3", size = 12763, upload-time = "2020-04-17T15:50:31.878Z" }, +] + [[package]] name = "sortedcontainers" version = "2.4.0"