diff --git a/guardrails_utilities/python_utils/notifications/get-resource-activity-report/README.md b/guardrails_utilities/python_utils/notifications/get-resource-activity-report/README.md new file mode 100644 index 000000000..91285310d --- /dev/null +++ b/guardrails_utilities/python_utils/notifications/get-resource-activity-report/README.md @@ -0,0 +1,380 @@ +# Get Resource Activity Report + +Export resource deletion activity from Guardrails workspaces to CSV. Two scripts are provided: + +| Script | Backend | Best for | +|--------|---------|----------| +| `fetch_resource_deletions.py` | **Turbot CLI** (`turbot graphql`) | Reliable paginated fetches with calendar-day boundaries, bypasses the console 5K export limit | +| `resource_activity_report.py` | Python `requests` (direct HTTP) | Multi-workspace batch runs, auto-detects Turbot Identity ID | + +Both produce CSV output matching the console Resource Activities export format. + +## Prerequisites + +- [Python 3.8+](https://www.python.org/downloads/) +- [Turbot CLI](https://turbot.com/guardrails/docs/reference/cli/installation) installed and configured +- Turbot CLI credentials at `~/.config/turbot/credentials.yml` + +## Setup + +```bash +cd guardrails_utilities/python_utils/notifications/get-resource-activity-report +pip install -r requirements.txt +``` + +Verify turbot CLI connectivity: + +```bash +turbot graphql --profile my-workspace --query='{ resource(id:"tmod:@turbot/turbot#/") { turbot { title } } }' +``` + +--- + +## fetch_resource_deletions.py (Recommended) + +Uses the turbot CLI for authentication and GraphQL transport. Paginates automatically and writes CSV incrementally (partial data is preserved if interrupted). + +### Key features + +- **Calendar-day boundaries** — `--date 2026-05-07` fetches midnight-to-midnight UTC, no overlap between consecutive days +- **All resource types by default** — captures snapshots, instances, volumes, Lambda functions, etc. 
in a single run +- **Bypasses the 5K console export limit** — paginated GraphQL fetches with no cap +- **Resource type aliases** — use `--resource-type snapshot` instead of the full `tmod:` URI +- **Safety guard** — blocks unbounded queries (all types + no time filter) to prevent fetching millions of rows + +### Examples + +#### All resource types deleted by Turbot on a single day + +```bash +python fetch_resource_deletions.py --profile my-workspace --date 2026-05-07 +# Output: my-workspace-resource-deleted-all-types-2026-05-07.csv +``` + +#### Only EC2 snapshots on a single day + +```bash +python fetch_resource_deletions.py --profile my-workspace --date 2026-05-07 --resource-type snapshot +# Output: my-workspace-resource-deleted-snapshot-2026-05-07.csv +``` + +#### Only EC2 instances on a single day + +```bash +python fetch_resource_deletions.py --profile my-workspace --date 2026-05-07 --resource-type instance +# Output: my-workspace-resource-deleted-instance-2026-05-07.csv +``` + +#### Date range — all types from May 1 to May 8 + +```bash +python fetch_resource_deletions.py --profile my-workspace --since 2026-05-01 --until 2026-05-08 +# Output: my-workspace-resource-deleted-all-types-2026-05-01.csv +``` + +#### Date range — snapshots only from May 1 to May 8 + +```bash +python fetch_resource_deletions.py --profile my-workspace --since 2026-05-01 --until 2026-05-08 --resource-type snapshot +# Output: my-workspace-resource-deleted-snapshot-2026-05-01.csv +``` + +#### Rolling window — last 3 days, instances only + +```bash +python fetch_resource_deletions.py --profile my-workspace --days 3 --resource-type instance +# Output: my-workspace-resource-deleted-instance-20260508.csv +``` + +#### Rolling window — last 7 days, all types + +```bash +python fetch_resource_deletions.py --profile my-workspace --days 7 +# Output: my-workspace-resource-deleted-all-types-20260508.csv +``` + +#### Custom output file + +```bash +python fetch_resource_deletions.py --profile 
my-workspace --date 2026-05-07 --output may7-report.csv +``` + +#### Auto-detect Turbot Identity and fetch snapshots + +```bash +python fetch_resource_deletions.py --profile my-workspace --date 2026-05-07 \ + --resource-type snapshot --auto-detect-actor +# Auto-detects the Turbot Identity ID and workspace URL from credentials.yml +# No need for --actor-id or --workspace-url +``` + +#### Explicit actor ID (if auto-detect is not desired) + +```bash +python fetch_resource_deletions.py --profile another-workspace --date 2026-05-07 \ + --actor-id 123456789012345 \ + --workspace-url "https://another-workspace.cloud.turbot.com" +``` + +#### Specific resource type by full URI + +```bash +python fetch_resource_deletions.py --profile my-workspace --date 2026-05-07 \ + --resource-type "tmod:@turbot/aws-lambda#/resource/types/functionVersion" +``` + +#### Open-ended since (no end date) + +```bash +python fetch_resource_deletions.py --profile my-workspace --since 2026-05-01 --resource-type volume +# Fetches from May 1 to now +# Output: my-workspace-resource-deleted-volume-2026-05-01.csv +``` + +#### Day-by-day tracking for a week + +```bash +for d in 01 02 03 04 05 06 07; do + python fetch_resource_deletions.py --profile my-workspace --date 2026-05-$d +done +# Produces: my-workspace-resource-deleted-all-types-2026-05-01.csv through -07.csv +# No overlap between files — each covers midnight-to-midnight UTC +``` + +#### Compare two workspaces for the same day + +```bash +python fetch_resource_deletions.py --profile workspace-a --date 2026-05-07 +python fetch_resource_deletions.py --profile workspace-b --date 2026-05-07 \ + --actor-id 123456789012345 \ + --workspace-url "https://workspace-b.cloud.turbot.com" +``` + +### Resource type aliases + +Short names you can use with `--resource-type` instead of full URIs: + +| Alias | Resource type URI | +|-------|-------------------| +| `snapshot` | `tmod:@turbot/aws-ec2#/resource/types/snapshot` | +| `instance` | 
`tmod:@turbot/aws-ec2#/resource/types/instance` | +| `volume` | `tmod:@turbot/aws-ec2#/resource/types/volume` | +| `ami` | `tmod:@turbot/aws-ec2#/resource/types/image` | +| `launch-template` | `tmod:@turbot/aws-ec2#/resource/types/launchTemplate` | +| `bucket` | `tmod:@turbot/aws-s3#/resource/types/bucket` | +| `lambda` | `tmod:@turbot/aws-lambda#/resource/types/function` | +| `function-version` | `tmod:@turbot/aws-lambda#/resource/types/functionVersion` | +| `role` | `tmod:@turbot/aws-iam#/resource/types/role` | +| `vpc` | `tmod:@turbot/aws-vpc-core#/resource/types/vpc` | +| `security-group` | `tmod:@turbot/aws-vpc-security#/resource/types/securityGroup` | +| `subscription` | `tmod:@turbot/aws-sns#/resource/types/subscription` | +| `ecs-service` | `tmod:@turbot/aws-ecs#/resource/types/service` | + +You can also pass any `tmod:@turbot/...` URI directly. To find the URI for a resource type not listed above, navigate to the resource type in the Guardrails console and copy the URI from the resource type details, or run: + +```bash +turbot graphql --profile my-workspace --query='{ resourceTypes(filter: "snapshot") { items { uri turbot { title } } } }' --format yaml +``` + +### Command-line reference + +``` +usage: fetch_resource_deletions.py [-h] --profile PROFILE + [--date DATE] [--since SINCE] [--until UNTIL] [--days DAYS] + [--actor-id ACTOR_ID] [--auto-detect-actor] + [--resource-type RESOURCE_TYPE] + [--output OUTPUT] [--workspace-url WORKSPACE_URL] + +options: + --profile PROFILE Turbot CLI profile name (required) + +time range (pick one): + --date DATE Single calendar day, midnight-to-midnight UTC (YYYY-MM-DD) + --since SINCE Start date inclusive (YYYY-MM-DD) + --until UNTIL End date exclusive (YYYY-MM-DD), use with --since + --days DAYS Rolling window in days (default: 1) + + --actor-id ACTOR_ID Turbot actor identity ID (pass explicitly, or use --auto-detect-actor) + --auto-detect-actor Auto-detect Turbot Identity ID from the workspace via GraphQL query + 
--resource-type TYPE Resource type alias or full tmod URI (default: all types) + --output OUTPUT Output CSV file path (default: auto-generated) + --workspace-url URL Workspace base URL (auto-read from credentials.yml if omitted) +``` + +### Time range behavior + +| Option | Boundary | Example | +|--------|----------|---------| +| `--date 2026-05-07` | `timestamp:>2026-05-07 timestamp:<2026-05-08` | Midnight-to-midnight UTC, no overlap | +| `--since 2026-05-01 --until 2026-05-08` | `timestamp:>2026-05-01 timestamp:<2026-05-08` | Arbitrary range | +| `--since 2026-05-01` | `timestamp:>2026-05-01` | From May 1 to now | +| `--days 3` | `timestamp:>2026-05-05` | Rolling 3 days from today | + +### Safety guard + +To prevent accidental fetches of millions of rows, the script blocks queries that combine **all resource types** (no `--resource-type`) with **no absolute time boundary** (no `--date` or `--since`). Either add a time boundary or specify a resource type. + +--- + +## Output format + +### Columns + +| Column | Description | +|--------|-------------| +| Timestamp | Activity timestamp (DD-Mon-YYYY HH:MM:SS UTC) | +| NotificationType | RESOURCE DELETED | +| Type / Message | Resource type category (e.g., Object > Snapshot) | +| Resource | Resource title (e.g., snap-0abcdef1234567890) | +| Actor | Actor identity name (e.g., Turbot Identity) | +| ResourceId | Guardrails resource ID | +| TrunkPath | Resource hierarchy path, or (deleted) | +| Detail URL | Link to the resource activity page in the console | + +### Example output + +```csv +Timestamp,NotificationType,Type / Message,Resource,Actor,ResourceId,TrunkPath,Detail URL +07-May-2026 13:42:32,RESOURCE DELETED,Object > Snapshot,snap-0abcdef1234567890,Turbot Identity,123456789012345,(deleted),https://my-workspace.cloud.turbot.com/apollo/resources/123456789012345/activity +07-May-2026 11:32:25,RESOURCE DELETED,Object > Instance,i-0abcdef1234567890,Turbot 
Identity,123456789012346,(deleted),https://my-workspace.cloud.turbot.com/apollo/resources/123456789012346/activity +``` + +--- + +## Credentials + +Both scripts use `~/.config/turbot/credentials.yml` (same as Turbot CLI): + +```yaml +my-workspace: + workspace: "https://my-workspace.cloud.turbot.com" + accessKey: "your-access-key" + secretKey: "your-secret-key" + +another-workspace: + workspace: "https://another-workspace.cloud.turbot.com" + accessKey: "your-access-key" + secretKey: "your-secret-key" +``` + +List configured profiles: + +```bash +turbot workspace list +``` + +--- + +## Case study: Investigating snapshot deletions by Turbot + +A customer reported that EC2 snapshots were being deleted in their workspace. The console Resource Activities report timed out due to the workspace having millions of notifications. Here is the workflow used to investigate and produce a daily report. + +### Step 1 — Fetch snapshot deletions by Turbot for a specific day + +The simplest command — `--auto-detect-actor` queries the workspace to find the Turbot Identity ID automatically, and the workspace URL is read from `credentials.yml`: + +```bash +python fetch_resource_deletions.py \ + --profile my-workspace \ + --date 2026-05-07 \ + --resource-type snapshot \ + --auto-detect-actor +# Auto-detected Turbot Identity: 123456789012345 +# Output: my-workspace-resource-deleted-snapshot-2026-05-07.csv +``` + +This fetches all snapshot deletions by the Turbot automation identity on May 7, midnight-to-midnight UTC. 
+ +### Step 2 — Broaden the scope to all resource types + +To check what else Turbot deleted on the same day, omit `--resource-type`: + +```bash +python fetch_resource_deletions.py \ + --profile my-workspace \ + --date 2026-05-07 \ + --auto-detect-actor +# Output: my-workspace-resource-deleted-all-types-2026-05-07.csv +``` + +A typical breakdown might look like: + +``` + 127 Object > Snapshot + 72 Object > Instance + 7 Object > Function Version + 5 Object > Subscription + 3 Object > Launch Template + 2 Object > Volume + 1 Object > Service + 217 TOTAL +``` + +### Step 3 — Generate a multi-day report + +```bash +for d in 01 02 03 04 05 06 07; do + python fetch_resource_deletions.py \ + --profile my-workspace \ + --date 2026-05-$d \ + --resource-type snapshot \ + --auto-detect-actor +done +``` + +Each file covers exactly midnight-to-midnight UTC with no overlap, making day-over-day comparison reliable. + +### Step 4 — Fetch a date range in a single CSV + +```bash +python fetch_resource_deletions.py \ + --profile my-workspace \ + --since 2026-05-01 --until 2026-05-08 \ + --resource-type snapshot \ + --auto-detect-actor +# Output: my-workspace-resource-deleted-snapshot-2026-05-01.csv +``` + +### Step 5 — Fetch deletions by all actors (not just Turbot) + +Omit `--auto-detect-actor` and `--actor-id` to see deletions by all actors. This helps determine if resources were deleted by Turbot automation, by users, or by external processes: + +```bash +python fetch_resource_deletions.py \ + --profile my-workspace \ + --date 2026-05-07 \ + --resource-type snapshot +``` + +The Actor column in the CSV will show who performed each deletion. 
+ +### Step 6 — Use an explicit actor ID (alternative to auto-detect) + +If you already know the Turbot Identity ID (found via console > Permissions > Turbot Identity), you can pass it directly: + +```bash +python fetch_resource_deletions.py \ + --profile my-workspace \ + --date 2026-05-07 \ + --resource-type snapshot \ + --actor-id 123456789012345 +``` + +### Key findings from this investigation + +- The console Export CSV is capped at **5,000 rows** — this script has no such limit +- The console report times out on workspaces with millions of notifications — this script paginates reliably +- `--auto-detect-actor` queries the workspace for the Turbot Identity ID automatically — no need to look it up manually, and the workspace URL is auto-read from `credentials.yml` +- The Turbot Identity ID is **different per workspace** — do not reuse an ID from one workspace on another +- The `metadata.stats.total` in the GraphQL response is **approximate** and does not reflect timestamp filters — use the actual row count +- Without `--actor-id` or `--auto-detect-actor`, deletions by all actors are returned, including "Unidentified Identity" (typically AWS-side deletions not initiated by Guardrails) +- Without `--resource-type`, all resource types are fetched — useful for a full picture but requires a time boundary (`--date` or `--since`) to avoid fetching millions of rows + +--- + +## Notes + +- **metadata.stats.total is approximate** — the total count in the GraphQL response does not apply all filter conditions (particularly timestamp boundaries). The actual item count from pagination is the accurate number. +- **Turbot Identity ID** — the actor identity ID for the Turbot automation identity varies per workspace. Use `--auto-detect-actor` to query it automatically, or find it via the console under Permissions > Turbot Identity and pass `--actor-id` explicitly. Omit both to fetch deletions by all actors. 
+- **Workspace URL auto-detection** — the workspace URL is automatically read from `~/.config/turbot/credentials.yml` when `--workspace-url` is not provided. This populates the Detail URL column in the CSV. +- **Timestamps are UTC** — all `--date`, `--since`, and `--until` values are interpreted as UTC midnight boundaries. diff --git a/guardrails_utilities/python_utils/notifications/get-resource-activity-report/fetch_resource_deletions.py b/guardrails_utilities/python_utils/notifications/get-resource-activity-report/fetch_resource_deletions.py new file mode 100755 index 000000000..7dff1bda4 --- /dev/null +++ b/guardrails_utilities/python_utils/notifications/get-resource-activity-report/fetch_resource_deletions.py @@ -0,0 +1,306 @@ +#!/usr/bin/env python3 +""" +Fetch 'Resources Deleted by Turbot' from a Guardrails workspace using the turbot +CLI, with pagination and CSV output matching the console export format. + +Supports calendar-day boundaries (midnight-to-midnight UTC) for consistent +day-over-day tracking. Fetches all resource types by default when a time +boundary is provided; optionally filter to a specific resource type. 
+ +Usage: + # All resource types deleted by Turbot on a single day + python fetch_resource_deletions.py --profile my-workspace --date 2026-05-07 + + # Only snapshots + python fetch_resource_deletions.py --profile my-workspace --date 2026-05-07 \ + --resource-type snapshot + + # Date range, all types + python fetch_resource_deletions.py --profile my-workspace --since 2026-05-01 --until 2026-05-08 + + # Rolling window + python fetch_resource_deletions.py --profile my-workspace --days 3 +""" + +import argparse +import csv +import json +import os +import subprocess +import sys +from datetime import datetime, timedelta + +import yaml + +QUERY_FILE = os.path.join(os.path.dirname(__file__), "resource_deleted_by_turbot.graphql") +PAGE_SIZE = 200 + +WORKSPACE_URLS = { + # Add your workspace profiles here: + # "my-workspace": "https://my-workspace.cloud.turbot.com", +} + +TURBOT_IDENTITY_IDS = { + # Add Turbot Identity IDs per workspace (find via console > Permissions > Turbot Identity): + # "my-workspace": "123456789012345", +} + +RESOURCE_TYPE_ALIASES = { + "snapshot": "tmod:@turbot/aws-ec2#/resource/types/snapshot", + "ec2-snapshot": "tmod:@turbot/aws-ec2#/resource/types/snapshot", + "instance": "tmod:@turbot/aws-ec2#/resource/types/instance", + "ec2-instance": "tmod:@turbot/aws-ec2#/resource/types/instance", + "volume": "tmod:@turbot/aws-ec2#/resource/types/volume", + "ec2-volume": "tmod:@turbot/aws-ec2#/resource/types/volume", + "ami": "tmod:@turbot/aws-ec2#/resource/types/image", + "launch-template": "tmod:@turbot/aws-ec2#/resource/types/launchTemplate", + "bucket": "tmod:@turbot/aws-s3#/resource/types/bucket", + "s3-bucket": "tmod:@turbot/aws-s3#/resource/types/bucket", + "function": "tmod:@turbot/aws-lambda#/resource/types/function", + "lambda": "tmod:@turbot/aws-lambda#/resource/types/function", + "function-version": "tmod:@turbot/aws-lambda#/resource/types/functionVersion", + "role": "tmod:@turbot/aws-iam#/resource/types/role", + "vpc": 
def resolve_resource_type(value):
    """Translate a ``--resource-type`` argument into a resource type URI.

    Args:
        value: A short alias, a full ``tmod:`` URI, or None when no resource
            type filter was requested.

    Returns:
        None when ``value`` is None; ``value`` unchanged when it already
        starts with ``tmod:``; otherwise the URI registered for the
        lower-cased alias in RESOURCE_TYPE_ALIASES.

    Exits with status 1, after listing the known aliases on stderr, when the
    alias is not registered.
    """
    if value is None:
        return None
    if value.startswith("tmod:"):
        return value
    alias = RESOURCE_TYPE_ALIASES.get(value.lower())
    if alias:
        return alias
    print(f"Error: unknown resource type alias '{value}'.", file=sys.stderr)
    print(f"Known aliases: {', '.join(sorted(RESOURCE_TYPE_ALIASES))}", file=sys.stderr)
    print("Or pass a full tmod:@turbot/... URI.", file=sys.stderr)
    sys.exit(1)


def _invoke_turbot(cmd, timeout=600):
    """Run ``cmd`` and return its stdout decoded as JSON.

    Every failure mode (executable missing, timeout, non-zero exit status,
    output that is not JSON) is reported as RuntimeError so the caller has a
    single exception type to handle.
    """
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except FileNotFoundError as exc:
        raise RuntimeError(
            "'turbot' executable not found on PATH; is the Turbot CLI installed?"
        ) from exc
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError(f"command timed out after {timeout} seconds") from exc
    except OSError as exc:
        raise RuntimeError(f"could not start the turbot CLI: {exc}") from exc

    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip() or result.stdout.strip() or "(no details)")

    try:
        return json.loads(result.stdout)
    except ValueError as exc:  # json.JSONDecodeError is a ValueError
        raise RuntimeError(f"output is not valid JSON: {exc}") from exc


def run_turbot_graphql(profile, query, variables=None, exit_on_error=True):
    """Run ``turbot graphql`` for ``profile`` and return the parsed response.

    Args:
        profile: Turbot CLI profile name, passed as ``--profile``.
        query: Value passed as ``--query``.
        variables: Optional dict; JSON-encoded and passed as ``--variables``
            when truthy.
        exit_on_error: When True (the default) a failure is printed to stderr
            and the process exits with status 1. When False the failure is
            raised as RuntimeError instead.

    Returns:
        The object decoded from the JSON the CLI wrote to stdout.

    Raises:
        RuntimeError: Only when ``exit_on_error`` is False and the CLI could
            not be started, timed out, exited non-zero, or printed output
            that is not JSON.
    """
    cmd = [
        "turbot", "graphql",
        "--profile", profile,
        "--format", "json",
        "--query", query,
    ]
    if variables:
        cmd.extend(["--variables", json.dumps(variables)])

    try:
        return _invoke_turbot(cmd)
    except RuntimeError as exc:
        if exit_on_error:
            print(f"turbot CLI error: {exc}", file=sys.stderr)
            sys.exit(1)
        raise


def run_query(profile, variables):
    """Run the notifications query identified by QUERY_FILE.

    QUERY_FILE (a path next to this script) is handed to the CLI as the
    ``--query`` value.
    NOTE(review): this relies on the CLI accepting a file path for --query.
    """
    return run_turbot_graphql(profile, QUERY_FILE, variables)


def detect_turbot_identity(profile):
    """Look up the Turbot Identity resource ID in the workspace.

    Queries for one resource of type ``turbotIdentity``. Detection is
    best-effort: any failure is reported as a warning on stderr and None is
    returned, which the caller treats as "do not filter by actor".

    Returns:
        The ``turbot.id`` of the first matching resource, or None.
    """
    query = ("{ resources(filter: \"resourceTypeId:'tmod:@turbot/turbot-iam"
             "#/resource/types/turbotIdentity' limit:1\") "
             "{ items { turbot { id title } } } }")
    try:
        data = run_turbot_graphql(profile, query, exit_on_error=False)
        # GraphQL may answer with explicit nulls, so default each level.
        items = ((data or {}).get("resources") or {}).get("items") or []
        if items:
            actor_id = items[0]["turbot"]["id"]
            print(f"Auto-detected Turbot Identity: {actor_id}")
            return actor_id
    except Exception as e:  # deliberate: detection must never abort the run
        print(f"Warning: auto-detect failed: {e}", file=sys.stderr)
    print("Warning: could not auto-detect Turbot Identity ID. Fetching deletions by ALL actors.",
          file=sys.stderr)
    return None
def detect_workspace_url(profile):
    """Read the workspace base URL for ``profile`` from the Turbot CLI credentials file.

    The lookup is best-effort: a missing or unreadable file, unparseable
    YAML, an unknown profile, or a profile without a ``workspace`` value all
    yield an empty string.

    Returns:
        The ``workspace`` value with any trailing ``/`` removed, or ``""``.
    """
    creds_path = os.path.expanduser("~/.config/turbot/credentials.yml")
    try:
        with open(creds_path, encoding="utf-8") as f:
            creds = yaml.safe_load(f)
    except OSError:
        # Missing file, permission problem, path is a directory, ...
        return ""
    except yaml.YAMLError as exc:
        print(f"Warning: could not parse {creds_path}: {exc}", file=sys.stderr)
        return ""

    profile_data = creds.get(profile) if isinstance(creds, dict) else None
    if not isinstance(profile_data, dict):
        return ""
    # `or ""` keeps an explicit null from being rendered as the string "None".
    return str(profile_data.get("workspace") or "").rstrip("/")


def build_filter(actor_id, resource_type_uri, date, since, until, days):
    """Build the ``filter`` variable for the notifications query.

    Args:
        actor_id: Actor identity ID to restrict to, or a falsy value for all actors.
        resource_type_uri: Resource type URI to restrict to, or a falsy value for all types.
        date: ``YYYY-MM-DD`` calendar day; takes precedence over the other
            time arguments and is bounded by the following day.
        since: ``YYYY-MM-DD`` lower bound, used when ``date`` is not given.
        until: ``YYYY-MM-DD`` upper bound, used when ``date`` is not given.
        days: Size of the rolling window (counted back from the current UTC
            time) used when neither ``date`` nor ``since`` is given.

    Returns:
        A two-element list: the space-joined filter string and ``limit:<PAGE_SIZE>``.

    Raises:
        ValueError: If ``date`` is not a valid ``YYYY-MM-DD`` date.
    """
    # Local import: the module-level import only brings in datetime and timedelta.
    from datetime import timezone

    parts = ["notificationType:resource_deleted"]
    if actor_id:
        parts.append(f"actorIdentityId:{actor_id}")
    if resource_type_uri:
        parts.append(f"resourceTypeId:'{resource_type_uri}'")

    # NOTE(review): the lower bounds use `>` although --since is documented
    # as inclusive; confirm how the server treats a date-only `>` bound.
    if date:
        next_day = (datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)).strftime("%Y-%m-%d")
        parts.append(f"timestamp:>{date}")
        parts.append(f"timestamp:<{next_day}")
    else:
        if since:
            parts.append(f"timestamp:>{since}")
        else:
            window_start = datetime.now(timezone.utc) - timedelta(days=days)
            parts.append(f"timestamp:>{window_start.strftime('%Y-%m-%d')}")
        # Honour an explicit upper bound instead of silently dropping it
        # when no --since accompanies it.
        if until:
            parts.append(f"timestamp:<{until}")

    return [" ".join(parts), f"limit:{PAGE_SIZE}"]


def format_timestamp(iso_ts):
    """Render an ISO-8601 timestamp as ``DD-Mon-YYYY HH:MM:SS`` in UTC.

    A trailing ``Z`` is accepted. Timestamps carrying another UTC offset are
    converted to UTC first; timestamps without an offset are formatted as-is.
    """
    from datetime import timezone

    dt = datetime.fromisoformat(iso_ts.replace("Z", "+00:00"))
    if dt.tzinfo is not None:
        dt = dt.astimezone(timezone.utc)
    return dt.strftime("%d-%b-%Y %H:%M:%S")


def format_actor(actor):
    """Return a display name for a notification's ``actor`` object.

    Returns ``"<identity> > <persona>"`` when both titles are present and
    differ, otherwise whichever title is available, or ``""`` when there is
    none (including when ``actor`` itself is empty).
    """
    if not actor:
        return ""
    identity_title = (actor.get("identity") or {}).get("title") or ""
    persona_title = (actor.get("persona") or {}).get("title") or ""
    if identity_title and persona_title and persona_title != identity_title:
        return f"{identity_title} > {persona_title}"
    return identity_title or persona_title
def to_csv_row(item, workspace_url):
    """Convert one notification item into a row matching CSV_HEADERS.

    Args:
        item: Notification dict; ``turbot.createTimestamp`` and
            ``notificationType`` are required, while ``resource`` and
            ``actor`` (and their nested objects) may be missing or null.
        workspace_url: Base URL used to build the Detail URL column.

    Returns:
        A list of eight values in CSV_HEADERS order. TrunkPath falls back to
        ``(deleted)`` and Detail URL is empty when the resource has no ID.
    """
    resource = item.get("resource") or {}
    res_turbot = resource.get("turbot") or {}
    # Every level can be an explicit null in the response, so default each one.
    res_type_title = ((resource.get("type") or {}).get("turbot") or {}).get("title") or ""
    trunk_title = (resource.get("trunk") or {}).get("title") or "(deleted)"
    res_id = res_turbot.get("id") or ""
    detail_url = f"{workspace_url}/apollo/resources/{res_id}/activity" if res_id else ""

    return [
        format_timestamp(item["turbot"]["createTimestamp"]),
        item["notificationType"].replace("_", " ").upper(),
        f"Object > {res_type_title}" if res_type_title else "",
        res_turbot.get("title") or "",
        format_actor(item.get("actor")),
        res_id,
        trunk_title,
        detail_url,
    ]


def main():
    """Command-line entry point: fetch deletion notifications and write them to CSV.

    Parses and validates the arguments, resolves the workspace URL, resource
    type and actor, then pages through the notifications query, appending
    each page to the CSV and flushing so partial output survives an
    interruption. Exits with an error when the query would be unbounded (no
    resource type and no time option at all) or when the time options
    conflict.
    """
    # Local imports: names the module-level import block does not provide.
    import re
    from datetime import timezone

    parser = argparse.ArgumentParser(
        description="Fetch resource deletions by Turbot from a Guardrails workspace via turbot CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
resource type aliases:
  snapshot, instance, volume, ami, launch-template, bucket,
  lambda, function-version, role, vpc, security-group,
  subscription, ecs-service

  Or pass a full URI: tmod:@turbot/aws-ec2#/resource/types/snapshot

examples:
  %(prog)s --profile my-workspace --date 2026-05-07
  %(prog)s --profile my-workspace --date 2026-05-07 --resource-type snapshot
  %(prog)s --profile my-workspace --since 2026-05-01 --until 2026-05-08
  %(prog)s --profile my-workspace --days 7 --resource-type instance
""",
    )
    parser.add_argument("--profile", required=True, help="Turbot CLI profile name (e.g. my-workspace)")

    time_group = parser.add_argument_group("time range (pick one)")
    time_group.add_argument("--date", help="Single calendar day, midnight-to-midnight UTC (YYYY-MM-DD)")
    time_group.add_argument("--since", help="Start date inclusive (YYYY-MM-DD)")
    time_group.add_argument("--until", help="End date exclusive (YYYY-MM-DD), use with --since")
    # default=None (not 1) so an explicit --days can be told apart from "no
    # time option given"; the effective default of 1 is applied below.
    time_group.add_argument("--days", type=int, default=None, help="Rolling window in days (default: 1)")

    parser.add_argument("--actor-id", help="Turbot actor identity ID")
    parser.add_argument("--auto-detect-actor", action="store_true",
                        help="Auto-detect Turbot Identity ID from the workspace")
    parser.add_argument("--resource-type",
                        help="Resource type alias or tmod URI (default: all types)")
    parser.add_argument("--output", help="Output CSV file path (default: auto-generated)")
    parser.add_argument("--workspace-url", help="Workspace base URL (auto-read from credentials.yml if omitted)")
    args = parser.parse_args()

    # Reject combinations that would otherwise be silently ignored.
    if args.date and (args.since or args.until):
        parser.error("--date cannot be combined with --since/--until")
    if args.until and not args.since:
        parser.error("--until requires --since")
    if args.days is not None and args.days < 0:
        parser.error("--days must not be negative")

    resource_type_uri = resolve_resource_type(args.resource_type)

    # Safety guard, checked before any workspace query is made. An explicit
    # --days is a time boundary just like --date/--since.
    has_time_bound = bool(args.date or args.since or args.days is not None)
    if not resource_type_uri and not has_time_bound:
        print("Error: fetching all resource types without a time boundary is unsafe (millions of rows).",
              file=sys.stderr)
        print("Add --date, --since, --days, or --resource-type to bound the query.", file=sys.stderr)
        sys.exit(1)
    days = 1 if args.days is None else args.days

    workspace_url = args.workspace_url or WORKSPACE_URLS.get(args.profile) or detect_workspace_url(args.profile)

    actor_id = args.actor_id or TURBOT_IDENTITY_IDS.get(args.profile)
    if not actor_id and args.auto_detect_actor:
        # detect_turbot_identity() prints its own warning when it fails.
        actor_id = detect_turbot_identity(args.profile)
    elif not actor_id:
        print(f"Warning: no actor-id for profile '{args.profile}'. Fetching deletions by ALL actors.",
              file=sys.stderr)
        print(f" Use --auto-detect-actor or --actor-id to filter by Turbot Identity.", file=sys.stderr)

    filters = build_filter(actor_id, resource_type_uri, args.date, args.since, args.until, days)

    # A full tmod URI contains '/', ':' and '#', which cannot go into a file
    # name; aliases such as "snapshot" pass through unchanged.
    if args.resource_type:
        type_tag = re.sub(r"[^A-Za-z0-9._-]+", "-", args.resource_type).strip("-") or "custom-type"
    else:
        type_tag = "all-types"
    # UTC, to match the UTC boundaries used in the filter.
    date_tag = args.date or args.since or datetime.now(timezone.utc).strftime("%Y%m%d")
    output_file = args.output or f"{args.profile}-resource-deleted-{type_tag}-{date_tag}.csv"

    scope = resource_type_uri or "all resource types"
    print(f"Profile: {args.profile}")
    print(f"Resource type: {scope}")
    print(f"Filters: {filters}")
    print(f"Output: {output_file}")
    print()

    paging = None
    page = 0
    total_written = 0

    # Explicit encoding: resource titles are not guaranteed to be ASCII.
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(CSV_HEADERS)

        while True:
            page += 1
            variables = {"filter": filters}
            if paging:
                variables["paging"] = paging

            data = run_query(args.profile, variables)
            notifications = data.get("notifications") or {}
            items = notifications.get("items") or []

            for item in items:
                writer.writerow(to_csv_row(item, workspace_url))
            # Flush once per page so an interrupted run keeps what it fetched.
            f.flush()
            total_written += len(items)

            print(f" Page {page}: {len(items)} items (cumulative: {total_written})")

            paging = (notifications.get("paging") or {}).get("next")
            if not paging or not items:
                break

    print(f"\nDone. {total_written} rows written to {output_file}")


if __name__ == "__main__":
    main()
def resolve_resource_types(values):
    """Expand ``--resource-type`` values into a flat list of resource type URIs.

    Each value may hold several comma-separated tokens. A token is looked up
    case-insensitively in RESOURCE_TYPE_ALIASES (the alias keys are lower
    case); a token that is not an alias is passed through unchanged, so full
    ``tmod:`` URIs keep their exact spelling. Blank tokens are dropped.

    Args:
        values: Iterable of raw argument strings; None is treated as empty.

    Returns:
        List of resolved URIs in the order the tokens were given.
    """
    resolved = []
    for value in values or ():
        for token in value.split(","):
            token = token.strip()
            if not token:
                continue
            resolved.append(RESOURCE_TYPE_ALIASES.get(token.lower(), token))
    return resolved
def load_profile(profile_name):
    """Load one workspace profile from the credentials file at CREDENTIALS_PATH.

    Args:
        profile_name: Key of the profile inside the credentials YAML.

    Returns:
        A ``(workspace, config)`` tuple: ``workspace`` is the base URL without
        a trailing slash and ``config`` holds the GraphQL ``endpoint`` and the
        request ``headers`` (HTTP Basic auth built from the access/secret key).

    Exits with status 1 after printing an error when the file is missing or
    holds no profiles, the profile is unknown or malformed, or one of
    ``workspace``, ``accessKey``, ``secretKey`` is absent or empty.
    """
    if not os.path.exists(CREDENTIALS_PATH):
        print(f"Error: Credentials file not found at {CREDENTIALS_PATH}")
        print(
            "Create it with your workspace profiles. See: "
            "https://turbot.com/guardrails/docs/reference/cli/installation"
            "#set-up-your-turbot-guardrails-credentials"
        )
        sys.exit(1)

    with open(CREDENTIALS_PATH, "r", encoding="utf-8") as f:
        creds = yaml.safe_load(f)

    # An empty file parses to None and a scalar-only file to a non-mapping.
    if not isinstance(creds, dict):
        print(f"Error: No profiles found in {CREDENTIALS_PATH}")
        sys.exit(1)

    if profile_name not in creds:
        available = ", ".join(str(name) for name in creds)
        print(f"Error: Profile '{profile_name}' not found. Available: {available}")
        sys.exit(1)

    profile = creds[profile_name]
    if not isinstance(profile, dict):
        print(f"Error: Profile '{profile_name}' is not a mapping of settings")
        sys.exit(1)
    for key in ("workspace", "accessKey", "secretKey"):
        # A key that is present but null/empty is as unusable as a missing one.
        if profile.get(key) in (None, ""):
            print(f"Error: Profile '{profile_name}' missing '{key}'")
            sys.exit(1)

    workspace = str(profile["workspace"]).rstrip("/")
    endpoint = f"{workspace}/{GRAPHQL_PATH}"

    access_key = str(profile["accessKey"])
    secret_key = str(profile["secretKey"])
    auth_token = b64encode(f"{access_key}:{secret_key}".encode("utf-8")).decode()
    del access_key, secret_key

    return workspace, {
        "endpoint": endpoint,
        "headers": {
            "Authorization": f"Basic {auth_token}",
            "Content-Type": "application/json",
        },
    }


def graphql_request(config, query, variables=None, timeout=DEFAULT_TIMEOUT):
    """POST a GraphQL query to the workspace and return the decoded JSON body.

    Args:
        config: Dict with ``endpoint`` and ``headers`` as built by load_profile.
        query: GraphQL document text.
        variables: Optional variables dict; sent only when truthy.
        timeout: Timeout in seconds handed to ``requests.post``.

    Returns:
        The parsed response. GraphQL-level ``errors`` are printed but do not
        raise, so the caller must still inspect ``data``/``errors``.

    Raises:
        requests.exceptions.RequestException: On transport failures or a
            non-2xx status (via ``raise_for_status``).
    """
    payload = {"query": query}
    if variables:
        payload["variables"] = variables

    response = requests.post(
        config["endpoint"],
        json=payload,
        headers=config["headers"],
        timeout=timeout,
    )
    response.raise_for_status()
    result = response.json()

    if "errors" in result:
        for err in result["errors"]:
            msg = str(err.get("message", "unknown error"))
            print(f" GraphQL error: {msg}")

    return result


def get_turbot_identity_id(config):
    """Auto-detect the Turbot Identity actor ID in the workspace.

    Returns:
        The ``turbot.id`` of the first ``turbotIdentity`` resource, or None
        when the query yields no items (including when the server answers
        with null ``data`` or ``resources``).
    """
    result = graphql_request(
        config,
        TURBOT_IDENTITY_QUERY,
        {
            "filter": [
                "resourceTypeId:'tmod:@turbot/turbot-iam#/resource/types/turbotIdentity'",
                "limit:1",
            ]
        },
    )
    # A failed field is returned as an explicit null, so default each level.
    resources = (result.get("data") or {}).get("resources") or {}
    items = resources.get("items") or []
    if items:
        return items[0]["turbot"]["id"]
    return None
def count_window(config, base_filter, timeout=PROBE_TIMEOUT):
    """Return the stats total for `base_filter`, or None on failure.

    Args:
        config: Request config accepted by ``graphql_request``.
        base_filter: Filter string for the window being probed.
        timeout: Timeout in seconds for the count request.

    Returns:
        ``data.notifications.metadata.stats.total`` from the response, or
        ``None`` when any level is missing or the request raises.
    """
    try:
        result = graphql_request(config, COUNT_QUERY, {"filter": base_filter}, timeout=timeout)
        meta = ((result.get("data") or {}).get("notifications") or {}).get("metadata") or {}
        return (meta.get("stats") or {}).get("total")
    except Exception as e:
        # The probe is best-effort: a failed count must not abort the fetch,
        # so every failure is reported and turned into "unknown".
        print(f" Count query failed: {e}")
        return None


def fetch_window(config, base_filter, page_size=PAGE_SIZE, data_source=None):
    """Paginate one window's results.

    `base_filter` is the per-window filter string; `limit:` is appended as a
    separate filter array element to match the console's variable shape.

    Args:
        config: Request config accepted by ``graphql_request``.
        base_filter: Filter string for this window.
        page_size: Value for the ``limit:`` filter element.
        data_source: Optional ``dataSource`` variable; omitted when falsy.

    Returns:
        ``(items, ok)``. ``ok`` is False when the window was cut short,
        either because a page timed out on every retry or because the
        server answered with GraphQL errors and no notifications payload;
        ``items`` then holds whatever was collected before the failure.
    """
    filter_array = [base_filter, f"limit:{page_size}"]
    all_items = []
    next_page = None
    page_num = 0
    max_retries = 3  # loop-invariant, so bound once rather than per page

    while True:
        page_num += 1
        variables = {"filter": filter_array, "paging": next_page}
        if data_source:
            variables["dataSource"] = data_source

        for attempt in range(1, max_retries + 1):
            try:
                result = graphql_request(config, NOTIFICATIONS_QUERY, variables)
                break
            except requests.exceptions.Timeout:
                if attempt < max_retries:
                    wait = attempt * 15  # linear back-off: 15s, then 30s
                    print(f" Page {page_num}: timeout (attempt {attempt}/{max_retries}) — retrying in {wait}s...")
                    time.sleep(wait)
                else:
                    print(f" Page {page_num}: timeout after {max_retries} attempts — stopping window")
                    return all_items, False

        data = result.get("data") or {}
        notifications = data.get("notifications") or {}

        # An error response without a notifications payload looks exactly
        # like "last page, zero items" further down. Report it as a partial
        # window instead of silently declaring the window complete.
        if not notifications and result.get("errors"):
            print(f" Page {page_num}: GraphQL errors and no data — stopping window")
            return all_items, False

        items = notifications.get("items") or []
        all_items.extend(items)

        next_cursor = (notifications.get("paging") or {}).get("next")
        # Progress is printed for the first page, every tenth page and the last one.
        if page_num == 1 or page_num % 10 == 0 or not next_cursor:
            print(f" Page {page_num}: {len(items)} items (total so far: {len(all_items)})")

        if not next_cursor:
            break
        next_page = next_cursor

    return all_items, True
+ """ + filter_array = [base_filter, f"limit:{page_size}"] + all_items = [] + next_page = None + page_num = 0 + + while True: + page_num += 1 + variables = {"filter": filter_array, "paging": next_page} + if data_source: + variables["dataSource"] = data_source + + max_retries = 3 + for attempt in range(1, max_retries + 1): + try: + result = graphql_request(config, NOTIFICATIONS_QUERY, variables) + break + except requests.exceptions.Timeout: + if attempt < max_retries: + wait = attempt * 15 + print(f" Page {page_num}: timeout (attempt {attempt}/{max_retries}) — retrying in {wait}s...") + time.sleep(wait) + else: + print(f" Page {page_num}: timeout after {max_retries} attempts — stopping window") + return all_items, False + + data = result.get("data") or {} + notifications = data.get("notifications") or {} + items = notifications.get("items") or [] + all_items.extend(items) + + paging = notifications.get("paging") or {} + if page_num == 1 or page_num % 10 == 0 or not paging.get("next"): + print(f" Page {page_num}: {len(items)} items (total so far: {len(all_items)})") + + if paging and paging.get("next"): + next_page = paging["next"] + else: + break + + return all_items, True + + +def build_windows(days, from_date=None, to_date=None): + """Return [(since_date, until_date, label), ...] one entry per UTC day. 
def _parse_date(s):
    """Accept YYYY-MM-DD, return tz-aware UTC midnight datetime.

    Raises:
        ValueError: if `s` does not match ``%Y-%m-%d``.
    """
    return datetime.strptime(s, "%Y-%m-%d").replace(tzinfo=timezone.utc)


def _format_date(dt):
    """Render a datetime as a YYYY-MM-DD string."""
    return dt.strftime("%Y-%m-%d")


def format_row(item, workspace_url):
    """Format a notification item as a CSV row.

    Args:
        item: Notification dict. ``turbot.id``, ``turbot.createTimestamp``
            and ``notificationType`` are required; ``resource`` and
            ``actor`` (and anything nested in them) may be missing or null.
        workspace_url: Workspace base URL used to build the detail link.

    Returns:
        A dict keyed by the ``CSV_FIELDNAMES`` column names. Missing text
        values become ``""``; a missing trunk title becomes ``"(deleted)"``.
    """
    meta = item["turbot"]
    # fromisoformat() on older Python versions rejects a trailing "Z".
    ts = datetime.fromisoformat(meta["createTimestamp"].replace("Z", "+00:00"))
    ts_fmt = ts.strftime("%d-%b-%Y %H:%M:%S")
    ntype = item["notificationType"].upper().replace("_", " ")

    # dict.get(key, default) returns None for a key that is present with a
    # null value, so fallbacks are applied with "or" throughout.
    resource = item.get("resource") or {}

    res_type = resource.get("type") or {}
    cat_title = (res_type.get("category") or {}).get("title") or ""
    type_title = res_type.get("title") or ""
    type_msg = f"{cat_title} > {type_title}" if cat_title else type_title

    res_meta = resource.get("turbot") or {}
    resource_title = res_meta.get("title") or ""
    resource_id = res_meta.get("id") or ""
    trunk = (resource.get("trunk") or {}).get("title") or "(deleted)"

    identity = (item.get("actor") or {}).get("identity") or {}
    # Prefer the identity's trunk title, then its own title.
    actor_name = (
        (identity.get("trunk") or {}).get("title")
        or (identity.get("turbot") or {}).get("title")
        or ""
    )

    detail_url = f"{workspace_url}/apollo/notifications/{meta['id']}"

    return {
        "Timestamp": ts_fmt,
        "NotificationType": ntype,
        "Type / Message": type_msg,
        "Resource": resource_title,
        "Actor": actor_name,
        "ResourceId": resource_id,
        "TrunkPath": trunk,
        "Detail URL": detail_url,
    }


CSV_FIELDNAMES = [
    "Timestamp",
    "NotificationType",
    "Type / Message",
    "Resource",
    "Actor",
    "ResourceId",
    "TrunkPath",
    "Detail URL",
]
def write_csv(items, workspace_url, output_path):
    """Write items to CSV file.

    Args:
        items: Notification dicts, each formatted with ``format_row``.
        workspace_url: Workspace base URL passed through to ``format_row``.
        output_path: Destination path; an existing file is overwritten.

    Returns:
        The number of items written (the header row is not counted).
    """
    # Explicit UTF-8: resource and actor titles are free text, and the
    # platform default encoding cannot represent every character everywhere.
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=CSV_FIELDNAMES)
        writer.writeheader()
        for item in items:
            writer.writerow(format_row(item, workspace_url))
    return len(items)


def run_workspace(profile_name, args, resource_types, notification_types):
    """Fetch all windows for one workspace and write CSV(s).

    Args:
        profile_name: Credentials profile to load; also used as the prefix
            of every output file name.
        args: Parsed command-line namespace. Reads ``actor_id``, ``days``,
            ``from_``, ``to``, ``skip_preflight``, ``probe_timeout``,
            ``preflight_only``, ``page_size``, ``data_source``,
            ``per_window_csv`` and ``output_dir``.
        resource_types: Resource type URIs to query, one pass per type.
        notification_types: Notification types to include, or ``None`` to
            let ``build_base_filter`` apply its default.

    Returns:
        None. Progress and results are printed; CSV files are written under
        ``args.output_dir``.
    """
    from collections import Counter  # local import: only needed for the summary

    workspace, config = load_profile(profile_name)
    print(f" Workspace: {workspace}")

    actor_id = args.actor_id
    if not actor_id:
        print(" Detecting Turbot Identity ID...", end=" ")
        detected = get_turbot_identity_id(config)
        if not detected:
            print("FAILED — specify --actor-id manually")
            return
        actor_id = str(detected)
        print(actor_id)

    windows = build_windows(args.days, args.from_, args.to)
    if not windows:
        # build_windows() returns [] for an empty range; indexing
        # windows[0] below would raise IndexError.
        print(" No date windows to process — check --days / --from / --to.")
        return
    print(f" Date windows: {len(windows)} ({windows[0][0]} → {windows[-1][1]})")

    all_items = []
    # One (resource_type, label, expected, fetched, ok) tuple per window.
    summary_rows = []

    for rt in resource_types:
        print(f" Resource type: {rt}")
        for since_date, until_date, label in windows:
            base_filter = build_base_filter(
                rt, actor_id,
                since_date=since_date, until_date=until_date,
                notification_types=notification_types,
            )

            expected = None
            if not args.skip_preflight:
                expected = count_window(config, base_filter, timeout=args.probe_timeout)
                exp_str = expected if expected is not None else "?"
                print(f" [{label}] expected={exp_str}")

            if args.preflight_only:
                summary_rows.append((rt, label, expected, None, None))
                continue

            items, ok = fetch_window(
                config, base_filter,
                page_size=args.page_size,
                data_source=args.data_source,
            )
            print(f" [{label}] fetched={len(items)} ({'ok' if ok else 'partial'})")

            if expected is not None and ok and len(items) < expected * 0.95:
                print(f" WARN: fetched < 95% of expected ({len(items)}/{expected})")

            summary_rows.append((rt, label, expected, len(items), ok))

            if items and args.per_window_csv:
                fname = f"{profile_name}-{_short_type(rt)}-{label}.csv"
                path = os.path.join(args.output_dir, fname)
                write_csv(items, workspace, path)
                print(f" Per-window CSV: {path}")

            all_items.extend(items)

    if args.preflight_only:
        return

    if not all_items:
        print(" No resource activity found.")
        return

    # Single pass over the items instead of one scan per notification type.
    by_type = Counter(i["notificationType"] for i in all_items)
    created = by_type["resource_created"]
    deleted = by_type["resource_deleted"]
    updated = by_type["resource_updated"]

    date_str = datetime.now(timezone.utc).strftime("%Y%m%d")
    span = (
        f"{args.from_}_to_{args.to}"
        if args.from_ and args.to
        else f"{args.days}d-{date_str}"
    )
    filename = f"{profile_name}-resource-activity-{span}.csv"
    output_path = os.path.join(args.output_dir, filename)
    count = write_csv(all_items, workspace, output_path)

    print()
    print(f" Consolidated: {count} total"
          f" ({created} created, {deleted} deleted, {updated} updated)")
    print(f" CSV: {output_path}")

    short_total = sum(
        1
        for _, _, expected, fetched, _ in summary_rows
        if expected is not None and fetched is not None and fetched < expected * 0.95
    )
    if short_total:
        print(f" WARN: {short_total} window(s) returned < 95% of expected — re-run individual days with --from/--to")
def _short_type(uri):
    """Extract a short label from a resource type URI for filenames.

    The label is the last non-empty ``/``-separated segment of `uri`, with
    every ``:`` replaced by ``_``. Trailing slashes are ignored so that a
    URI ending in ``/`` does not produce an empty label.
    """
    # rsplit on a string without "/" returns the string itself, so the
    # no-slash case needs no separate branch.
    label = uri.rstrip("/").rsplit("/", 1)[-1]
    return label.replace(":", "_")
def _ymd_arg(value):
    """argparse ``type=`` hook: accept only YYYY-MM-DD, return it unchanged."""
    try:
        datetime.strptime(value, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date {value!r}: expected YYYY-MM-DD"
        ) from None
    return value


def main():
    """Parse the command line and run the report for every --profile.

    Invalid dates, a reversed --from/--to range, and non-positive --days or
    --page-size values are rejected through ``parser.error`` before any
    request is made.
    """
    parser = argparse.ArgumentParser(
        description="Pull resource activity from Guardrails workspaces (per-day windowed)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Last 7 days, all default activity types, default snapshot type
  %(prog)s --profile my-workspace --days 7

  # 7 days of snapshot deletions only, with per-window CSVs (resumable)
  %(prog)s --profile my-workspace --days 7 \\
      --resource-type snapshot --notification-type resource_deleted \\
      --per-window-csv

  # Backfill a single window
  %(prog)s --profile my-workspace --resource-type snapshot \\
      --from 2026-05-01 --to 2026-05-02
        """,
    )
    parser.add_argument("--profile", action="append",
                        help="Turbot CLI profile name (repeatable).")
    parser.add_argument("--days", type=int, default=7,
                        help="Lookback in days (default: 7). Ignored if --from/--to given.")
    # Only date-only values are parseable downstream, so the help text and
    # the type hook both say YYYY-MM-DD.
    parser.add_argument("--from", dest="from_", type=_ymd_arg,
                        help="Window start (YYYY-MM-DD).")
    parser.add_argument("--to", dest="to", type=_ymd_arg,
                        help="Window end (YYYY-MM-DD).")
    parser.add_argument("--resource-type", action="append",
                        help="Resource type alias or full tmod URI (repeatable, comma-OK). "
                             "Default: snapshot. Run --list-types to see aliases.")
    parser.add_argument("--notification-type", action="append",
                        choices=list(DEFAULT_NOTIFICATION_TYPES),
                        help="Limit to specific notification types (repeatable). "
                             "Default: all three.")
    parser.add_argument("--actor-id",
                        help="Actor identity ID (default: auto-detect Turbot Identity).")
    parser.add_argument("--data-source", choices=["ALL", "DB"],
                        help="Notification data source (default: server default = ALL).")
    parser.add_argument("--page-size", type=int, default=PAGE_SIZE,
                        help=f"Page size for paginated fetch (default: {PAGE_SIZE}, matches console).")
    parser.add_argument("--output-dir", default=".",
                        help="Output directory for CSV files.")
    parser.add_argument("--per-window-csv", action="store_true",
                        help="Also write one CSV per day window (in addition to consolidated).")
    parser.add_argument("--preflight-only", action="store_true",
                        help="Run per-window count probes and exit without fetching items.")
    parser.add_argument("--skip-preflight", action="store_true",
                        help="Skip per-window count probes (faster but no expected/actual check).")
    parser.add_argument("--probe-timeout", type=int, default=PROBE_TIMEOUT,
                        help=f"Per-window count timeout in seconds (default: {PROBE_TIMEOUT}).")
    parser.add_argument("--list-types", action="store_true",
                        help="Print built-in resource-type aliases and exit.")

    args = parser.parse_args()

    if args.list_types:
        print("Resource-type aliases:")
        # default=0 keeps an empty alias table from raising in max().
        width = max((len(k) for k in RESOURCE_TYPE_ALIASES), default=0)
        for alias, uri in sorted(RESOURCE_TYPE_ALIASES.items()):
            print(f" {alias:<{width}} {uri}")
        return

    if not args.profile:
        parser.error("--profile is required (unless --list-types is used)")
    if bool(args.from_) ^ bool(args.to):
        parser.error("--from and --to must be given together")
    if args.from_ and args.to:
        # Compare parsed dates, not strings: strptime also accepts
        # non-zero-padded values such as 2026-5-1.
        start = datetime.strptime(args.from_, "%Y-%m-%d")
        end = datetime.strptime(args.to, "%Y-%m-%d")
        if start > end:
            parser.error("--from must not be later than --to")
    elif args.days < 1:
        parser.error("--days must be at least 1")
    if args.page_size < 1:
        parser.error("--page-size must be at least 1")

    os.makedirs(args.output_dir, exist_ok=True)

    resource_types = (
        resolve_resource_types(args.resource_type)
        if args.resource_type
        else ["tmod:@turbot/aws-ec2#/resource/types/snapshot"]
    )
    notification_types = args.notification_type  # None means default trio in build_base_filter

    print("Resource Activity Report")
    print(f"Profiles: {', '.join(args.profile)}")
    print(f"Resource types: {', '.join(resource_types)}")
    print(f"Notif types: {', '.join(notification_types or DEFAULT_NOTIFICATION_TYPES)}")
    if args.from_:
        print(f"Window: {args.from_} → {args.to}")
    else:
        print(f"Window: last {args.days} day(s)")
    print(f"Page size: {args.page_size}")
    print(f"Data source: {args.data_source or 'server default (ALL)'}")
    if args.preflight_only:
        print("Mode: pre-flight only (counts, no fetch)")
    print(f"Output dir: {os.path.abspath(args.output_dir)}")
    print()

    for profile_name in args.profile:
        print(f"[{profile_name}]")
        run_workspace(profile_name, args, resource_types, notification_types)
        print()

    print("Done.")


if __name__ == "__main__":
    main()