diff --git a/config/taxonomy.json b/config/taxonomy.json
index 8c79c0a..fa044f6 100644
--- a/config/taxonomy.json
+++ b/config/taxonomy.json
@@ -1,5 +1,5 @@
 {
-  "version": 3,
+  "version": 4,
   "promptRevision": 1,
   "terms": [
     { "kind": "event_tag", "slug": "music", "label": "Music", "sortOrder": 1, "active": true },
@@ -144,10 +144,14 @@
     {
       "slug": "speakeasies",
       "label": "Speakeasies",
-      "queryTemplate": "best hidden speakeasy bars and secret cocktail lounges in",
+      "queryTemplate": "hidden speakeasy bars with unmarked doors password entry prohibition-era secret cocktail lounges in",
       "targetPlaceCategory": "speakeasy",
+      "inferCategoryFromModel": true,
       "active": true,
-      "sortOrder": 8
+      "sortOrder": 8,
+      "exclusions": {
+        "speakeasyRules": true
+      }
     }
   ],
   "rankingGuides": {
diff --git a/prisma/migrations/20260613130000_speakeasy_reclassify/migration.sql b/prisma/migrations/20260613130000_speakeasy_reclassify/migration.sql
new file mode 100644
index 0000000..934c8c6
--- /dev/null
+++ b/prisma/migrations/20260613130000_speakeasy_reclassify/migration.sql
@@ -0,0 +1,42 @@
+-- Re-categorize misclassified speakeasy-profile places that lack hidden-bar signals,
+-- then queue editorial re-classification for affected rows.
+
+WITH miscategorized AS (
+  UPDATE baywire.places
+  SET
+    category = 'bar',
+    classification_hash = NULL,
+    editorial_hash = NULL
+  WHERE discovery_profile_slug = 'speakeasies'
+    AND category = 'speakeasy'
+    AND NOT (
+      name ILIKE '%speakeasy%'
+      -- '%speakeasy%' is also checked in the description so rows that only name the concept there are kept.
+      OR COALESCE(description, '') ILIKE ANY (ARRAY[
+        '%speakeasy%',
+        '%hidden%',
+        '%secret%',
+        '%password%',
+        '%unmarked%',
+        '%prohibition%',
+        '%back room%',
+        '%back-room%',
+        '%alley entrance%',
+        '%no sign%',
+        '%hideaway%'
+      ])
+    )
+  RETURNING id
+)
+INSERT INTO baywire.backfill_jobs (id, kind, payload, status, priority, attempts, run_after, created_at, updated_at)
+SELECT
+  gen_random_uuid(),
+  'classify_place'::baywire."BackfillJobKind",
+  jsonb_build_object('placeID', id),
+  'pending'::baywire."BackfillJobStatus",
+  10,
+  0,
+  NOW(),
+  NOW(),
+  NOW()
+FROM miscategorized;
diff --git a/src/ingestion/taxonomy/types.ts b/src/ingestion/taxonomy/types.ts
index 9a6817b..3fa6a7f 100644
--- a/src/ingestion/taxonomy/types.ts
+++ b/src/ingestion/taxonomy/types.ts
@@ -33,7 +33,10 @@ export interface DiscoveryProfileDef {
   sortOrder: number;
   exclusions?: {
     coffeeShopRules?: boolean;
+    speakeasyRules?: boolean;
   };
+  /** When true, category comes from model output + heuristics — not targetPlaceCategory. */
+  inferCategoryFromModel?: boolean;
   metadata?: Record<string, unknown>;
 }
 
diff --git a/src/ingestion/taxonomy/validate.ts b/src/ingestion/taxonomy/validate.ts
index efc5781..3cf3e1b 100644
--- a/src/ingestion/taxonomy/validate.ts
+++ b/src/ingestion/taxonomy/validate.ts
@@ -27,8 +27,10 @@ const DiscoveryProfileSchema = z.object({
   exclusions: z
     .object({
       coffeeShopRules: z.boolean().optional(),
+      speakeasyRules: z.boolean().optional(),
     })
     .optional(),
+  inferCategoryFromModel: z.boolean().optional(),
   metadata: z.record(z.string(), z.unknown()).optional(),
 });
 
diff --git a/src/lib/pipeline/placeTaxonomyFields.test.ts b/src/lib/pipeline/placeTaxonomyFields.test.ts
new file mode 100644
index 0000000..5932946
--- /dev/null
+++ b/src/lib/pipeline/placeTaxonomyFields.test.ts
@@ -0,0 +1,19 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+
+import { resolvePlaceCategory } from "./placeTaxonomyFields";
+
+test("resolvePlaceCategory infers from model for speakeasies profile", () => {
+  assert.equal(resolvePlaceCategory("bar", "speakeasies"), "bar");
+  assert.equal(resolvePlaceCategory("speakeasy", "speakeasies"), "speakeasy");
+  assert.equal(resolvePlaceCategory("cocktail bar", "speakeasies"), "bar");
+});
+
+test("resolvePlaceCategory still forces category for bars profile", () => {
+  assert.equal(resolvePlaceCategory("restaurant", "bars"), "bar");
+  assert.equal(resolvePlaceCategory("speakeasy", "bars"), "bar");
+});
+
+test("resolvePlaceCategory maps coffee hints on profiles that infer from model", () => {
+  assert.equal(resolvePlaceCategory("coffee shop", "speakeasies"), "cafe");
+});
diff --git a/src/lib/pipeline/placeTaxonomyFields.ts b/src/lib/pipeline/placeTaxonomyFields.ts
index 02d9f64..2742a11 100644
--- a/src/lib/pipeline/placeTaxonomyFields.ts
+++ b/src/lib/pipeline/placeTaxonomyFields.ts
@@ -24,12 +24,28 @@ export function resolvePlaceCategory(
   searchType: string,
 ): PlaceCategoryValue {
   const snapshot = getFileTaxonomySnapshot();
+  const profile = snapshot.discoveryProfile(searchType);
+
+  if (profile?.inferCategoryFromModel) {
+    return resolveCategoryFromRawText(rawCategory, snapshot);
+  }
+
   const fromProfile = snapshot.categoryForDiscoveryProfile(searchType);
   if (fromProfile) return fromProfile as PlaceCategoryValue;
 
+  return resolveCategoryFromRawText(rawCategory, snapshot);
+}
+
+/** Maps free-form category text to a place category: keyword hints first, then the snapshot's allow-list. */
+function resolveCategoryFromRawText(
+  rawCategory: string,
+  snapshot: ReturnType<typeof getFileTaxonomySnapshot>,
+): PlaceCategoryValue {
   const lower = rawCategory.toLowerCase();
   if (lower.includes("coffee") || lower.includes("espresso")) return "cafe";
   if (lower.includes("speakeasy")) return "speakeasy";
+  // Whole-word "bar" only, so text such as "barbecue restaurant" is left for the allow-list to decide.
+  if (lower.includes("cocktail") || /\bbars?\b/.test(lower)) return "bar";
 
   for (const cat of snapshot.placeCategoryAllowList()) {
     if (lower === cat || lower.includes(cat)) return cat as PlaceCategoryValue;
diff --git a/src/lib/places/discover.ts b/src/lib/places/discover.ts
index 76a31b5..d2a7b87 100644
--- a/src/lib/places/discover.ts
+++ b/src/lib/places/discover.ts
@@ -18,6 +18,10 @@ import {
   filterNationalChainDiscoverRows,
   logNationalChainDrop,
 } from "@/lib/places/nationalChainPlace";
+import {
+  filterSpeakeasyDiscoverRows,
+  logSpeakeasyDrop,
+} from "@/lib/places/speakeasyDiscovery";
 
 const DEFAULT_MODEL = process.env.OPENAI_EXTRACT_MODEL ?? "gpt-4.1-mini";
 
@@ -38,6 +42,27 @@ function profileUsesCoffeeRules(searchType: string, snapshot = getFileTaxonomySn
   return Boolean(snapshot.discoveryProfile(searchType)?.exclusions?.coffeeShopRules);
 }
 
+function profileUsesSpeakeasyRules(searchType: string, snapshot = getFileTaxonomySnapshot()): boolean {
+  return Boolean(snapshot.discoveryProfile(searchType)?.exclusions?.speakeasyRules);
+}
+
+/** Appended as an additional system message when the discovery profile sets `exclusions.speakeasyRules`. */
+const SPEAKEASY_DISCOVERY_RULES = `This task is ONLY for genuine speakeasies and hidden cocktail lounges in Tampa Bay.
+
+Include only when sources describe:
+- Hidden, secret, or unmarked entrances (password doors, alley access, prohibition-era themes, back-room bars).
+- A deliberate speakeasy concept — not merely a well-reviewed cocktail bar with a visible storefront.
+
+Hard exclusions — never output:
+- Open, visible craft cocktail bars or bar-and-kitchen spots without a hidden/speakeasy concept.
+- Downtown cocktail bars, rooftop bars, or restaurants whose main identity is food + drinks on a public street front.
+
+Category guidance:
+- Use category "speakeasy" only when the hidden-bar / secret-lounge concept is clear in sources.
+- Otherwise omit the row — do not list visible cocktail bars as speakeasies.
+
+Prefer fewer true speakeasies over padding with regular bars that happen to make good cocktails.`;
+
 /** Appended as a second system message only for `coffee_shops` discovery runs. */
 const COFFEE_SHOP_DISCOVERY_RULES = `This task is ONLY for local coffee discovery in Tampa Bay.
 
@@ -265,14 +290,18 @@ async function searchForPlaces(
     : "";
 
   const coffeeRules = profileUsesCoffeeRules(searchType, taxonomy);
+  const speakeasyRules = profileUsesSpeakeasyRules(searchType, taxonomy);
   const userPreamble = coffeeRules
     ? `Search thoroughly (local roasters, third-wave espresso, neighborhood cafes). Then extract up to ${maxPlaces} places. Every row must satisfy the coffee-shop addendum: local/indie only, no national chain brands.${exclusionBlock}\n\nPrimary search line: ${query}`
-    : `Search for: ${query}\n\nExtract up to ${maxPlaces} places found into structured data.${exclusionBlock}`;
+    : speakeasyRules
+      ? `Search for genuine hidden speakeasies and secret cocktail lounges only — unmarked doors, password entry, prohibition-style hideaways. Extract up to ${maxPlaces} qualifying places. Omit visible cocktail bars and bar-and-kitchen spots.${exclusionBlock}\n\nPrimary search line: ${query}`
+      : `Search for: ${query}\n\nExtract up to ${maxPlaces} places found into structured data.${exclusionBlock}`;
 
   const inputMessages: OpenAI.Responses.ResponseInputItem[] = [
     { role: "system", content: SYSTEM_PROMPT },
     { role: "system", content: TAMPA_BAY_LOCAL_ONLY_RULES },
     ...(coffeeRules ? [{ role: "system" as const, content: COFFEE_SHOP_DISCOVERY_RULES }] : []),
+    ...(speakeasyRules ? [{ role: "system" as const, content: SPEAKEASY_DISCOVERY_RULES }] : []),
     { role: "user", content: userPreamble },
   ];
 
@@ -302,6 +331,15 @@ async function searchForPlaces(
   for (const row of dropped) {
     logNationalChainDrop(`discover/${searchType}`, row.name, classifications.get(row)!);
   }
+
+  if (speakeasyRules) {
+    const speakeasyFiltered = filterSpeakeasyDiscoverRows(places);
+    places = speakeasyFiltered.kept;
+    for (const row of speakeasyFiltered.dropped) {
+      logSpeakeasyDrop(`discover/${searchType}`, row.name, speakeasyFiltered.classifications.get(row)!);
+    }
+  }
+
   return places.map((p) => ({
     ...p,
     searchType,
diff --git a/src/lib/places/speakeasyDiscovery.test.ts b/src/lib/places/speakeasyDiscovery.test.ts
new file mode 100644
index 0000000..7bacddd
--- /dev/null
+++ b/src/lib/places/speakeasyDiscovery.test.ts
@@ -0,0 +1,57 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+
+import {
+  classifySpeakeasyDiscoverRow,
+  filterSpeakeasyDiscoverRows,
+} from "./speakeasyDiscovery";
+
+test("classifySpeakeasyDiscoverRow accepts hidden-bar signals", () => {
+  const result = classifySpeakeasyDiscoverRow({
+    name: "Ciro's Speakeasy and Supper Club",
+    category: "speakeasy",
+    description: "Password-entry prohibition lounge behind an unmarked door.",
+  });
+  assert.equal(result.qualifies, true);
+});
+
+test("classifySpeakeasyDiscoverRow rejects visible cocktail bars", () => {
+  const result = classifySpeakeasyDiscoverRow({
+    name: "The Copper Shaker",
+    category: "speakeasy",
+    description:
+      "Locally owned craft cocktail bar and kitchen with shareable appetizers and a full kitchen menu.",
+  });
+  assert.equal(result.qualifies, false);
+  assert.equal(result.reason, "visible_cocktail_bar");
+});
+
+test("classifySpeakeasyDiscoverRow lets visible-bar phrasing veto ambiguous signals", () => {
+  const result = classifySpeakeasyDiscoverRow({
+    name: "The Copper Shaker",
+    category: "bar",
+    description: "Craft cocktail bar from the team behind the Harbor Grill.",
+  });
+  assert.equal(result.qualifies, false);
+  assert.equal(result.reason, "visible_cocktail_bar");
+});
+
+test("filterSpeakeasyDiscoverRows partitions rows", () => {
+  const rows = [
+    {
+      name: "Hidden Room",
+      category: "bar",
+      description: "Secret cocktail lounge with password entry.",
+    },
+    {
+      name: "The Copper Shaker",
+      category: "speakeasy",
+      description: "Craft cocktail bar and kitchen downtown.",
+    },
+  ];
+
+  const { kept, dropped } = filterSpeakeasyDiscoverRows(rows);
+  assert.equal(kept.length, 1);
+  assert.equal(dropped.length, 1);
+  assert.equal(kept[0]?.name, "Hidden Room");
+});
diff --git a/src/lib/places/speakeasyDiscovery.ts b/src/lib/places/speakeasyDiscovery.ts
new file mode 100644
index 0000000..4d60aa4
--- /dev/null
+++ b/src/lib/places/speakeasyDiscovery.ts
@@ -0,0 +1,127 @@
+export interface SpeakeasyDiscoverRow {
+  name: string;
+  category: string;
+  description: string | null;
+}
+
+export interface SpeakeasyClassification {
+  qualifies: boolean;
+  reason: string;
+}
+
+/** Phrases that state a speakeasy concept or a concealed entrance outright. */
+const POSITIVE_SIGNALS = [
+  "speakeasy",
+  "hidden bar",
+  "hidden cocktail",
+  "secret bar",
+  "secret cocktail",
+  "secret lounge",
+  "password",
+  "unmarked",
+  "prohibition",
+  "back room",
+  "back-room",
+  "alley entrance",
+  "alley door",
+  "no sign",
+  "unassuming door",
+  "disguised",
+  "blink and you",
+  "members only",
+  "knock to enter",
+  "velvet rope",
+  "hideaway",
+] as const;
+
+/**
+ * Location phrases that can describe a concealed entrance ("behind a bookcase") but also
+ * show up in ordinary copy ("from the team behind the ..."). They only count when no
+ * visible-bar phrase is present.
+ */
+const AMBIGUOUS_SIGNALS = ["behind a", "behind the"] as const;
+
+/** Phrases that mark an ordinary, street-facing cocktail bar or bar-and-kitchen. */
+const VISIBLE_BAR_PHRASES = [
+  "craft cocktail bar",
+  "cocktail bar and kitchen",
+  "downtown cocktail",
+  "full kitchen",
+  "kitchen menu",
+  "shareable appetizers",
+] as const;
+
+/** Lower-cased name + description, the only text the classifier inspects. */
+function combinedText(row: SpeakeasyDiscoverRow): string {
+  return [row.name, row.description ?? ""].join(" ").toLowerCase();
+}
+
+function hasPositiveSignal(text: string): boolean {
+  return POSITIVE_SIGNALS.some((signal) => text.includes(signal));
+}
+
+function hasAmbiguousSignal(text: string): boolean {
+  return AMBIGUOUS_SIGNALS.some((signal) => text.includes(signal));
+}
+
+function looksLikeVisibleCocktailBar(text: string): boolean {
+  return VISIBLE_BAR_PHRASES.some((phrase) => text.includes(phrase));
+}
+
+/**
+ * Whether a discovery row is a genuine speakeasy/hidden bar, not a visible cocktail bar.
+ *
+ * Checked in order: an explicit signal qualifies the row; otherwise a visible-bar phrase
+ * rejects it; otherwise an ambiguous "behind a/the ..." phrase qualifies it. The category
+ * never qualifies a row by itself — it only picks the rejection reason.
+ */
+export function classifySpeakeasyDiscoverRow(row: SpeakeasyDiscoverRow): SpeakeasyClassification {
+  const text = combinedText(row);
+
+  if (hasPositiveSignal(text)) {
+    return { qualifies: true, reason: "speakeasy_signal" };
+  }
+
+  if (looksLikeVisibleCocktailBar(text)) {
+    return { qualifies: false, reason: "visible_cocktail_bar" };
+  }
+
+  if (hasAmbiguousSignal(text)) {
+    return { qualifies: true, reason: "speakeasy_signal" };
+  }
+
+  if (row.category.toLowerCase().includes("speakeasy")) {
+    return { qualifies: false, reason: "category_without_signal" };
+  }
+
+  return { qualifies: false, reason: "no_speakeasy_signal" };
+}
+
+/** Splits rows into kept (qualifying) and dropped, recording each row's classification. */
+export function filterSpeakeasyDiscoverRows<T extends SpeakeasyDiscoverRow>(
+  rows: T[],
+): { kept: T[]; dropped: T[]; classifications: Map<T, SpeakeasyClassification> } {
+  const kept: T[] = [];
+  const dropped: T[] = [];
+  const classifications = new Map<T, SpeakeasyClassification>();
+
+  for (const row of rows) {
+    const classification = classifySpeakeasyDiscoverRow(row);
+    classifications.set(row, classification);
+    if (classification.qualifies) kept.push(row);
+    else dropped.push(row);
+  }
+
+  return { kept, dropped, classifications };
+}
+
+/** Warns that a row was dropped from discovery results, including the rejection reason. */
+export function logSpeakeasyDrop(
+  stage: string,
+  name: string,
+  classification: SpeakeasyClassification,
+): void {
+  console.warn(
+    `[places] ${stage}: dropped non-speakeasy "${name}" (${classification.reason})`,
+  );
+}