Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions config/taxonomy.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"version": 3,
"version": 4,
"promptRevision": 1,
"terms": [
{ "kind": "event_tag", "slug": "music", "label": "Music", "sortOrder": 1, "active": true },
Expand Down Expand Up @@ -144,10 +144,14 @@
{
"slug": "speakeasies",
"label": "Speakeasies",
"queryTemplate": "best hidden speakeasy bars and secret cocktail lounges in",
"queryTemplate": "hidden speakeasy bars with unmarked doors password entry prohibition-era secret cocktail lounges in",
"targetPlaceCategory": "speakeasy",
"inferCategoryFromModel": true,
"active": true,
"sortOrder": 8
"sortOrder": 8,
"exclusions": {
"speakeasyRules": true
}
}
],
"rankingGuides": {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- Re-categorize misclassified speakeasy-profile places that lack hidden-bar signals,
-- then queue a classify_place backfill job for every row that was changed.
--
-- A row keeps category 'speakeasy' when its name OR its description contains any
-- of the signal patterns below. Both fields are scanned together (concat_ws skips
-- NULLs) so that a description mentioning "speakeasy", or a name such as
-- "The Back Room", is enough to keep the row.

WITH miscategorized AS (
  UPDATE baywire.places
  SET
    category = 'bar',
    -- Both stored hashes are cleared alongside the category change.
    -- NOTE(review): presumably this invalidates cached classification/editorial
    -- output so the queued job recomputes it; confirm against the job handler.
    classification_hash = NULL,
    editorial_hash = NULL
  WHERE discovery_profile_slug = 'speakeasies'
    AND category = 'speakeasy'
    AND NOT (
      concat_ws(' ', name, description) ILIKE ANY (ARRAY[
        '%speakeasy%',
        '%hidden%',
        '%secret%',
        '%password%',
        '%unmarked%',
        '%prohibition%',
        '%back room%',
        '%back-room%',
        '%alley entrance%',
        '%no sign%',
        '%hideaway%'
      ])
    )
  RETURNING id
)
-- One pending job per re-categorized place, runnable immediately (run_after = NOW()).
INSERT INTO baywire.backfill_jobs (id, kind, payload, status, priority, attempts, run_after, created_at, updated_at)
SELECT
  gen_random_uuid(),
  'classify_place'::baywire."BackfillJobKind",
  jsonb_build_object('placeID', id),
  'pending'::baywire."BackfillJobStatus",
  10,
  0,
  NOW(),
  NOW(),
  NOW()
FROM miscategorized;
3 changes: 3 additions & 0 deletions src/ingestion/taxonomy/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,10 @@ export interface DiscoveryProfileDef {
sortOrder: number;
exclusions?: {
coffeeShopRules?: boolean;
speakeasyRules?: boolean;
};
/** When true, category comes from model output + heuristics — not targetPlaceCategory. */
inferCategoryFromModel?: boolean;
metadata?: Record<string, unknown>;
}

Expand Down
2 changes: 2 additions & 0 deletions src/ingestion/taxonomy/validate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ const DiscoveryProfileSchema = z.object({
exclusions: z
.object({
coffeeShopRules: z.boolean().optional(),
speakeasyRules: z.boolean().optional(),
})
.optional(),
inferCategoryFromModel: z.boolean().optional(),
metadata: z.record(z.string(), z.unknown()).optional(),
});

Expand Down
19 changes: 19 additions & 0 deletions src/lib/pipeline/placeTaxonomyFields.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import assert from "node:assert/strict";
import test from "node:test";

import { resolvePlaceCategory } from "./placeTaxonomyFields";

// The speakeasies profile must take its category from the raw model text.
test("resolvePlaceCategory infers from model for speakeasies profile", () => {
  const expectations: ReadonlyArray<readonly [string, string]> = [
    ["bar", "bar"],
    ["speakeasy", "speakeasy"],
    ["cocktail bar", "bar"],
  ];

  for (const [rawCategory, expected] of expectations) {
    assert.equal(resolvePlaceCategory(rawCategory, "speakeasies"), expected);
  }
});

// The bars profile ignores the raw model text and always yields "bar".
test("resolvePlaceCategory still forces category for bars profile", () => {
  for (const rawCategory of ["restaurant", "speakeasy"]) {
    assert.equal(resolvePlaceCategory(rawCategory, "bars"), "bar");
  }
});

// Coffee wording in the raw category maps to "cafe" even on the speakeasies profile.
test("resolvePlaceCategory maps coffee hints on profiles that infer from model", () => {
  const resolved = resolvePlaceCategory("coffee shop", "speakeasies");
  assert.equal(resolved, "cafe");
});
14 changes: 14 additions & 0 deletions src/lib/pipeline/placeTaxonomyFields.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,26 @@ export function resolvePlaceCategory(
searchType: string,
): PlaceCategoryValue {
const snapshot = getFileTaxonomySnapshot();
const profile = snapshot.discoveryProfile(searchType);

if (profile?.inferCategoryFromModel) {
return resolveCategoryFromRawText(rawCategory, snapshot);
}

const fromProfile = snapshot.categoryForDiscoveryProfile(searchType);
if (fromProfile) return fromProfile as PlaceCategoryValue;

return resolveCategoryFromRawText(rawCategory, snapshot);
}

function resolveCategoryFromRawText(
rawCategory: string,
snapshot: ReturnType<typeof getFileTaxonomySnapshot>,
): PlaceCategoryValue {
const lower = rawCategory.toLowerCase();
if (lower.includes("coffee") || lower.includes("espresso")) return "cafe";
if (lower.includes("speakeasy")) return "speakeasy";
if (lower.includes("cocktail") || lower.includes("bar")) return "bar";

for (const cat of snapshot.placeCategoryAllowList()) {
if (lower === cat || lower.includes(cat)) return cat as PlaceCategoryValue;
Expand Down
40 changes: 39 additions & 1 deletion src/lib/places/discover.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ import {
filterNationalChainDiscoverRows,
logNationalChainDrop,
} from "@/lib/places/nationalChainPlace";
import {
filterSpeakeasyDiscoverRows,
logSpeakeasyDrop,
} from "@/lib/places/speakeasyDiscovery";

/**
 * Default model name: the `OPENAI_EXTRACT_MODEL` environment variable when it is defined,
 * otherwise "gpt-4.1-mini". `??` only falls back on null/undefined, so an empty-string
 * value of the variable is used as-is.
 */
const DEFAULT_MODEL = process.env.OPENAI_EXTRACT_MODEL ?? "gpt-4.1-mini";

Expand All @@ -38,6 +42,27 @@ function profileUsesCoffeeRules(searchType: string, snapshot = getFileTaxonomySn
return Boolean(snapshot.discoveryProfile(searchType)?.exclusions?.coffeeShopRules);
}

/**
 * Reports whether the discovery profile registered under `searchType` sets
 * `exclusions.speakeasyRules`. Unknown profiles and profiles without an
 * `exclusions` object yield `false`.
 */
function profileUsesSpeakeasyRules(searchType: string, snapshot = getFileTaxonomySnapshot()): boolean {
  const exclusions = snapshot.discoveryProfile(searchType)?.exclusions;
  if (exclusions?.speakeasyRules) {
    return true;
  }
  return false;
}

/**
 * Speakeasy-specific addendum to the discovery prompt.
 *
 * `searchForPlaces` appends it as an additional system message — after the base system
 * prompt, the Tampa Bay local-only rules, and the coffee-shop addendum when that one
 * applies — whenever the active discovery profile sets `exclusions.speakeasyRules`
 * (e.g. the `speakeasies` profile).
 */
const SPEAKEASY_DISCOVERY_RULES = `This task is ONLY for genuine speakeasies and hidden cocktail lounges in Tampa Bay.

Include only when sources describe:
- Hidden, secret, or unmarked entrances (password doors, alley access, prohibition-era themes, back-room bars).
- A deliberate speakeasy concept — not merely a well-reviewed cocktail bar with a visible storefront.

Hard exclusions — never output:
- Open, visible craft cocktail bars or bar-and-kitchen spots without a hidden/speakeasy concept.
- Downtown cocktail bars, rooftop bars, or restaurants whose main identity is food + drinks on a public street front.

Category guidance:
- Use category "speakeasy" only when the hidden-bar / secret-lounge concept is clear in sources.
- Otherwise omit the row — do not list visible cocktail bars as speakeasies.

Prefer fewer true speakeasies over padding with regular bars that happen to make good cocktails.`;

/** Appended as a second system message only for `coffee_shops` discovery runs. */
const COFFEE_SHOP_DISCOVERY_RULES = `This task is ONLY for local coffee discovery in Tampa Bay.

Expand Down Expand Up @@ -265,14 +290,18 @@ async function searchForPlaces(
: "";

const coffeeRules = profileUsesCoffeeRules(searchType, taxonomy);
const speakeasyRules = profileUsesSpeakeasyRules(searchType, taxonomy);
const userPreamble = coffeeRules
? `Search thoroughly (local roasters, third-wave espresso, neighborhood cafes). Then extract up to ${maxPlaces} places. Every row must satisfy the coffee-shop addendum: local/indie only, no national chain brands.${exclusionBlock}\n\nPrimary search line: ${query}`
: `Search for: ${query}\n\nExtract up to ${maxPlaces} places found into structured data.${exclusionBlock}`;
: speakeasyRules
? `Search for genuine hidden speakeasies and secret cocktail lounges only — unmarked doors, password entry, prohibition-style hideaways. Extract up to ${maxPlaces} qualifying places. Omit visible cocktail bars and bar-and-kitchen spots.${exclusionBlock}\n\nPrimary search line: ${query}`
: `Search for: ${query}\n\nExtract up to ${maxPlaces} places found into structured data.${exclusionBlock}`;

const inputMessages: OpenAI.Responses.ResponseInputItem[] = [
{ role: "system", content: SYSTEM_PROMPT },
{ role: "system", content: TAMPA_BAY_LOCAL_ONLY_RULES },
...(coffeeRules ? [{ role: "system" as const, content: COFFEE_SHOP_DISCOVERY_RULES }] : []),
...(speakeasyRules ? [{ role: "system" as const, content: SPEAKEASY_DISCOVERY_RULES }] : []),
{ role: "user", content: userPreamble },
];

Expand Down Expand Up @@ -302,6 +331,15 @@ async function searchForPlaces(
for (const row of dropped) {
logNationalChainDrop(`discover/${searchType}`, row.name, classifications.get(row)!);
}

if (speakeasyRules) {
const speakeasyFiltered = filterSpeakeasyDiscoverRows(places);
places = speakeasyFiltered.kept;
for (const row of speakeasyFiltered.dropped) {
logSpeakeasyDrop(`discover/${searchType}`, row.name, speakeasyFiltered.classifications.get(row)!);
}
}

return places.map((p) => ({
...p,
searchType,
Expand Down
47 changes: 47 additions & 0 deletions src/lib/places/speakeasyDiscovery.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import assert from "node:assert/strict";
import test from "node:test";

import {
classifySpeakeasyDiscoverRow,
filterSpeakeasyDiscoverRows,
} from "./speakeasyDiscovery";

// A row whose name and description carry hidden-bar wording must qualify.
test("classifySpeakeasyDiscoverRow accepts hidden-bar signals", () => {
  const hiddenLounge = {
    name: "Ciro's Speakeasy and Supper Club",
    category: "speakeasy",
    description: "Password-entry prohibition lounge behind an unmarked door.",
  };

  const { qualifies } = classifySpeakeasyDiscoverRow(hiddenLounge);

  assert.equal(qualifies, true);
});

// A storefront cocktail bar is rejected even when the model labelled it "speakeasy".
test("classifySpeakeasyDiscoverRow rejects visible cocktail bars", () => {
  const storefrontBar = {
    name: "The Copper Shaker",
    category: "speakeasy",
    description:
      "Locally owned craft cocktail bar and kitchen with shareable appetizers and a full kitchen menu.",
  };

  const { qualifies, reason } = classifySpeakeasyDiscoverRow(storefrontBar);

  assert.equal(qualifies, false);
  assert.equal(reason, "visible_cocktail_bar");
});

// Filtering splits the input into kept (qualifying) and dropped rows.
test("filterSpeakeasyDiscoverRows partitions rows", () => {
  const hiddenRoom = {
    name: "Hidden Room",
    category: "bar",
    description: "Secret cocktail lounge with password entry.",
  };
  const copperShaker = {
    name: "The Copper Shaker",
    category: "speakeasy",
    description: "Craft cocktail bar and kitchen downtown.",
  };

  const partition = filterSpeakeasyDiscoverRows([hiddenRoom, copperShaker]);

  assert.equal(partition.kept.length, 1);
  assert.equal(partition.dropped.length, 1);
  assert.equal(partition.kept[0]?.name, "Hidden Room");
});
107 changes: 107 additions & 0 deletions src/lib/places/speakeasyDiscovery.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/** Minimal shape of a discovery row that the speakeasy heuristics inspect. */
export interface SpeakeasyDiscoverRow {
  /** Venue name; scanned together with the description for signal phrases. */
  name: string;
  /** Category label of the row; only checked for containing "speakeasy". */
  category: string;
  /** Free-text description; `null` is treated as empty text. */
  description: string | null;
}

/** Outcome of classifying a single discovery row. */
export interface SpeakeasyClassification {
  /** `true` when the row should be kept as a speakeasy. */
  qualifies: boolean;
  /**
   * Why the verdict was reached. Values produced by `classifySpeakeasyDiscoverRow`:
   * "speakeasy_signal", "visible_cocktail_bar", "category_without_signal",
   * "no_speakeasy_signal".
   */
  reason: string;
}

/**
 * Phrases that mark a hidden-bar / speakeasy concept.
 *
 * Each phrase is matched as whole words; a space inside a phrase also matches a
 * hyphen ("back room" covers "back-room") and a trailing plural "s" is allowed.
 */
const POSITIVE_SIGNALS = [
  "speakeasy",
  "speakeasies",
  "speak easy",
  "hidden bar",
  "hidden cocktail",
  "secret bar",
  "secret cocktail",
  "secret lounge",
  "password",
  "unmarked",
  "prohibition",
  "back room",
  "alley entrance",
  "alley door",
  "no sign",
  "unassuming door",
  // NOTE(review): "behind a"/"behind the" are broad — "behind the bar" still counts
  // as a signal. Kept for backward compatibility; consider tightening.
  "behind a",
  "behind the",
  "disguised",
  "blink and you",
  "members only",
  "knock to enter",
  "velvet rope",
  "hideaway",
] as const;

/** Phrases that describe an ordinary, street-visible cocktail bar or bar-and-kitchen. */
const VISIBLE_BAR_PHRASES = [
  "craft cocktail bar",
  "cocktail bar and kitchen",
  "downtown cocktail",
  "full kitchen",
  "kitchen menu",
  "shareable appetizers",
] as const;

/** Escapes regex metacharacters so a phrase can be embedded literally in a pattern. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Compiles a phrase list into one whole-word pattern for lower-cased text.
 *
 * Word boundaries stop "behind a" from matching "behind amalie" and "no sign"
 * from matching "no signature", which plain substring checks would accept.
 */
function buildPhrasePattern(phrases: readonly string[]): RegExp {
  const alternatives = phrases.map((phrase) =>
    phrase
      .split(/[\s-]+/)
      .map(escapeRegExp)
      .join("[\\s-]+"),
  );
  return new RegExp(`\\b(?:${alternatives.join("|")})s?\\b`);
}

// Compiled once at module load; no `g` flag, so `.test()` carries no state between calls.
const POSITIVE_SIGNAL_PATTERN = buildPhrasePattern(POSITIVE_SIGNALS);
const VISIBLE_BAR_PATTERN = buildPhrasePattern(VISIBLE_BAR_PHRASES);

/** Lower-cased "name description" text that the phrase patterns run against. */
function combinedText(row: SpeakeasyDiscoverRow): string {
  return [row.name, row.description ?? ""].join(" ").toLowerCase();
}

/** True when the lower-cased text contains at least one speakeasy signal phrase. */
function hasPositiveSignal(text: string): boolean {
  return POSITIVE_SIGNAL_PATTERN.test(text);
}

/** True when the lower-cased text contains at least one visible-bar phrase. */
function looksLikeVisibleCocktailBar(text: string): boolean {
  return VISIBLE_BAR_PATTERN.test(text);
}

/**
 * Whether a discovery row is a genuine speakeasy/hidden bar, not a visible cocktail bar.
 *
 * Precedence: any positive signal qualifies the row regardless of its category or
 * of visible-bar wording; otherwise the row is rejected, and `reason` records
 * whether visible-bar wording was found, whether the row was merely labelled
 * "speakeasy", or whether there was no evidence at all.
 *
 * @param row - Row to classify; `name` and `description` are scanned together.
 * @returns The verdict plus a machine-readable reason code.
 */
export function classifySpeakeasyDiscoverRow(row: SpeakeasyDiscoverRow): SpeakeasyClassification {
  const text = combinedText(row);

  if (hasPositiveSignal(text)) {
    return { qualifies: true, reason: "speakeasy_signal" };
  }

  if (looksLikeVisibleCocktailBar(text)) {
    return { qualifies: false, reason: "visible_cocktail_bar" };
  }

  // A "speakeasy" category alone is not trusted without supporting text.
  if (row.category.toLowerCase().includes("speakeasy")) {
    return { qualifies: false, reason: "category_without_signal" };
  }

  return { qualifies: false, reason: "no_speakeasy_signal" };
}

/**
 * Splits discovery rows into those that qualify as speakeasies and those that do not.
 *
 * @param rows - Rows to classify, processed in order.
 * @returns `kept` and `dropped` in input order, plus a map from each row object to
 *   the classification it received (useful for logging why a row was dropped).
 */
export function filterSpeakeasyDiscoverRows<T extends SpeakeasyDiscoverRow>(
  rows: T[],
): { kept: T[]; dropped: T[]; classifications: Map<T, SpeakeasyClassification> } {
  const result = {
    kept: [] as T[],
    dropped: [] as T[],
    classifications: new Map<T, SpeakeasyClassification>(),
  };

  for (const candidate of rows) {
    const verdict = classifySpeakeasyDiscoverRow(candidate);
    result.classifications.set(candidate, verdict);
    const bucket = verdict.qualifies ? result.kept : result.dropped;
    bucket.push(candidate);
  }

  return result;
}

/**
 * Emits a `console.warn` line recording that a row was dropped by the speakeasy filter.
 *
 * @param stage - Pipeline stage label placed after the "[places]" prefix.
 * @param name - Name of the dropped place.
 * @param classification - Classification whose `reason` is appended in parentheses.
 */
export function logSpeakeasyDrop(
  stage: string,
  name: string,
  classification: SpeakeasyClassification,
): void {
  const { reason } = classification;
  const message = `[places] ${stage}: dropped non-speakeasy "${name}" (${reason})`;
  console.warn(message);
}