|
| 1 | +import logging |
| 2 | +import os |
| 3 | +from typing import Optional |
| 4 | + |
| 5 | +import click |
| 6 | +from dotenv import load_dotenv |
| 7 | + |
| 8 | +from cratedb_toolkit import DatabaseCluster |
| 9 | +from cratedb_toolkit.query.llm.api import DataQuery |
| 10 | +from cratedb_toolkit.query.llm.model import DatabaseInfo, ModelInfo, ModelProvider |
| 11 | +from cratedb_toolkit.util.common import setup_logging |
| 12 | + |
| 13 | +logger = logging.getLogger(__name__) |
| 14 | + |
| 15 | + |
def help_llm():
    """
    Use an LLM to query the database in human language.

    Synopsis
    ========

    export CRATEDB_CLUSTER_URL=crate://localhost/
    ctk query llm "What is the average value for sensor 1?"

    """  # noqa: E501
| 28 | + |
@click.command()
@click.argument("question")
@click.option("--schema", envvar="CRATEDB_SCHEMA", type=str, required=False, help="Schema where to operate on")
@click.option("--llm-provider", envvar="LLM_PROVIDER", type=str, required=True, help="LLM provider name")
@click.option("--llm-name", envvar="LLM_NAME", type=str, required=False, help="LLM model name for completions")
@click.option(
    "--llm-embedding-name", envvar="LLM_EMBEDDING_NAME", type=str, required=False, help="LLM model name for embeddings"
)
@click.option("--llm-api-key", envvar="LLM_API_KEY", type=str, required=False, help="LLM API key")
@click.pass_context
def llm_cli(
    ctx: click.Context,
    question: str,
    schema: Optional[str],
    llm_provider: str,
    llm_name: Optional[str],
    llm_embedding_name: Optional[str],
    llm_api_key: Optional[str],
):
    """
    Use an LLM to query a database in human language.

    Connects to the cluster addressed by ``ctx.meta["address"]``, fills in
    provider-specific model defaults where the user did not choose any,
    then submits the natural-language question via ``DataQuery``.

    Raises ValueError when no completion or embedding model can be
    determined for the selected provider.
    """
    setup_logging()
    load_dotenv()

    # Connect to database.
    dc = DatabaseCluster.from_options(ctx.meta["address"])
    engine = dc.adapter.engine

    # Honor the `--schema` option (click already resolves CRATEDB_SCHEMA via
    # `envvar`); only fall back to the environment / default schema "doc"
    # when no value was supplied. Previously the CLI value was
    # unconditionally overwritten here, silently ignoring `--schema`.
    schema = schema or os.getenv("CRATEDB_SCHEMA", "doc")

    provider = ModelProvider(llm_provider)

    # Parameter sanity checks and heuristics: provider-specific defaults
    # for completion model, embedding model, and API key.
    if not llm_name:
        if provider in [ModelProvider.OPENAI, ModelProvider.AZURE]:
            llm_name = "gpt-4.1"
        elif provider in [ModelProvider.OLLAMA]:
            llm_name = "gemma3:1b"
        else:
            raise ValueError("LLM completion model not selected")
    if not llm_embedding_name:
        if provider in [ModelProvider.OPENAI, ModelProvider.AZURE]:
            llm_embedding_name = "text-embedding-3-large"
        elif provider in [ModelProvider.OLLAMA]:
            llm_embedding_name = "local"
        else:
            raise ValueError("LLM embedding model not selected")
    if not llm_api_key:
        if provider in [ModelProvider.OPENAI, ModelProvider.AZURE]:
            llm_api_key = os.getenv("OPENAI_API_KEY")

    logger.info("Selected LLM: completion=%s, embedding=%s", llm_name, llm_embedding_name)

    # Submit query.
    dq = DataQuery(
        db=DatabaseInfo(
            engine=engine,
            schema=schema,
        ),
        model=ModelInfo(
            provider=provider,
            completion=llm_name,
            embedding=llm_embedding_name,
            api_key=llm_api_key,
        ),
    )
    response = dq.ask(question)

    logger.info("Query was: %s", question)
    logger.info("Answer was: %s", response)
    logger.info("More (metadata, formatted sources):")
    logger.info(response.get_formatted_sources())
    logger.info(response.metadata)
    return response
0 commit comments