Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
members = [
"clickgraph-client",
"clickgraph-embedded",
"clickgraph-py",
]

[package]
Expand Down
2 changes: 1 addition & 1 deletion clickgraph-embedded/src/connection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use clickgraph::graph_catalog::graph_schema::GraphSchema;

use super::database::Database;
use super::error::EmbeddedError;
use super::query_result::{QueryResult, Row};
use super::query_result::QueryResult;
use super::value::Value;

/// A connection to an embedded ClickGraph database.
Expand Down
14 changes: 14 additions & 0 deletions clickgraph-py/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Compiled extensions
*.so
*.pyd
*.dll

# Python
__pycache__/
*.pyc
*.egg-info/
dist/
build/

# maturin
target/
16 changes: 16 additions & 0 deletions clickgraph-py/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[package]
name = "clickgraph-py"
version = "0.1.0"
edition = "2021"
rust-version = "1.85"
description = "Python bindings for ClickGraph embedded graph query engine"
repository = "https://github.com/genezhang/clickgraph"
license = "Apache-2.0"

[lib]
name = "_clickgraph"
crate-type = ["cdylib"]

[dependencies]
clickgraph-embedded = { path = "../clickgraph-embedded" }
pyo3 = { version = "0.23", features = ["extension-module"] }
86 changes: 86 additions & 0 deletions clickgraph-py/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# clickgraph — Python bindings

Embedded graph query engine — run Cypher queries over Parquet, Iceberg, Delta Lake and S3 data without a ClickHouse server.

## Quick Start

```python
import clickgraph

db = clickgraph.Database("schema.yaml")
conn = db.connect()

for row in conn.query("MATCH (u:User) RETURN u.name LIMIT 5"):
    print(row["u.name"])
```

## API

### `Database(schema_path, **kwargs)`

Open an embedded database from a YAML schema file.

**Keyword arguments** (all optional):
- `session_dir` — directory for chdb session data (default: temp dir)
- `data_dir` — base directory for relative `source:` paths
- `max_threads` — maximum threads for chdb
- `s3_access_key_id`, `s3_secret_access_key`, `s3_region`, `s3_endpoint_url`, `s3_session_token` — S3 credentials
- `gcs_access_key_id`, `gcs_secret_access_key` — GCS HMAC credentials
- `azure_storage_account_name`, `azure_storage_account_key`, `azure_storage_connection_string` — Azure credentials

### `Database.connect() → Connection`

Create a connection for executing queries.

### `Database.execute(cypher) → QueryResult`

Shorthand — execute a query without creating a separate connection.

### `Connection.query(cypher) → QueryResult`

Execute a Cypher query. Returns an iterable of row dicts.

### `Connection.query_to_sql(cypher) → str`

Translate Cypher to ClickHouse SQL without executing.

### `QueryResult`

- Iterable: `for row in result:` — each row is a `dict`
- `result.column_names` — list of column names
- `result.num_rows` — number of rows
- `result.as_dicts()` — all rows as a list of dicts
- `result.get_row(i)` — single row by index as dict
- `len(result)` — number of rows

## Installation

```bash
# From source (requires Rust toolchain + chdb)
cd clickgraph-py
pip install maturin
maturin develop
```

## Example with S3 data

```python
import clickgraph

db = clickgraph.Database(
    "schema.yaml",
    s3_access_key_id="AKIA...",
    s3_secret_access_key="...",
    s3_region="us-east-1",
)

conn = db.connect()
result = conn.query("""
MATCH (u:User)-[:FOLLOWS]->(f:User)
WHERE u.name = 'Alice'
RETURN f.name, f.email
""")

for row in result:
    print(f"{row['f.name']}: {row['f.email']}")
```
24 changes: 24 additions & 0 deletions clickgraph-py/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"

[project]
name = "clickgraph"
version = "0.1.0"
description = "Embedded graph query engine — run Cypher over Parquet, Iceberg, S3 and ClickHouse"
requires-python = ">=3.8"
license = "Apache-2.0"
readme = "README.md"
keywords = ["graph", "cypher", "clickhouse", "embedded", "analytics"]
classifiers = [
"Development Status :: 4 - Beta",
"License :: OSI Approved :: Apache Software License",
"Programming Language :: Python :: 3",
"Programming Language :: Rust",
"Topic :: Database",
]

[tool.maturin]
features = []
python-source = "python"
module-name = "clickgraph._clickgraph"
28 changes: 28 additions & 0 deletions clickgraph-py/python/clickgraph/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""ClickGraph — embedded graph query engine for Python.

Run Cypher queries over Parquet, Iceberg, Delta Lake and S3 data
without a ClickHouse server.

Quick start::

    import clickgraph

    db = clickgraph.Database("schema.yaml")
    conn = db.connect()
    for row in conn.query("MATCH (u:User) RETURN u.name LIMIT 5"):
        print(row["u.name"])

With S3 credentials::

    db = clickgraph.Database(
        "schema.yaml",
        s3_access_key_id="AKIA...",
        s3_secret_access_key="...",
        s3_region="us-east-1",
    )
"""

from clickgraph._clickgraph import Database, Connection, QueryResult

__all__ = ["Database", "Connection", "QueryResult"]
__version__ = "0.1.0"
Loading
Loading