pasteurlabs · angela-ko · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
@@ -31,13 +31,24 @@ class ArrayModel(BaseModel):
     data: Array[(None,), Float64]
 
 
-ENCODINGS = ["json", "base64", "binref"]
+ENCODINGS = ["json", "base64", "binref", "base64+lz4", "binref+lz4"]
 
 # Maps short encoding name to the format string used by output_to_bytes
 _ENCODING_TO_FORMAT: dict[str, supported_format_type] = {
     "json": "json",
     "base64": "json+base64",
     "binref": "json+binref",
+    "base64+lz4": "json+base64",
+    "binref+lz4": "json+binref",
+}
+
+# Maps short encoding name to extra kwargs passed to output_to_bytes
+_ENCODING_TO_KWARGS: dict[str, dict] = {
+    "json": {},
+    "base64": {},
+    "binref": {},
+    "base64+lz4": {"base64_compression": "lz4"},
+    "binref+lz4": {"binref_compression": "lz4"},
 }
 
 
@@ -78,44 +89,50 @@ def test_encoding(benchmark, encoding_and_size):
     encoding, size = encoding_and_size
     model = ArrayModel(data=create_test_array(size))
     fmt = _ENCODING_TO_FORMAT[encoding]
+    extra_kwargs = _ENCODING_TO_KWARGS[encoding]
+    uses_binref = "binref" in encoding
 
     with tempfile.TemporaryDirectory() as tmpdir:
-        if encoding == "binref":
+        if uses_binref:
 
             def setup():
                 _clear_dir(tmpdir)
 
             benchmark.pedantic(
                 output_to_bytes,
                 args=(model, fmt),
-                kwargs={"base_dir": tmpdir},
+                kwargs={"base_dir": tmpdir, **extra_kwargs},
                 setup=setup,
                 rounds=_binref_rounds(size),
             )
         else:
-            benchmark(output_to_bytes, model, fmt)
+            benchmark(output_to_bytes, model, fmt, **extra_kwargs)
 
 
 def test_decoding(benchmark, encoding_and_size):
     encoding, size = encoding_and_size
     model = ArrayModel(data=create_test_array(size))
     fmt = _ENCODING_TO_FORMAT[encoding]
+    extra_kwargs = _ENCODING_TO_KWARGS[encoding]
+    uses_binref = "binref" in encoding
 
     with tempfile.TemporaryDirectory() as tmpdir:
         ctx: dict[str, str] = {}
-        if encoding == "binref":
+        if uses_binref:
             ctx["base_dir"] = tmpdir
 
-        encoded = output_to_bytes(model, fmt, base_dir=tmpdir)
+        encoded = output_to_bytes(model, fmt, base_dir=tmpdir, **extra_kwargs)
 
-        if encoding == "binref":
+        if uses_binref:
             # binref filenames are random UUIDs, so we must re-encode in setup
             # and pass the fresh payload to the decode call via a mutable wrapper.
             payload = [encoded]
 
             def setup():
                 _clear_dir(tmpdir)
-                payload[0] = output_to_bytes(model, fmt, base_dir=tmpdir)
+                payload[0] = output_to_bytes(
+                    model, fmt, base_dir=tmpdir, **extra_kwargs
+                )
 
             def decode():
                 ArrayModel.model_validate_json(payload[0], context=ctx)
@@ -129,17 +146,19 @@ def test_roundtrip(benchmark, encoding_and_size):
     encoding, size = encoding_and_size
     model = ArrayModel(data=create_test_array(size))
     fmt = _ENCODING_TO_FORMAT[encoding]
+    extra_kwargs = _ENCODING_TO_KWARGS[encoding]
+    uses_binref = "binref" in encoding
 
     with tempfile.TemporaryDirectory() as tmpdir:
         ctx: dict[str, str] = {}
-        if encoding == "binref":
+        if uses_binref:
             ctx["base_dir"] = tmpdir
 
         def roundtrip():
-            enc = output_to_bytes(model, fmt, base_dir=tmpdir)
+            enc = output_to_bytes(model, fmt, base_dir=tmpdir, **extra_kwargs)
             ArrayModel.model_validate_json(enc, context=ctx)
 
-        if encoding == "binref":
+        if uses_binref:
 
             def setup():
                 _clear_dir(tmpdir)

@@ -140,3 +140,13 @@ $ curl \
 The `.bin` file references are relative to the `--output-path`.
 :::
 ::::
+
+### binref + lz4 compression
+
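+Set `TESSERACT_BINREF_COMPRESSION=lz4` to LZ4-compress the arrays written to `.bin` files. Each array is compressed individually, so offset-based random access is preserved. The compressed size is recorded as a third component of the buffer path (`<file>:<offset>:<compressed_size>`, e.g. `....bin:0:35` below), and the array's `data` object is marked with `"compression":"lz4"`.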
+
+```bash
+$ TESSERACT_BINREF_COMPRESSION=lz4 tesseract run vectoradd apply -f "json+binref" -o /tmp/output @examples/vectoradd/example_inputs.json
+$ cat /tmp/output/results.json
+{"result":{"object_type":"array","shape":[3],"dtype":"float64","data":{"buffer":"....bin:0:35","encoding":"binref","compression":"lz4"}}}
+```
@@ -10,7 +10,7 @@ requires-python = ">=3.10,<3.15"
 dependencies = [
     "jinja2",
     "rich",
-    "typer>=0.16",
+    "typer",
     "pyyaml",
     "pydantic",
     "numpy",
@@ -40,16 +40,17 @@ tesseract-runtime = "tesseract_core.runtime.cli:main"
 # do not edit manually. To add constraints, use other operators (e.g. <, >=, ~=, ==) as needed.
 runtime = [
     "pydantic<=2.13.4,>=2.10",
-    "fastapi<=0.138.0,>=0.115",
+    "fastapi<=0.136.1,>=0.115",
     "requests<=2.34.2,>=2.32.4",
-    "uvicorn<=0.49.0,>=0.34",
-    "typer<=0.26.7,>=0.16",
-    "fsspec[http,s3]<=2026.6.0,>=2024.12",
+    "uvicorn<=0.47.0,>=0.34",
+    "click<=8.4.0,>=8.1",
+    "typer<=0.25.1,>=0.15",
+    "fsspec[http,s3]<=2026.4.0,>=2024.12",
     "pybase64<=1.4.3,>=1.4",
     "orjson<=3.11.9,>=3.10",
-    "numpy<=2.5.0,>=1.26",
-    "debugpy<=1.8.21,>=1.8.14",
-    "mlflow-skinny<=3.14.0,>=3.7.0",
+    "numpy<=2.4.5,>=1.26",
+    "debugpy<=1.8.20,>=1.8.14",
+    "mlflow-skinny<=3.12.0,>=3.7.0",
 ]
 # END RUNTIME DEPENDENCIES
 
@@ -68,8 +69,7 @@ docs = [
 ]
 dev = [
     "docker",
-    "httpx",  # required by fastapi older test client
-    "httpx2",  # required by fastapi newer test client
+    "httpx",  # required by fastapi test client
     "pre-commit",
     "pytest",
     "pytest-cov",
@@ -85,6 +85,11 @@ dev = [
     # also add all other extras here
     "tesseract-core[runtime]",
     "tesseract-core[docs]",
+    "tesseract-core[compression]",
+]
+
+compression = [
+    "lz4",
 ]
 
 [project.urls]

@@ -4,13 +4,15 @@ annotated-doc==0.0.4
     # via typer
 annotated-types==0.7.0
     # via pydantic
-certifi==2026.6.17
+certifi==2026.4.22
     # via requests
 charset-normalizer==3.4.7
     # via requests
-colorama==0.4.6 ; sys_platform == 'win32'
+click==8.4.0
     # via typer
-idna==3.18
+colorama==0.4.6 ; sys_platform == 'win32'
+    # via click
+idna==3.15
     # via requests
 jinja2==3.1.6
     # via tesseract-core
@@ -24,15 +26,13 @@ mdurl==0.1.2
     # via markdown-it-py
 numpy==2.2.6 ; python_full_version < '3.11'
     # via tesseract-core
-numpy==2.4.6 ; python_full_version == '3.11.*'
-    # via tesseract-core
-numpy==2.5.0 ; python_full_version >= '3.12'
+numpy==2.4.5 ; python_full_version >= '3.11'
     # via tesseract-core
 orjson==3.11.9
     # via tesseract-core
 packaging==26.2
     # via tesseract-core
-pip==26.1.2
+pip==26.1.1
     # via tesseract-core
 pybase64==1.4.3
     # via tesseract-core
@@ -52,7 +52,7 @@ rich==15.0.0
     #   typer
 shellingham==1.5.4
     # via typer
-typer==0.26.7
+typer==0.25.1
     # via tesseract-core
 typing-extensions==4.15.0
     # via