Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 17 additions & 5 deletions marimo/_runtime/virtual_file/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,19 @@ def read_chunked(
key: str,
byte_length: int,
chunk_size: int = DEFAULT_CHUNK_SIZE,
start: int = 0,
) -> Iterator[bytes]:
"""Read buffer data by key in chunks.

Yields chunks of bytes, avoiding allocating the full buffer at once.
Useful for streaming large files over HTTP.

Args:
key: storage key
byte_length: total number of bytes to yield (after applying ``start``)
chunk_size: chunk size in bytes
start: offset in bytes to begin reading from (default 0)

Raises:
KeyError: If key not found
"""
Expand Down Expand Up @@ -146,6 +153,7 @@ def read_chunked(
key: str,
byte_length: int,
chunk_size: int = DEFAULT_CHUNK_SIZE,
start: int = 0,
) -> Iterator[bytes]:
if is_pyodide():
raise RuntimeError(
Expand All @@ -155,7 +163,7 @@ def read_chunked(
view = None
try:
shm = shared_memory.SharedMemory(name=key)
view = shm.buf[:byte_length]
view = shm.buf[start : start + byte_length]
for i in range(0, byte_length, chunk_size):
Comment thread
mscolnick marked this conversation as resolved.
yield bytes(view[i : i + chunk_size])
except FileNotFoundError as err:
Expand Down Expand Up @@ -225,12 +233,13 @@ def read_chunked(
key: str,
byte_length: int,
chunk_size: int = DEFAULT_CHUNK_SIZE,
start: int = 0,
) -> Iterator[bytes]:
if key not in self._storage:
raise KeyError(f"Virtual file not found: {key}")
buffer = self._storage[key]
end = min(byte_length, len(buffer))
for i in range(0, end, chunk_size):
end = min(start + byte_length, len(buffer))
for i in range(start, end, chunk_size):
yield buffer[i : min(i + chunk_size, end)]

def remove(self, key: str) -> None:
Expand Down Expand Up @@ -288,6 +297,7 @@ def read_chunked(
filename: str,
byte_length: int,
chunk_size: int = DEFAULT_CHUNK_SIZE,
start: int = 0,
) -> Iterator[bytes]:
"""Read from storage in chunks, with cross-process fallback.

Expand All @@ -301,7 +311,9 @@ def read_chunked(
storage = self.storage
if storage is None:
yield from SharedMemoryStorage().read_chunked(
filename, byte_length, chunk_size
filename, byte_length, chunk_size, start
)
else:
yield from storage.read_chunked(filename, byte_length, chunk_size)
yield from storage.read_chunked(
filename, byte_length, chunk_size, start
)
9 changes: 7 additions & 2 deletions marimo/_runtime/virtual_file/virtual_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,16 +310,21 @@ def read_virtual_file(filename: str, byte_length: int) -> bytes:


def read_virtual_file_chunked(
filename: str, byte_length: int
filename: str, byte_length: int, start: int = 0
) -> Iterator[bytes]:
"""Read a virtual file in chunks for streaming responses.

Yields chunks of bytes, avoiding holding the entire file in memory
as a single bytes object.

Args:
filename: virtual file name
byte_length: number of bytes to read (after applying ``start``)
start: offset in bytes to begin reading from (for HTTP Range requests)
"""
try:
yield from VirtualFileStorageManager().read_chunked(
filename, byte_length
filename, byte_length, start=start
)
except KeyError as err:
raise HTTPException(
Expand Down
74 changes: 68 additions & 6 deletions marimo/_server/api/endpoints/assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,17 +540,20 @@ def virtual_file(
detail="Invalid virtual file request",
)

byte_length, filename = filename_and_length.split("-", 1)
if not byte_length.isdigit():
byte_length_str, filename = filename_and_length.split("-", 1)
if not byte_length_str.isdigit():
raise HTTPException(
status_code=404,
detail="Invalid byte length in virtual file request",
)
total_size = int(byte_length_str)

chunks = read_virtual_file_chunked(filename, int(byte_length))
mimetype, _ = mimetypes.guess_type(filename)
headers = {
"Cache-Control": "max-age=86400",
# Advertise range support so Safari (which requires it for media
# playback) will load <audio>/<video> sources. See #9460.
"Accept-Ranges": "bytes",
}
# When ?download=1 is set, force a save dialog. This bypasses cases
# where <a download> is ignored (e.g., sandboxed iframes without
Expand All @@ -560,17 +563,76 @@ def virtual_file(

download_filename = request.query_params.get("filename") or filename
headers.update(make_download_headers(download_filename))
# Do NOT set Content-Length here. StreamingResponse with an explicit
# Content-Length causes h11 LocalProtocolError ("Too little data for
# declared Content-Length") for large files. Omitting it lets h11 use

range_header = request.headers.get("range")
if range_header is not None:
parsed = _parse_range_header(range_header, total_size)
if parsed is None:
return Response(
status_code=416,
headers={**headers, "Content-Range": f"bytes */{total_size}"},
)
start, end = parsed
length = end - start + 1
chunks = read_virtual_file_chunked(filename, length, start=start)
partial_headers = {
**headers,
"Content-Range": f"bytes {start}-{end}/{total_size}",
"Content-Length": str(length),
}
return StreamingResponse(
content=chunks,
status_code=206,
media_type=mimetype,
headers=partial_headers,
)
Comment thread
mscolnick marked this conversation as resolved.

# Do NOT set Content-Length on full responses. StreamingResponse with an
# explicit Content-Length causes h11 LocalProtocolError ("Too little data
# for declared Content-Length") for large files. Omitting it lets h11 use
# chunked transfer encoding instead. See #8917.
chunks = read_virtual_file_chunked(filename, total_size)
return StreamingResponse(
content=chunks,
media_type=mimetype,
headers=headers,
)


_RANGE_RE = re.compile(r"^bytes=(\d*)-(\d*)$", re.IGNORECASE)


def _parse_range_header(
range_header: str, total_size: int
) -> tuple[int, int] | None:
"""Parse a single-range HTTP ``Range`` header.

Returns ``(start, end)`` byte offsets (inclusive) on success, or
``None`` if the range is unsatisfiable. Multi-range requests are
treated as unsatisfiable since marimo only supports single ranges.
"""
match = _RANGE_RE.match(range_header.strip())
if match is None or total_size == 0:
return None
start_str, end_str = match.group(1), match.group(2)
if start_str == "" and end_str == "":
return None
if start_str == "":
# Suffix range: last N bytes.
suffix = int(end_str)
if suffix == 0:
return None
start = max(total_size - suffix, 0)
end = total_size - 1
else:
start = int(start_str)
end = int(end_str) if end_str else total_size - 1
if start >= total_size or end < start:
return None
end = min(end, total_size - 1)
return start, end


@router.get("/public-files-sw.js")
async def public_files_service_worker(request: Request) -> Response:
"""
Expand Down
19 changes: 18 additions & 1 deletion marimo/_smoke_tests/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import marimo

__generated_with = "0.17.6"
__generated_with = "0.23.5"
app = marimo.App()


Expand All @@ -12,6 +12,7 @@ def _():
import requests
from io import BytesIO
import base64

return BytesIO, base64, mo, requests


Expand Down Expand Up @@ -91,6 +92,22 @@ def _(mo):
return


@app.cell
def _(mo):
    # Manual regression check for #9460. Passing a numpy array to mo.audio
    # routes playback through the virtual file endpoint, and Safari's
    # <audio> element only plays sources that answer HTTP Range requests.
    # Open this notebook in Safari: the player should be enabled and the
    # tone audible.
    import math

    import numpy as np

    _sr = 44100
    # Instantaneous frequency values running from 660 down to 110.
    _freqs = np.linspace(660, 110, 100000)
    # Accumulate the frequencies and scale by tau / sample-rate.
    _phase = math.tau * np.cumsum(_freqs) / _sr
    _samples = 0.01 * np.sin(_phase)
    mo.audio(_samples, _sr, normalize=False)
    return


@app.cell
def _(mo):
mo.video(
Expand Down
17 changes: 17 additions & 0 deletions tests/_runtime/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ def test_read_chunked_chunk_sizes(self) -> None:
assert len(chunks[-1]) <= chunk_size
assert b"".join(chunks) == data

def test_read_chunked_with_start_offset(self) -> None:
    """Reading 5 bytes from offset 6 of ``b"hello world"`` yields ``b"world"``."""
    key = "test_key"
    in_memory = InMemoryStorage()
    in_memory.store(key, b"hello world")
    received = b"".join(in_memory.read_chunked(key, 5, start=6))
    assert received == b"world"


class TestInMemoryStorage:
def test_store_and_read(self) -> None:
Expand Down Expand Up @@ -268,6 +274,17 @@ def test_read_chunked_cross_process(self) -> None:
finally:
storage1.shutdown()

def test_read_chunked_with_start_offset(self) -> None:
    """Reading 5 bytes from offset 6 of ``b"hello world"`` yields ``b"world"``."""
    key = "marimo_chunk_offset"
    shm_storage = SharedMemoryStorage()
    try:
        shm_storage.store(key, b"hello world")
        received = b"".join(shm_storage.read_chunked(key, 5, start=6))
        assert received == b"world"
    finally:
        # Always release the storage, even when the assertion fails.
        shm_storage.shutdown()

def test_read_chunked_data_integrity(self) -> None:
"""Test that chunked read produces identical data to regular read."""
storage = SharedMemoryStorage()
Expand Down
92 changes: 92 additions & 0 deletions tests/_server/api/endpoints/test_assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,98 @@ def test_vfile_large_streaming(client: TestClient) -> None:
manager.storage = original_storage


def test_vfile_range_requests(client: TestClient) -> None:
    """Exercise HTTP Range handling on the virtual file endpoint.

    Safari's <audio> element refuses to load a media source unless the
    server answers its range probes with 206 Partial Content, so virtual
    files have to support Range requests.

    See https://github.com/marimo-team/marimo/issues/9460
    """
    from marimo._runtime.virtual_file.storage import (
        InMemoryStorage,
        VirtualFileStorageManager,
    )

    manager = VirtualFileStorageManager()
    previous_storage = manager.storage
    in_memory = InMemoryStorage()
    manager.storage = in_memory

    try:
        # 2048 bytes of deterministic content
        payload = bytes(range(256)) * 8
        name = "test-audio.wav"
        in_memory.store(name, payload)
        size = len(payload)
        last = size - 1
        url = f"/@file/{size}-{name}"

        def get_range(spec: str):
            # Authenticated GET carrying the given Range header value.
            return client.get(
                url, headers={**token_header(), "Range": spec}
            )

        # A plain GET advertises Accept-Ranges so clients know they can
        # probe with ranges.
        full = client.get(url, headers=token_header())
        assert full.status_code == 200, full.text
        assert full.headers.get("accept-ranges") == "bytes"
        assert full.content == payload

        # A bounded range yields 206 with Content-Range and exact bytes.
        bounded = get_range("bytes=0-99")
        assert bounded.status_code == 206, bounded.text
        assert bounded.headers.get("content-range") == f"bytes 0-99/{size}"
        assert bounded.headers.get("content-length") == "100"
        assert bounded.headers.get("accept-ranges") == "bytes"
        assert bounded.content == payload[0:100]

        # An open-ended range (start-) runs to the end of the file.
        open_ended = get_range("bytes=50-")
        assert open_ended.status_code == 206, open_ended.text
        assert (
            open_ended.headers.get("content-range")
            == f"bytes 50-{last}/{size}"
        )
        assert open_ended.content == payload[50:]

        # A suffix range (-N) returns the last N bytes.
        tail = get_range("bytes=-50")
        assert tail.status_code == 206, tail.text
        tail_start = size - 50
        assert (
            tail.headers.get("content-range")
            == f"bytes {tail_start}-{last}/{size}"
        )
        assert tail.content == payload[-50:]

        # A start past the end → 416, with Content-Range giving the size.
        beyond = get_range(f"bytes={size}-")
        assert beyond.status_code == 416, beyond.text
        assert beyond.headers.get("content-range") == f"bytes */{size}"

        # The range unit token is case-insensitive per RFC 9110.
        mixed_case = get_range("Bytes=0-99")
        assert mixed_case.status_code == 206, mixed_case.text
        assert mixed_case.content == payload[:100]
    finally:
        # Put the original storage back so other tests are unaffected.
        manager.storage = previous_storage


def test_vfile_download_query_param_sets_content_disposition(
client: TestClient,
) -> None:
Expand Down
Loading