Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
eb5a1b2
Adding read_product function to mast.observations to stream asdf and …
Apr 28, 2026
b04daad
Removing unecessary logic for checking packages
May 1, 2026
e4a1991
Adding mock tests for read_product
May 1, 2026
722dde6
adding asdf to optional project.optional-dependencies
May 1, 2026
a19308a
Cleaning various codestlye issues
May 1, 2026
ca6e1c2
Adding in tests remote data testing for read_product function
May 1, 2026
7229c04
Adding in lz4, gwcs, roman-datamodel to optional-depends as they are …
May 1, 2026
5f8e973
Adding in section to Observations docs for review, clean up of functi…
May 4, 2026
6990708
Updating docs for read_product
May 6, 2026
6f138b3
Merge branch 'main' into ASB-30568_read-product-function
May 8, 2026
521d468
tox.ini pinning roman-datamodels and gwcs to prevent numpy versions a…
May 8, 2026
c3681a5
Updating read_product to parse compressed fits files
May 8, 2026
afb86e2
Cleaning mast_obsquery blank line causing docs build fail
May 8, 2026
3508915
updating tests to handle cases were fits/asdf cannot be open, and whe…
May 8, 2026
b818613
Merge branch 'main' into ASB-30568_read-product-function
May 14, 2026
e740db5
Update docs/mast/mast_obsquery.rst
AlexReedy May 14, 2026
0a3354d
Updating doc strings and fixing typos
May 14, 2026
79ca3ba
Refactored and updated to have stream asdf using fsspec instead of s3fs
May 15, 2026
ac66e84
Removing s3fs as fsspec also installs s3fs
May 15, 2026
6841eec
Merge branch 'astropy:main' into ASB-30568_read-product-function
AlexReedy May 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ mast
and column descriptions in the column metadata. [#3588]
- Added ``pass_id`` as an alias for the ``pass`` column in query functions for the Roman mission to avoid conflicts with
the reserved Python keyword. [#3588]
- Added the ability to read FITS and ASDF data products into memory from STScI's S3 open data bucket using the ``Observations.read_product()`` function. [#3561]


jplspec
Expand Down
78 changes: 78 additions & 0 deletions astroquery/mast/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@
import time
import os
from urllib.parse import quote
import importlib.util

import numpy as np
import astropy.units as u
from astropy.io import fits
import astropy.coordinates as coord
from requests import HTTPError
from astropy.table import Table, Row, vstack
Expand All @@ -36,6 +38,13 @@
except ImportError:
pass

# Optional dependencies are imported independently so that a missing ``asdf``
# does not also disable ``fsspec``, which FITS streaming needs on its own.
try:
    import asdf
except ImportError:
    asdf = None

try:
    import fsspec
except ImportError:
    fsspec = None

__all__ = ['Observations', 'ObservationsClass', 'MastClass', 'Mast']

CLOUD_DISABLED_MESSAGE = (
Expand Down Expand Up @@ -1203,6 +1212,75 @@ def get_unique_product_list(self, observations, *, batch_size=500):
log.info("To return all products, use `Observations.get_product_list`")
return unique_products

# TODO: Need to include a way to detect MAST on-prem URLs and handle streaming those as well
def read_product(self, product_path, ignore_unrecognized=True, **kwargs):
    """
    Read a product from the open S3 bucket into memory. Currently supports FITS and ASDF product types only.

    Parameters
    ----------
    product_path : str
        URI to the product in the STScI S3 open data bucket. The reader is chosen from the
        (case-insensitive) suffix: ``.fits`` / ``.fits.gz`` for FITS, ``.asdf`` for ASDF.
    ignore_unrecognized : bool, optional
        Forwarded to ``asdf.open`` as ``ignore_unrecognized_tag``. Defaults to True.
    **kwargs
        Additional keyword arguments passed to the underlying file reader:

        - For FITS files: forwarded to ``astropy.io.fits.open``. A user-supplied ``fsspec_kwargs``
          dictionary is merged on top of the default ``{"anon": True}``.
        - Ignored for ASDF files.

    Returns
    -------
    object
        The object returned by ``astropy.io.fits.open`` or ``asdf.open`` for the given data product.
        The caller is responsible for closing it.

    Raises
    ------
    ValueError
        If ``product_path`` is empty or its suffix is not a supported product type.
    ImportError
        If ``fsspec`` is not installed, or ``asdf`` is not installed when reading an ASDF product.
    RuntimeError
        If the underlying reader fails to open the product. The original exception is chained.
    """
    # Reject empty, whitespace-only, or None paths up front.
    if not product_path or not str(product_path).strip():
        raise ValueError("No product path provided")

    # The lowercase copy is only used for suffix checks; the original path is what gets opened.
    path = str(product_path).lower()
    is_fits = path.endswith((".fits", ".fits.gz"))
    is_asdf = path.endswith(".asdf")

    # Validate the input before complaining about optional dependencies.
    if not (is_fits or is_asdf):
        raise ValueError(f"Unsupported product type: {product_path}")

    # fsspec is required for both the FITS and the ASDF code paths.
    if fsspec is None:
        raise ImportError('The "fsspec" package is required to read products directly from a URI. '
                          'Please install it with `pip install fsspec`.')

    if is_fits:
        # Anonymous access is the default; caller-provided fsspec options take precedence.
        fsspec_kwargs = {"anon": True, **(kwargs.pop("fsspec_kwargs", None) or {})}
        try:
            data_product = fits.open(product_path, fsspec_kwargs=fsspec_kwargs, **kwargs)
        except Exception as e:
            raise RuntimeError(f"Failed to open FITS File: {product_path} {e}") from e
        log.info(f"Loaded: {product_path}")
        return data_product

    # ASDF product: the asdf package itself is mandatory.
    if asdf is None:
        raise ImportError('The "asdf" package is required to read ASDF files. '
                          'Please install it with `pip install asdf`.')

    # gwcs is optional: warn, but still attempt to open the file.
    if importlib.util.find_spec("gwcs") is None:
        warnings.warn('The "gwcs" package is required to read ASDF files containing gwcs data. '
                      'Please install it with `pip install gwcs`.')

    handle = None
    try:
        handle = fsspec.open(product_path, "rb", anon=True).open()
        data_product = asdf.open(handle, ignore_unrecognized_tag=ignore_unrecognized)
    except Exception as e:
        # The handle was never handed to the caller, so release it here to avoid a leak.
        if handle is not None and hasattr(handle, "close"):
            handle.close()
        raise RuntimeError(f"Failed to open ASDF File: {product_path} {e}") from e
    log.info(f"Loaded: {product_path}")
    return data_product


@async_to_sync
class MastClass(MastQueryWithLogin):
Expand Down
59 changes: 59 additions & 0 deletions astroquery/mast/tests/test_mast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1372,6 +1372,65 @@ def test_observations_disable_cloud_dataset(patch_boto3):
assert Observations._cloud_enabled_explicitly is False


@pytest.fixture
def mock_fits_open(mocker):
    """Patch ``astropy.io.fits.open`` so it returns a minimal in-memory HDUList (no network access)."""
    stub_hdulist = fits.HDUList([fits.PrimaryHDU()])
    return mocker.patch("astropy.io.fits.open", return_value=stub_hdulist)


@pytest.fixture
def mock_asdf_open(mocker):
    """Patch ``asdf.open`` so it returns a ``MagicMock`` stand-in named "AsdfFile"."""
    fake_asdf_file = MagicMock(name="AsdfFile")
    return mocker.patch("asdf.open", return_value=fake_asdf_file)


@pytest.fixture
def mock_fsspec_open(mocker):
    """Patch ``fsspec.open`` so that ``fsspec.open(...).open()`` yields a placeholder file object."""
    # Configure the nested ``open()`` return value in one step via the dotted-name kwarg form.
    opener = mocker.Mock(**{"open.return_value": "mock_asdf_file_object"})
    return mocker.patch("fsspec.open", return_value=opener)


def test_observations_read_product_fits(mock_fits_open):
    """A ``.fits`` URI is opened through ``fits.open`` with anonymous fsspec access."""
    s3_fits_path = "s3://mock_fits_path.fits"
    result = Observations.read_product(s3_fits_path)

    # Assert on the recorded call; invoking the mock here would verify nothing.
    mock_fits_open.assert_called_once_with(s3_fits_path, fsspec_kwargs={"anon": True})
    assert result is mock_fits_open.return_value


def test_observations_read_product_asdf(mock_asdf_open, mock_fsspec_open):
    """An ``.asdf`` URI is opened via ``fsspec.open`` and the handle is passed to ``asdf.open``."""
    s3_asdf_path = "s3://fake_asdf_path.asdf"
    result = Observations.read_product(s3_asdf_path)

    # Assert on the recorded calls; invoking the mocks here would verify nothing.
    mock_fsspec_open.assert_called_once_with(s3_asdf_path, "rb", anon=True)
    mock_asdf_open.assert_called_once_with("mock_asdf_file_object", ignore_unrecognized_tag=True)
    assert result is mock_asdf_open.return_value


@pytest.mark.parametrize(
    ("product_path", "expected_exception", "match"),
    [
        # Empty, whitespace-only, and missing paths are all rejected the same way.
        ("", ValueError, "No product path provided"),
        (" ", ValueError, "No product path provided"),
        (None, ValueError, "No product path provided"),
        # A suffix that is neither FITS nor ASDF.
        ("unsupported_ex.txt", ValueError, "Unsupported product type"),
    ],
)
def test_observations_read_product_invalid_inputs(product_path, expected_exception, match):
    """Invalid ``product_path`` values raise the expected exception with a matching message."""
    with pytest.raises(expected_exception, match=match):
        Observations.read_product(product_path)


def test_observations_read_product_fsspec_missing(monkeypatch):
    """With ``fsspec`` unavailable, reading a FITS product raises an ImportError naming fsspec."""
    # Simulate a missing install by nulling the name in the globals that read_product looks up.
    module_globals = Observations.read_product.__globals__
    monkeypatch.setitem(module_globals, "fsspec", None)

    with pytest.raises(ImportError, match="fsspec"):
        Observations.read_product("file.fits")


######################
# CatalogClass tests #
######################
Expand Down
24 changes: 24 additions & 0 deletions astroquery/mast/tests/test_mast_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
from ...exceptions import (InputWarning, InvalidQueryError, MaxResultsWarning,
NoResultsWarning)

try:
import asdf
except ImportError:
asdf = None


@pytest.fixture(scope="module")
def msa_product_table():
Expand Down Expand Up @@ -1048,6 +1053,25 @@ def test_observations_get_cloud_uris_no_duplicates(self, msa_product_table, rese
uris = Observations.get_cloud_uris(products)
assert len(uris) == 1

@pytest.mark.remote_data
@pytest.mark.parametrize(
    "product_path, expected_type",
    [
        (
            "s3://stpubdata/hst/public/u24r/u24r0102t/u24r0102t_c1f.fits",
            "astropy.io.fits.HDUList",
        ),
        (
            "s3://stpubdata/roman/nexus/soc_simulations/tutorial_data/"
            "r0003201001001001004_0001_wfi01_f106_cal.asdf",
            "asdf.AsdfFile",
        ),
    ],
)
def test_observations_read_product(self, product_path, expected_type):
    """Stream a real FITS and a real ASDF product and check the returned object type."""
    # The expected type is given as a dotted name and resolved here, so collecting this module
    # does not fail when the optional ``asdf`` package is absent; that case is skipped instead.
    module_name, _, class_name = expected_type.rpartition(".")
    module = pytest.importorskip(module_name)

    product = Observations.read_product(product_path)
    try:
        assert isinstance(product, getattr(module, class_name))
    finally:
        # Close the product so the remote file handle is not left open after the test.
        product.close()

######################
# CatalogClass tests #
######################
Expand Down
13 changes: 13 additions & 0 deletions docs/mast/mast_obsquery.rst
Original file line number Diff line number Diff line change
Expand Up @@ -629,3 +629,16 @@ remain fully cloud-based.
COMPLETE
COMPLETE
COMPLETE

Streaming Data Products from S3 to memory
-----------------------------------------

If you would rather load a data product directly into memory than download it, pass its S3 URI to the `~astroquery.mast.ObservationsClass.read_product` method.
This method supports FITS and ASDF data products: it selects the reader from the file suffix and opens the product with `~astropy.io.fits.open` or `~asdf.open`.
For ASDF data products, additional packages may be required (e.g., ``lz4`` and ``roman-datamodels`` for data from the Roman Space Telescope, or ``gwcs`` for common ASDF schema tags).

.. doctest-remote-data::

>>> from astroquery.mast import Observations
>>> fits_product = Observations.read_product(product_path="s3://stpubdata/hst/public/u9o4/u9o40504m/u9o40504m_c3m.fits")
>>> asdf_product = Observations.read_product(product_path="s3://stpubdata/roman/nexus/soc_simulations/tutorial_data/r0003201001001001004_0001_wfi01_f106_cal.asdf", ignore_unrecognized=True)
9 changes: 6 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,20 @@ Documentation = "https://astroquery.readthedocs.io"

[project.optional-dependencies]
test = [
"asdf",
"gwcs",
"roman-datamodels",
"pytest>=7.4",
"pytest-doctestplus>=1.4",
"pytest-timeout",
"pytest-astropy",
"lz4",
"matplotlib",
# Temp workaround for https://github.com/RKrahl/pytest-dependency/issues/91
"pytest-dependency; platform_system != 'Windows'",
"pytest-rerunfailures",
"fsspec[http]",
"moto[s3]",
"s3fs",
"fsspec[http,s3]",
"moto[s3]"
]
docs = [
"sphinx",
Expand Down
2 changes: 2 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ deps =
oldestdeps-alldeps: mocpy==0.12
oldestdeps-alldeps: regions==0.5
oldestdeps-alldeps: astropy-healpix==0.7
oldestdeps-alldeps: roman_datamodels==0.11
oldestdeps-alldeps: gwcs==0.18

online: pytest-custom_exit_code

Expand Down
Loading