def parse_bse(basis_set, atoms=None):
    """Parse a basis set from the Basis Set Exchange (BSE).

    The ``basis_set_exchange`` package is imported lazily, so it is only required when this
    function is actually called. The BSE representation is converted into the same dictionary
    format returned by the other parsers in this module.

    Parameters
    ----------
    basis_set : str
        Name of the basis set to fetch from BSE (e.g., "sto-3g", "6-31g").
    atoms : list, optional
        If provided, only elements in this list will be fetched. The list may contain
        element atomic numbers (ints) or symbols (strs); it is passed directly to
        ``basis_set_exchange.get_basis(..., elements=atoms)``.

    Returns
    -------
    basis_dict : dict
        Dictionary mapping element symbol to a list of tuples ``(angmom, exps, coeffs)``.
        ``exps`` is a 1D float array of length ``n_primitives`` and ``coeffs`` is a 2D float
        array of shape ``(n_primitives, n_contractions)``.

    Raises
    ------
    ImportError
        If the ``basis_set_exchange`` package is not available.
    ValueError
        If an unexpected or missing layout is encountered in the BSE data.

    Notes
    -----
    A BSE electron shell stores ``coefficients`` as a list of contractions, each with one
    entry per exponent. For a shell with a single angular momentum every contraction belongs
    to that angular momentum (general contraction), so all of them are returned as columns of
    one coefficient matrix. For a fused shell (e.g. "sp") the i-th contraction is paired with
    the i-th angular momentum and one tuple is produced per angular momentum.

    """
    # Lazy import so that BSE stays an optional dependency. Only ImportError is translated;
    # any other failure while importing is a genuine bug and is left to propagate untouched.
    try:
        from basis_set_exchange import get_basis, lut
    except ImportError as exc:  # pragma: no cover - import depends on user env
        raise ImportError(
            "The 'basis_set_exchange' package is required for parse_bse."
            " Install it with 'pip install basis-set-exchange'."
        ) from exc

    bse_res = get_basis(basis_set, elements=atoms)
    if not isinstance(bse_res, dict):
        raise ValueError("Unexpected response from basis_set_exchange.get_basis; expected dict.")

    # Fall back to the response itself when there is no "elements" key, so that a bare
    # {atomic_number: data} mapping is also accepted.
    elements = bse_res.get("elements", bse_res)
    if not elements:
        raise ValueError(f"No basis data found for '{basis_set}'.")

    output = {}
    for atom_num_str, info in elements.items():
        try:
            atomic_number = int(atom_num_str)
        except (TypeError, ValueError) as exc:
            raise ValueError(
                f"Unexpected element key {atom_num_str!r} in BSE data for '{basis_set}'."
            ) from exc
        atom_symbol = lut.element_sym_from_Z(atomic_number, normalize=True)

        shells = info.get("electron_shells")
        if not shells:
            raise ValueError(f"No electron shells for element {atom_symbol} in '{basis_set}'.")

        for shell in shells:
            output.setdefault(atom_symbol, []).extend(
                _bse_shell_to_contractions(shell, atom_symbol, basis_set)
            )

    return output


def _bse_shell_to_contractions(shell, atom_symbol, basis_set):
    """Convert one BSE electron shell into a list of (angmom, exps, coeffs) tuples.

    Parameters
    ----------
    shell : dict
        BSE shell with the keys "angular_momentum", "exponents" and "coefficients".
    atom_symbol : str
        Element symbol; used only in error messages.
    basis_set : str
        Basis set name; used only in error messages.

    Returns
    -------
    contractions : list of tuple
        One tuple per angular momentum in the shell. ``coeffs`` always has the shape
        ``(n_primitives, n_contractions)``.

    Raises
    ------
    ValueError
        If a key is missing/empty or the coefficients do not match the exponents or the
        angular momenta.

    """
    exps_raw = shell.get("exponents")
    if not exps_raw:
        raise ValueError(f"Empty exponents for element {atom_symbol} in '{basis_set}'.")
    exponents = np.asarray(exps_raw, dtype=float)
    n_primitives = exponents.shape[0]

    ang_moms = shell.get("angular_momentum")
    if not ang_moms:
        # missing angular momentum or empty list is unexpected; raise concise layout error
        raise ValueError(f"Unexpected coefficients layout for element {atom_symbol}")

    coeffs_raw = shell.get("coefficients")
    if not coeffs_raw:
        raise ValueError(
            f"Unexpected coefficients layout for element {atom_symbol}, l={ang_moms[0]}."
        )

    try:
        coeffs = np.asarray(coeffs_raw, dtype=float)
    except (TypeError, ValueError) as exc:
        # ragged rows or non-numeric entries cannot form a rectangular float matrix
        raise ValueError(
            f"Unexpected coefficients layout for element {atom_symbol}, l={ang_moms[0]}."
        ) from exc

    # Bring the data into the BSE layout (n_contractions, n_primitives).
    if coeffs.ndim == 1:
        if len(ang_moms) == 1:
            # flat list for a single angular momentum: one contraction over all primitives
            coeffs = coeffs.reshape(1, -1)
        else:
            # flat list for a fused shell: one scalar (single primitive) per angular momentum
            coeffs = coeffs.reshape(-1, 1)
    elif coeffs.ndim != 2:
        raise ValueError(
            f"Unsupported coefficients ndim={coeffs.ndim} for {atom_symbol} (l={ang_moms[0]})"
        )

    n_contractions = coeffs.shape[0]
    if coeffs.shape[1] != n_primitives:
        raise ValueError(
            f"Coefficient/exponent mismatch for {atom_symbol} (l={ang_moms[0]}): "
            f"{coeffs.shape[1]} coeffs vs {n_primitives} exponents"
        )

    if len(ang_moms) == 1:
        # General contraction: every row belongs to the same angular momentum, so keep all
        # of them as columns instead of silently dropping everything after the first row.
        return [(ang_moms[0], exponents, coeffs.T.copy())]

    # Fused shell (sp, spd, ...): contraction i is paired with angular momentum i, so the two
    # lists must have the same length.
    if n_contractions != len(ang_moms):
        raise ValueError(
            f"Unexpected coefficients layout for element {atom_symbol}, "
            f"l={ang_moms[min(n_contractions, len(ang_moms) - 1)]}."
        )
    return [
        (angmom, exponents, coeffs[i].reshape(-1, 1).copy()) for i, angmom in enumerate(ang_moms)
    ]
+ ] + }, + { + "cell_type": "markdown", + "id": "2691181e", + "metadata": {}, + "source": [ + "# 1) Install optional dependency\n", + "\n", + "To use `parse_bse()` you can install the optional dependency:\n", + "\n", + "```\n", + "# install for runtime\n", + "pip install basis-set-exchange\n", + "\n", + "# or as a dev dependency\n", + "pip install -e .[dev]\n", + "```\n", + "\n", + "The implementation uses a lazy import and will raise an informative ImportError if the package is not available." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e693a42", + "metadata": {}, + "outputs": [], + "source": [ + "# 2) Import libraries and try BSE import\n", + "\n", + "import numpy as np\n", + "from gbasis.parsers import parse_bse\n", + "\n", + "# guarded import to show lazy behaviour in examples\n", + "try:\n", + " from basis_set_exchange import get_basis, lut\n", + " print(\"basis_set_exchange available\")\n", + "except Exception:\n", + " print(\"basis_set_exchange not installed; parse_bse will raise if invoked\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "9b1a50d9", + "metadata": {}, + "source": [ + "# 3. 
Concept: BSE -> gbasis mapping\n", + "\n", + "# BSE JSON uses atomic numbers as keys; convert to symbols with lut.element_sym_from_Z\n", + "# Each element has 'electron_shells' with keys 'angular_momentum', 'exponents', 'coefficients'\n", + "# We'll assemble: element_symbol -> [(l, exponents(np.ndarray), coeffs(np.ndarray)), ...]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "429fd0c5", + "metadata": {}, + "outputs": [], + "source": [ + "# 4) fetch sto-3g for H and inspect\n", + "\n", + "try:\n", + " b = parse_bse(\"sto-3g\", atoms=[1])\n", + " print(\"Elements:\", list(b.keys()))\n", + " print(\"First H shell:\")\n", + " print(\" angmom:\", b[\"H\"][0][0])\n", + " print(\" exponents:\", b[\"H\"][0][1])\n", + " print(\" coeffs shape:\", b[\"H\"][0][2].shape)\n", + "except Exception as exc:\n", + " print(\"Could not fetch: \", exc)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54670763", + "metadata": {}, + "outputs": [], + "source": [ + "# 5) Integrate with gbasis make_contractions (example for H2)\n", + "from gbasis.parsers import parse_bse\n", + "from gbasis.parsers import make_contractions\n", + "import numpy as np\n", + "\n", + "try:\n", + " basis = parse_bse(\"sto-3g\", atoms=[1])\n", + " atoms = [\"H\", \"H\"]\n", + " coords = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, 0.74]])\n", + " # choose spherical coordinate type\n", + " contractions = make_contractions(basis, atoms, coords, \"spherical\")\n", + " print(f\"Created {len(contractions)} contraction shells for H2\")\n", + "except Exception as exc:\n", + " print(\"Example could not run:\", exc)\n" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 82727343..683c0eca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ dev = [ "sphinx", "sphinx_autodoc_typehints", "sphinx-copybutton", + "basis_set_exchange" ] doc = [ 
def _install_fake_bse(monkeypatch, response, symbol):
    """Register a stand-in ``basis_set_exchange`` module for the duration of one test.

    Parameters
    ----------
    monkeypatch : pytest.MonkeyPatch
        Fixture used to insert the fake module into ``sys.modules`` (undone after the test).
    response : dict
        Object returned by the fake ``get_basis`` regardless of its arguments.
    symbol : str
        Element symbol returned by the fake ``lut.element_sym_from_Z`` for every input.

    """
    import sys
    import types

    fake = types.SimpleNamespace(
        get_basis=lambda basis_set, elements=None: response,
        lut=types.SimpleNamespace(element_sym_from_Z=lambda z, normalize=True: symbol),
    )
    # parse_bse imports the package lazily, so replacing the sys.modules entry is enough
    monkeypatch.setitem(sys.modules, "basis_set_exchange", fake)


def test_parse_bse_sto3g():
    """Test gbasis.parsers.parse_bse for sto-3g (skipped if BSE not installed)."""
    pytest.importorskip("basis_set_exchange")
    test = parse_bse("sto-3g", atoms=[1])
    assert "H" in test
    # ensure there is at least one s-shell and that arrays have expected dtypes/shapes
    assert any(shell[0] == 0 for shell in test["H"])
    assert isinstance(test["H"][0][1], np.ndarray)
    assert isinstance(test["H"][0][2], np.ndarray)
    assert test["H"][0][2].ndim == 2


def test_parse_bse_empty_elements(monkeypatch):
    """parse_bse should raise on missing/empty elements returned by BSE."""
    _install_fake_bse(monkeypatch, {}, "X")

    with pytest.raises(ValueError, match="No basis data found"):
        parse_bse("no-such-basis")


def test_parse_bse_missing_angular_momentum(monkeypatch):
    """parse_bse should raise a concise error when a shell has no angular momentum."""
    response = {
        "elements": {"1": {"electron_shells": [{"exponents": [1.0], "coefficients": []}]}}
    }
    _install_fake_bse(monkeypatch, response, "H")

    with pytest.raises(ValueError, match="Unexpected coefficients layout"):
        parse_bse("bad-angmom", atoms=[1])


def test_parse_bse_unexpected_coeff_layout(monkeypatch):
    """parse_bse should raise a concise error for unexpected coefficient layout."""
    # angular_momentum is present so that the empty coefficients list is what triggers the
    # error, rather than the earlier missing-angular-momentum check
    response = {
        "elements": {
            "1": {
                "electron_shells": [
                    {"angular_momentum": [0], "exponents": [1.0], "coefficients": []}
                ]
            }
        }
    }
    _install_fake_bse(monkeypatch, response, "H")

    with pytest.raises(ValueError, match="Unexpected coefficients layout"):
        parse_bse("bad-coeffs", atoms=[1])