diff --git a/ICESat2_Kd_Flowchart.png b/ICESat2_Kd_Flowchart.png new file mode 100644 index 0000000..52ad60f Binary files /dev/null and b/ICESat2_Kd_Flowchart.png differ diff --git a/README.md b/README.md index 812d929..1ef0cc0 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,36 @@ ATLAS Ocean Kd (AOK) - calculating ocean Kd values using ICESat-2 > [!WARNING] > This package is currently under active development to produce the most reliable calculations of Kd values. > The code is therefore unstable, may have significant uncertainties, and should be used with caution. +> This repository contains Python code developed in collaboration to replicate the functionality of the original MATLAB scripts (https://github.com/emilyeidam/icesat-2_kdph) +by Dr. Emily Eidam (emily.eidam@oregonstate.edu). The code facilitates processing ICESat-2 data to calculate the diffuse attenuation coefficient (Kd) based on space-based lidar photon profiles, following methods described in the original MATLAB code. If you use this code in your research, please cite both the Python and the MATLAB versions appropriately; the code is distributed under the GNU GPLv3 license. <!-- TODO(@Jessica): can you check which one is better? --> [![All Contributors](https://img.shields.io/github/all-contributors/icesat2py/aok?color=ee8449&style=flat-square)](#contributors) # Quickstart -## Calculating Kd -Coming soon... +# Installation -## Installation -Coming soon... +1. Clone the repository: + + ```bash + git clone https://github.com/ChaoEcohydroRS/icesat-2_kdph_py.git + cd icesat-2_kdph_py + ``` + +2. Install the required packages: + ``` + pip install -r requirements.txt + ``` + +3. Ensure .h5 files are placed in the appropriate default folder structure. + +# Usage +1. Edit the config.py file to set up file paths and any parameters. +2. Run the entire workflow through main.py: + + ```bash + python main.py + ``` # Contributing @@ -39,3 +59,12 @@ Our full attribution guidelines are coming soon. 
We use the [All Contributors Specification](https://allcontributors.org/docs/en/specification) to recognize our incredible [contributor team](CONTRIBUTORS.md). +# Citation +If you use this code, please cite: + +Eidam, E.F., K. Bisson, C. Wang, C. Walker, and A. Gibbons (2024). ICESat-2 and ocean particulates: A roadmap for calculating Kd from space-based lidar photon profiles. Remote Sensing of Environment. Vol. 311. https://doi.org/10.1016/j.rse.2024.114222 + +# License +This project is licensed under the GNU GPLv3 license. + + diff --git a/icesat-2_kdph_py/README.md b/icesat-2_kdph_py/README.md new file mode 100644 index 0000000..28f6ee4 --- /dev/null +++ b/icesat-2_kdph_py/README.md @@ -0,0 +1,198 @@ +# icesat-2_kdph_py + + +This repository contains Python code developed in collaboration to replicate the functionality of the original MATLAB scripts (https://github.com/emilyeidam/icesat-2_kdph) +by Dr. Emily Eidam (emily.eidam@oregonstate.edu). The code facilitates processing ICESat-2 data to calculate the diffuse attenuation coefficient (Kd) based on space-based lidar photon profiles, following methods described in the original MATLAB code. If you use this code in your research, please cite both the Python and the MATLAB versions appropriately; the code is distributed under the GNU GPLv3 license. + +## Repository Structure + +Current structure: + +```text +icesat-2_kdph_py/ ++-- main.py ++-- config.py ++-- requirements.txt ++-- README.md ++-- kd_utils/ + +-- __init__.py + +-- data_processing.py + +-- sea_photons_analysis.py + +-- bathy_processing.py + +-- Kd_analysis.py + +-- interpolation.py + +-- visualization.py + +-- sliderule_adapter.py + +-- SeaSurfaceFlattening.py + +-- SolarBckgrd/ +``` + +Key modules: + +- `main.py`: End-to-end pipeline entrypoint for local ATL03 workflows. +- `config.py`: All CLI switches, paths, and thresholds. +- `kd_utils/data_processing.py`: ATL03 ingestion, land/sea masking, optional pre-filters. 
+- `kd_utils/sea_photons_analysis.py`: Binning and sea-surface detection. +- `kd_utils/bathy_processing.py`: Subsurface filtering and optional gap steps (GEBCO/ATL24/refraction/etc.). +- `kd_utils/Kd_analysis.py`: Kd fitting. +- `kd_utils/sliderule_adapter.py`: SlideRule ATL03 fetch + conversion to framework schema for notebook/script imports. + +### Getting Started + +### Prerequisites + +- Python 3.x +- Libraries: `numpy`, `pandas`, `scipy`, `h5py`, `matplotlib`, `scikit-learn` +- Ensure you have ICESat-2 ATL03 `.h5` files available in the appropriate directory, as specified in `config.py`; see also the original MATLAB code for reference. + +### Installation + +1. Clone the repository: + + ```bash + git clone https://github.com/ChaoEcohydroRS/icesat-2_kdph_py.git + cd icesat-2_kdph_py + ``` + + +2. Install the required packages: + ``` + pip install -r requirements.txt + ``` + +3. Ensure .h5 files are placed in the appropriate default folder structure. + +### Usage +1. Edit the config.py file to set up file paths and any parameters. +2. 
Run the entire workflow through main.py: + + ```bash + python main.py + ``` + +### SlideRule Ingestion (Notebook Import Path) + +You can reuse the same framework without local ATL03 `.h5` files by importing the SlideRule adapter: + +```python +from kd_utils.sliderule_adapter import fetch_and_prepare_sliderule_dataset +from kd_utils.sea_photons_analysis import process_sea_photon_binning +from kd_utils.bathy_processing import process_subsurface_photon_filtering +from kd_utils.Kd_analysis import process_kd_calculation + +region = [ + {"lon": -84.029, "lat": 29.732}, + {"lon": -84.029, "lat": 29.954}, + {"lon": -83.969, "lat": 29.954}, + {"lon": -83.969, "lat": 29.732}, + {"lon": -84.029, "lat": 29.732}, +] + +sea_photon_dataset = fetch_and_prepare_sliderule_dataset( + region=region, + t0="2025-05-22T00:00:00Z", + t1="2025-05-24T00:00:00Z", + rgt=1033, + strong_beams_only=True, +) + +binned = process_sea_photon_binning(sea_photon_dataset, horizontal_res=500, vertical_res=0.25) +sea_h, sea_lbl, subsurface = process_subsurface_photon_filtering( + binned, GEBCO_paths=[], subsurface_thresh=1.0, Ignore_Subsurface_Height_Thres=-6 +) +kd_df = process_kd_calculation(subsurface) +``` + +This path reuses your existing processing framework; only ingestion switches from local HDF5 to SlideRule API. + +### Optional Gap Steps (Sensitivity On/Off) + +All extra processing steps are optional and disabled by default. 
+ +- IR/AP proxy photon filtering: + ```bash + python main.py --enable_ir_ap_filter + ``` +- Solar background filtering: + ```bash + python main.py --enable_solar_background_filter + ``` +- GEBCO seafloor filtering: + ```bash + python main.py --enable_gebco_filter + ``` +- ATL24-first with GEBCO fallback: + ```bash + python main.py --enable_atl24_filter --atl24_file path/to/atl24_points.csv --enable_gebco_filter + ``` +- Sea surface flattening: + ```bash + python main.py --enable_sea_surface_flattening + ``` +- Convex hull reasonableness filter: + ```bash + python main.py --enable_convex_hull_filter + ``` +- Paired-beam combine before Kd fit: + ```bash + python main.py --enable_paired_beam_combine + ``` +- Histogram quality filter (<5% signal at 6-7 m depth band): + ```bash + python main.py --enable_histogram_quality_filter + ``` +- Surface Gaussian sigma filter: + ```bash + python main.py --enable_surface_sigma_filter + ``` +- Refraction correction: + ```bash + python main.py --enable_refraction_correction + ``` +- Post-refraction rebuild + re-fit: + ```bash + python main.py --enable_refraction_correction --enable_post_refraction_refit + ``` + +### Gap Implementation Progress + +The flowchart gaps are being implemented one-by-one with optional switches. 
+ +- [x] Gap 1: Remove photons flagged as IR/AP (proxy implementation via `quality_ph` and `photon_conf`) + - Switch: `--enable_ir_ap_filter` + - Tuning: `--ir_ap_quality_max`, `--ir_ap_min_signal_conf` +- [x] Gap 2: Histogram quality review (<5% signal at 6-7 m) + - Switch: `--enable_histogram_quality_filter` + - Tuning: + - `--histogram_quality_min_ratio` (default `0.05`) + - `--histogram_quality_depth_min` (default `6.0`) + - `--histogram_quality_depth_max` (default `7.0`) +- [x] Gap 3: Discard bins with high surface Gaussian std dev + - Switch: `--enable_surface_sigma_filter` + - Tuning: `--surface_sigma_max` (default `0.5`) +- [x] Gap 4: Refraction correction integration + - Switch: `--enable_refraction_correction` + - Tuning: + - `--refraction_water_temp_c` (default `20.0`) + - `--refraction_wavelength_nm` (default `532.0`) +- [x] Gap 5: Rebuild histogram and re-fit after refraction + - Switch: `--enable_post_refraction_refit` (requires `--enable_refraction_correction`) +- [x] Gap 6: ATL24 query + fallback orchestration + - Switch: `--enable_atl24_filter` + - Note: current implementation uses local ATL24 point files provided via `--atl24_file`. + - Inputs: + - `--atl24_file` (CSV/Parquet/GPKG/SHP with lon/lat + bathymetry field) + - `--atl24_max_match_distance_deg` (default `0.01`) + - Fallback: if ATL24 has no usable matches and `--enable_gebco_filter` is on, GEBCO filtering is used. +- [x] Gap 7: Combine paired beams + - Switch: `--enable_paired_beam_combine` + - Method: maps left/right beams to `gt1_pair`, `gt2_pair`, `gt3_pair` and re-bins by `relative_AT_dist` using `horizontal_res`. + +### Citation +If you use this code, please cite: + +Eidam, E.F., K. Bisson, C. Wang, C. Walker, and A. Gibbons (2024). ICESat-2 and ocean particulates: A roadmap for calculating Kd from space-based lidar photon profiles. Remote Sensing of Environment. Vol. 311. https://doi.org/10.1016/j.rse.2024.114222 + +### License +This project is licensed under the GNU GPLv3 license. 
+ diff --git a/icesat-2_kdph_py/SlideRule.py b/icesat-2_kdph_py/SlideRule.py new file mode 100644 index 0000000..367f27f --- /dev/null +++ b/icesat-2_kdph_py/SlideRule.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Feb 14 12:48:13 2025 + +@author: wayne +""" + +from sliderule import sliderule, icesat2 +import geopandas as gpd + +sliderule.init(verbose=True) + +region = sliderule.toregion("bathy.geojson"); \ No newline at end of file diff --git a/icesat-2_kdph_py/__pycache__/config.cpython-39.pyc b/icesat-2_kdph_py/__pycache__/config.cpython-39.pyc new file mode 100644 index 0000000..3252e2a Binary files /dev/null and b/icesat-2_kdph_py/__pycache__/config.cpython-39.pyc differ diff --git a/icesat-2_kdph_py/config.py b/icesat-2_kdph_py/config.py new file mode 100644 index 0000000..a3ce3af --- /dev/null +++ b/icesat-2_kdph_py/config.py @@ -0,0 +1,247 @@ +# # config.py + +# path = 'C:/Workstation/ICESat2_HLS/Kd_ComparisionPaper/Dataset/ATL03_ICESat2/' + +# #US, Orgon +# # ATL03_h5_file = "processed_ATL03_20181206092124_10500106_005_01.h5"# +# #South America +# # ATL03_h5_file = "processed_ATL03_20220530041141_10391513_005_02.h5"# + + +# #Alaska Cook Inlet +# # ATL03_h5_file = "processed_ATL03_20210801125753_05941205_005_01.h5"# +# ATL03_20200805175053_06320803_006_01_subsetted +# ATL03_20210828231447_10131203_006_01_subsetted + +# # Hawaii line without afterpulses +# # ATL03_h5_file = "processed_ATL03_20220122044818_04721407_006_01.h5"# + + +# # ChesapeakeBay +# ATL03_h5_file = "processed_ATL03_20230825074121_10102002_006_02.h5"# + + +# # India +# # ATL03_h5_file = "processed_ATL03_20200331204156_00810707_006_01.h5"# + + + +# #China Bohai +# # ATL03_h5_file = "processed_ATL03_20191128115256_09560502_006_01.h5"# +# # ATL03_h5_file = "processed_ATL03_20190530203316_09560302_006_02.h5"# + +# ATL03_h5_file = "processed_ATL03_20190829161305_09560402_006_02.h5"# + + + +# ATL03_h5_file_path = path + ATL03_h5_file + +# Current_Path = 
'C:/Workstation/ICESat2_HLS/Kd_ComparisionPaper/Dataset/' + +# shoreline_data_path = Current_Path + 'Shorelines/GeoPkgGlobalShoreline.gpkg' + +# #load the Global bathy dataset +# GEBCO_paths = [ +# Current_Path + "Bathy/gebco_2024_geotiff/gebco_2024_n0.0_s-90.0_w0.0_e90.0.tif", +# Current_Path + "Bathy/gebco_2024_geotiff/gebco_2024_n0.0_s-90.0_w-90.0_e0.0.tif", +# Current_Path + "Bathy/gebco_2024_geotiff/gebco_2024_n0.0_s-90.0_w90.0_e180.0.tif", +# Current_Path + "Bathy/gebco_2024_geotiff/gebco_2024_n0.0_s-90.0_w-180.0_e-90.0.tif", +# Current_Path + "Bathy/gebco_2024_geotiff/gebco_2024_n90.0_s0.0_w0.0_e90.0.tif", +# Current_Path + "Bathy/gebco_2024_geotiff/gebco_2024_n90.0_s0.0_w-90.0_e0.0.tif", +# Current_Path + "Bathy/gebco_2024_geotiff/gebco_2024_n90.0_s0.0_w90.0_e180.0.tif", +# Current_Path + "Bathy/gebco_2024_geotiff/gebco_2024_n90.0_s0.0_w-180.0_e-90.0.tif", +# ] + +# horizontal_res = 500 + +# vertical_res = 0.25 + +# # Use calculated sea height to determine photons at 0.5m below peak +# subsurface_thresh = 0.5 + +# # subsurface distance below the sea surface beginning to account +# Kd_max_depth = -1 + +# OutputPath='C:/Workstation/ICESat2_HLS/Kd_ComparisionPaper/Results/' + +import argparse + +def get_args(): + parser = argparse.ArgumentParser(description="Configure ICESat-2 HLS analysis script.") + + # General paths + parser.add_argument("--workspace_path", type=str, default='C:/Workstation/ICESat2_HLS/Kd_ComparisionPaper/', + help="Root path for all data processing.") + + # parser.add_argument("--atl03_path", type=str, default='Dataset/ATL03_ICESat2/New/', + # help="Base path for ICESat-2 ATL03 data.") + parser.add_argument("--atl03_path", type=str, default='Dataset/ATL03_ICESat2/', + help="Base path for ICESat-2 ATL03 data.") + + + + # #ChesapeakeBay + # parser.add_argument("--atl03_file", type=str, default="processed_ATL03_20230825074121_10102002_006_02.h5", + # help="Name of the ATL03 H5 file to process.") + + # parser.add_argument("--atl03_file", 
type=str, default="processed_ATL03_20221001113813_01641706_006_01.h5", + # help="Name of the ATL03 H5 file to process.") + + # # India: processed_ATL03_20200331204156_00810707_006_01.h5 + # parser.add_argument("--atl03_file", type=str, default="processed_ATL03_20200331204156_00810707_006_01.h5", + # help="Name of the ATL03 H5 file to process.") + + # #ATL03_20210628230109_00811207_006_01_subsetted + + # parser.add_argument("--atl03_file", type=str, default="processed_ATL03_20210628230109_00811207_006_01.h5", + # help="Name of the ATL03 H5 file to process.") + + + # # WaxDelta + # parser.add_argument("--atl03_file", type=str, default="ATL03_20231103172707_06982106_006_01_subsetted.h5", + # help="Name of the ATL03 H5 file to process.") + + + # # Bohai + parser.add_argument("--atl03_file", type=str, default="ATL03_20190829161305_09560402_006_02_subsetted.h5", + help="Name of the ATL03 H5 file to process.") + + # # Cook Inlet + # parser.add_argument("--atl03_file", type=str, default="ATL03_20220507112145_06931503_006_01_subsetted.h5", + # help="Name of the ATL03 H5 file to process.") + + + + # #Core Sound + # parser.add_argument("--atl03_file", type=str, default="105430185_ATL03_20220423191113_04841506_006_02_subsetted.h5", + # help="Name of the ATL03 H5 file to process.") + + # # Pamlico Sound + # parser.add_argument("--atl03_file", type=str, default="ATL03_20240415083629_04232306_006_01_subsetted.h5", + # help="Name of the ATL03 H5 file to process.") + + + # + # #Cook Inlet: processed_ATL03_20210801125753_05941205_005_01 + # parser.add_argument("--atl03_file", type=str, default="processed_ATL03_20210801125753_05941205_005_01.h5", + # help="Name of the ATL03 H5 file to process.") + + parser.add_argument("--other_data_path", type=str, default='Dataset/', + help="Path to the current working directory.") + + parser.add_argument("--output_path", type=str, default='Results/', + help="Directory for saving results.") + + # Shoreline data + 
parser.add_argument("--shoreline_data", type=str, + # default='Shorelines/GeoPkgGlobalShoreline.gpkg', + default='Shorelines/ne_10m_land/ne_10m_land.shp', + help="Path to the global shoreline dataset.") + + # Bathymetry datasets + parser.add_argument("--gebco_path", nargs='+', type=str, + default='Bathy/gebco_2024_geotiff/', + help="Path to GEBCO bathymetry datasets.") + + # Resolution settings + parser.add_argument("--horizontal_res", type=int, default=500, + help="Horizontal resolution for the analysis (in meters).") + + parser.add_argument("--vertical_res", type=float, default=0.25, + help="Vertical resolution for the analysis.") + + # Analysis parameters + parser.add_argument("--subsurface_thresh", type=float, default=1.0, + help="Threshold for photons below the sea surface.") + + parser.add_argument("--ignore_subsurface_height_thres", type=float, default=-6, + help="Maximum depth thres for Kd calculations (in meters).") #5m + + parser.add_argument("--target_beams", type=str, default='', + help="Comma-separated beam IDs to process. 
Empty means auto-select strong beams.") + + parser.add_argument("--enable_solar_background_filter", action="store_true", + help="Optional: apply solar background filtering before binning.") + parser.add_argument("--solar_elevation_day_threshold", type=float, default=6.0, + help="Solar elevation threshold (deg) to define daytime for optional filtering.") + parser.add_argument("--solar_background_quantile", type=float, default=0.8, + help="Quantile threshold of daytime background rate considered high background.") + parser.add_argument("--solar_background_min_signal_conf", type=int, default=2, + help="Minimum photon signal confidence kept in high daytime background.") + + parser.add_argument("--enable_ir_ap_filter", action="store_true", + help="Optional: remove photons using IR/afterpulse proxy flags.") + parser.add_argument("--ir_ap_quality_max", type=int, default=0, + help="Maximum allowed quality_ph value when IR/AP filter is enabled.") + parser.add_argument("--ir_ap_min_signal_conf", type=int, default=0, + help="Minimum allowed photon_conf value when IR/AP filter is enabled.") + + parser.add_argument("--enable_gebco_filter", action="store_true", + help="Optional: remove photons below GEBCO seafloor and shallow bins.") + parser.add_argument("--enable_sea_surface_flattening", action="store_true", + help="Optional: flatten small-bin sea-surface variation using larger along-track windows.") + parser.add_argument("--sea_surface_flattening_window_m", type=int, default=500, + help="Window size in meters for sea-surface flattening mean level.") + + parser.add_argument("--enable_convex_hull_filter", action="store_true", + help="Optional: apply convex hull area filtering before Kd fitting.") + parser.add_argument("--convex_hull_area_threshold", type=float, default=100, + help="Minimum convex hull area to keep a horizontal bin when enabled.") + + parser.add_argument("--enable_histogram_quality_filter", action="store_true", + help="Optional: discard along-track bins with 
weak 6-7 m signal.") + parser.add_argument("--histogram_quality_min_ratio", type=float, default=0.05, + help="Minimum ratio of photons in depth band to keep a bin.") + parser.add_argument("--histogram_quality_depth_min", type=float, default=6.0, + help="Minimum depth (m below surface) of quality-check band.") + parser.add_argument("--histogram_quality_depth_max", type=float, default=7.0, + help="Maximum depth (m below surface) of quality-check band.") + + parser.add_argument("--enable_surface_sigma_filter", action="store_true", + help="Optional: discard bins where Gaussian surface sigma is too large.") + parser.add_argument("--surface_sigma_max", type=float, default=0.5, + help="Maximum allowed Gaussian sigma (m) for sea-surface peak.") + + parser.add_argument("--enable_refraction_correction", action="store_true", + help="Optional: apply refraction correction to subsurface photons.") + parser.add_argument("--refraction_water_temp_c", type=float, default=20.0, + help="Water temperature (deg C) used for refraction correction.") + parser.add_argument("--refraction_wavelength_nm", type=float, default=532.0, + help="Laser wavelength (nm) used for refraction correction.") + + parser.add_argument("--enable_post_refraction_refit", action="store_true", + help="Optional: rebuild histograms and re-fit surface after refraction correction.") + + parser.add_argument("--enable_atl24_filter", action="store_true", + help="Optional: use ATL24 bathymetry matching before GEBCO fallback.") + parser.add_argument("--atl24_file", type=str, default='', + help="Path to ATL24 point dataset (CSV/Parquet/GPKG/SHP) for bathymetry matching.") + parser.add_argument("--atl24_max_match_distance_deg", type=float, default=0.01, + help="Maximum lon/lat degree distance for ATL24 nearest-point matching.") + + parser.add_argument("--enable_paired_beam_combine", action="store_true", + help="Optional: combine paired beams (gt1, gt2, gt3) before Kd fitting.") + + # switch on consider the coastal water or 
inland water + # if Inland water, we should use one mask and for coastal water, it should be another mask + + # consider bathymetry or not manual setting + + + #night vs daytime + + #solar elevation detemine remove or not for solar background + + + # 1-2 hours + + + return parser.parse_args() + +if __name__ == "__main__": + args = get_args() + # Access parameters like this: + atl03_file_path = args.workspace_path + args.atl03_file + + print("Processing file:", atl03_file_path) + print("Output path:", args.output_path) diff --git a/icesat-2_kdph_py/data/grandmesa.geojson b/icesat-2_kdph_py/data/grandmesa.geojson new file mode 100644 index 0000000..3040374 --- /dev/null +++ b/icesat-2_kdph_py/data/grandmesa.geojson @@ -0,0 +1,8 @@ +{ +"type": "FeatureCollection", +"name": "grand_mesa_poly", +"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } }, +"features": [ +{ "type": "Feature", "properties": { }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -108.311682565537666, 39.137576462129438 ], [ -108.341156683252237, 39.037589876133246 ], [ -108.287868638779599, 38.89051431295789 ], [ -108.207729687800509, 38.823205529198098 ], [ -108.074601643110313, 38.847513782586297 ], [ -107.985605104949812, 38.943991201101703 ], [ -107.728398587557521, 39.015109302306328 ], [ -107.787241424909936, 39.195630349659986 ], [ -108.049394800987542, 39.139504663354245 ], [ -108.172870009708575, 39.159200663961158 ], [ -108.311682565537666, 39.137576462129438 ] ] ] } } +] +} diff --git a/icesat-2_kdph_py/icepyxATL03.py b/icesat-2_kdph_py/icepyxATL03.py new file mode 100644 index 0000000..45555f2 --- /dev/null +++ b/icesat-2_kdph_py/icepyxATL03.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +""" +Created on Sun Feb 16 21:32:51 2025 + +@author: wayne +""" +import icepyx as ipx + +import json +import math +import warnings + +import numpy as np +import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap + +from shapely.geometry import 
shape, GeometryCollection + +# Open a geojson of our area of interest +with open("./data/grandmesa.geojson") as f: + features = json.load(f)["features"] + +grandmesa = GeometryCollection([shape(feature["geometry"]).buffer(0) for feature in features]) + + +# Use our search parameters to setup a search Query +short_name = 'ATL03' +spatial_extent = list(grandmesa.bounds) +date_range = ['2019-12-01','2019-12-12'] +region = ipx.Query(short_name, spatial_extent, date_range) + +# Display if any data files, or granules, matched our search +region.avail_granules(ids=True) + +# Download the granules into the '/data/grandmesa_ATL08' folder +region.download_granules('/data/grandmesa_ATL08') diff --git a/icesat-2_kdph_py/kd_utils/Kd_analysis.py b/icesat-2_kdph_py/kd_utils/Kd_analysis.py new file mode 100644 index 0000000..6e28301 --- /dev/null +++ b/icesat-2_kdph_py/kd_utils/Kd_analysis.py @@ -0,0 +1,264 @@ +# kd_utils/Kd_analysis.py +# updated to perform a linear fit in log-space, just like MATLAB's polyfitn(zdepth, y, 1) for a first-order polynomial. 
+ +import numpy as np +import pandas as pd +import logging +# from scipy.optimize import curve_fit +from sklearn.linear_model import LinearRegression + +# def log_model(z, kd, e0): +# return np.log(e0) - kd * z + +## This is wrong because the input is already a bined data, +## it is not necessary to do a histogram again +# def CalculateKdFromFilteredSubsurfacePhoton(df, vertical_res=0.8): +# if df.empty or 'lat_bins' not in df.columns: +# return pd.DataFrame({'lat_bins': [np.nan], 'kd': [np.nan], 'e0': [np.nan], 'latitude': [np.nan], 'longitude': [np.nan]}) + +# # Get the latitude bin value +# lat_bin_value = df['lat_bins'].iloc[0] if not df['lat_bins'].empty else np.nan +# latitude = df['latitude'].mean() if 'latitude' in df.columns else df['lat'].mean() if 'lat' in df.columns else np.nan +# longitude = df['longitude'].mean() if 'longitude' in df.columns else df['lon'].mean() if 'lon' in df.columns else np.nan + +# # Calculate photon height range +# photon_height_min = df['photon_height'].min() +# photon_height_max = df['photon_height'].max() + +# # Check for sufficient data range +# if np.isnan(photon_height_min) or np.isnan(photon_height_max) or photon_height_min == photon_height_max: +# return pd.DataFrame({'lat_bins': [lat_bin_value], 'kd': [np.nan], 'e0': [np.nan]}) + +# height_bins_range = abs(photon_height_max - photon_height_min) +# height_bins_number = round(height_bins_range / vertical_res) + +# # Ensure there are enough bins +# if height_bins_number < 5: +# return pd.DataFrame({'lat_bins': [lat_bin_value], 'kd': [np.nan], 'e0': [np.nan]}) + +# # Create histogram of photon heights +# bin_edges = np.linspace(photon_height_min, photon_height_max, num=height_bins_number) +# counts, _ = np.histogram(df['photon_height'], bins=bin_edges) +# bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 + +# # Store histogram data +# hist_df = pd.DataFrame({'zdepth': bin_centers, 'photon_counts': counts}) + +# # x value for model +# # Reverse zdepth to align with the 
MATLAB approach +# hist_df['zdepth'] = hist_df['zdepth'].max() - hist_df['zdepth'] + +# # Log-transform photon counts, replacing zeros with NaN +# # y value for model +# hist_df['log_photon_counts'] = np.log(hist_df['photon_counts'].replace(0, np.nan)) +# hist_df.loc[np.isinf(hist_df['log_photon_counts']), 'log_photon_counts'] = np.nan + +# # Filter out rows with NaNs in either column for regression +# valid_data = hist_df.dropna(subset=['zdepth', 'log_photon_counts']) + +# # Check for enough valid data points +# # Skip the regression if there are fewer than 5 datapoints +# if valid_data['log_photon_counts'].notna().sum() > 3: +# # Drop NaNs for regression +# zdepth_valid = valid_data['zdepth'].values.reshape(-1, 1) +# log_counts_valid = valid_data['log_photon_counts'].values + +# # Perform linear regression +# model = LinearRegression() +# model.fit(zdepth_valid, log_counts_valid) + +# # Extract kd as the negative of the slope and e0 from the intercept +# kd = -model.coef_[0] + +# print('zdepth_valid:',zdepth_valid) +# print('photon_counts:',hist_df['photon_counts']) +# print('kd:',kd) + +# e0 = np.exp(model.intercept_) + +# # Set kd to NaN if negative +# if kd < 0: +# kd = np.nan +# else: +# kd, e0 = np.nan, np.nan + +# return pd.DataFrame({ +# 'lat_bins': [lat_bin_value], +# 'kd': [kd], +# 'e0': [e0], +# 'latitude': [latitude], +# 'longitude': [longitude] +# }) + + +# # one solution is to adjust the vertical_res to 0.25 +# def CalculateKdFromFilteredSubsurfacePhoton(df, vertical_res=0.25): +# if df.empty or 'lat_bins' not in df.columns: +# return pd.DataFrame({'lat_bins': [np.nan], 'kd': [np.nan], 'e0': [np.nan], 'latitude': [np.nan], 'longitude': [np.nan]}) + +# # Get the latitude bin value +# lat_bin_value = df['lat_bins'].iloc[0] if not df['lat_bins'].empty else np.nan +# latitude = df['latitude'].mean() if 'latitude' in df.columns else df['lat'].mean() if 'lat' in df.columns else np.nan +# longitude = df['longitude'].mean() if 'longitude' in df.columns else 
df['lon'].mean() if 'lon' in df.columns else np.nan + +# # Calculate photon height range +# photon_height_min = df['photon_height'].min() +# photon_height_max = df['photon_height'].max() + +# # Check for sufficient data range +# if np.isnan(photon_height_min) or np.isnan(photon_height_max) or photon_height_min == photon_height_max: +# return pd.DataFrame({'lat_bins': [lat_bin_value], 'kd': [np.nan], 'e0': [np.nan]}) + +# height_bins_range = abs(photon_height_max - photon_height_min) +# height_bins_number = round(height_bins_range / vertical_res) + +# # Ensure there are enough bins +# if height_bins_number < 5: +# return pd.DataFrame({'lat_bins': [lat_bin_value], 'kd': [np.nan], 'e0': [np.nan]}) + +# # Create histogram of photon heights +# bin_edges = np.linspace(photon_height_min, photon_height_max, num=height_bins_number) +# counts, _ = np.histogram(df['photon_height'], bins=bin_edges) +# bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 + +# # Store histogram data +# hist_df = pd.DataFrame({'zdepth': bin_centers, 'photon_counts': counts}) + +# # x value for model +# # Reverse zdepth to align with the MATLAB approach +# hist_df['zdepth'] = hist_df['zdepth'].max() - hist_df['zdepth'] + +# # Log-transform photon counts, replacing zeros with NaN +# # y value for model +# hist_df['log_photon_counts'] = np.log(hist_df['photon_counts'].replace(0, np.nan)) +# hist_df.loc[np.isinf(hist_df['log_photon_counts']), 'log_photon_counts'] = np.nan + +# # Filter out rows with NaNs in either column for regression +# valid_data = hist_df.dropna(subset=['zdepth', 'log_photon_counts']) + +# # Check for enough valid data points +# # Skip the regression if there are fewer than 5 datapoints +# if valid_data['log_photon_counts'].notna().sum() > 3: +# # Drop NaNs for regression +# zdepth_valid = valid_data['zdepth'].values.reshape(-1, 1) +# log_counts_valid = valid_data['log_photon_counts'].values + +# # Perform linear regression +# model = LinearRegression() +# model.fit(zdepth_valid, 
log_counts_valid) + +# # Extract kd as the negative of the slope and e0 from the intercept +# kd = -model.coef_[0] + +# print('zdepth_valid:',zdepth_valid) +# print('photon_counts:',hist_df['photon_counts']) +# print('kd:',kd) + +# e0 = np.exp(model.intercept_) + +# # Set kd to NaN if negative +# if kd < 0: +# kd = np.nan +# else: +# kd, e0 = np.nan, np.nan + +# return pd.DataFrame({ +# 'lat_bins': [lat_bin_value], +# 'kd': [kd], +# 'e0': [e0], +# 'latitude': [latitude], +# 'longitude': [longitude] +# }) + + +# another solution is to calculate kd without hist +def CalculateKdFromFilteredSubsurfacePhoton(df, vertical_res=0.8): + # Early exit if DataFrame is empty or missing required column + if df.empty or 'lat_bins' not in df.columns: + return pd.DataFrame({'lat_bins': [np.nan], 'kd': [np.nan], 'e0': [np.nan], 'latitude': [np.nan], 'longitude': [np.nan]}) + + # Retrieve latitude and longitude + lat_bin_value = df['lat_bins'].iloc[0] if not df['lat_bins'].empty else np.nan + latitude = df['latitude'].mean() if 'latitude' in df.columns else df['lat'].mean() if 'lat' in df.columns else np.nan + longitude = df['longitude'].mean() if 'longitude' in df.columns else df['lon'].mean() if 'lon' in df.columns else np.nan + + # Use value_counts to get photon counts in each height bin + height_counts = df['height_bins'].value_counts().sort_index() + bin_centers = height_counts.index.astype(float) + + # Create a DataFrame for the height bins and counts + hist_df = pd.DataFrame({'zdepth': bin_centers, 'photon_counts': height_counts.values}) + + # Reverse zdepth for model alignment + hist_df['zdepth'] = hist_df['zdepth'].max() - hist_df['zdepth'] + + # Log-transform photon counts, replacing zeros with NaN for regression + hist_df['log_photon_counts'] = np.log(hist_df['photon_counts'].replace(0, np.nan)) + hist_df.loc[np.isinf(hist_df['log_photon_counts']), 'log_photon_counts'] = np.nan + + # Filter for rows without NaNs for regression + valid_data = 
hist_df.dropna(subset=['zdepth', 'log_photon_counts']) + + # Perform regression if there are sufficient valid data points + if valid_data['log_photon_counts'].notna().sum() > 3: + zdepth_valid = valid_data['zdepth'].values.reshape(-1, 1) + log_counts_valid = valid_data['log_photon_counts'].values + + # Perform linear regression + model = LinearRegression() + model.fit(zdepth_valid, log_counts_valid) + + # Calculate kd and e0 + kd = -model.coef_[0] + e0 = np.exp(model.intercept_) + + # Set kd to NaN if negative + if kd < 0: + kd = np.nan + else: + kd, e0 = np.nan, np.nan + + return pd.DataFrame({ + 'lat_bins': [lat_bin_value], + 'kd': [kd], + 'e0': [e0], + 'latitude': [latitude], + 'longitude': [longitude] + }) + + +# Original kd calculation function remains unchanged +def calculate_kd(filtered_seafloor_subsurface_photon_dataset): + logging.info("Calculating Kd from filtered subsurface photon dataset") + SubsurfacePhotonDFAddedKd = filtered_seafloor_subsurface_photon_dataset\ + .groupby('lat_bins', observed=False)\ + .apply(CalculateKdFromFilteredSubsurfacePhoton, include_groups=True) + + + # Remove the index without resetting it if 'lat_bins' already exists + SubsurfacePhotonDFAddedKd = SubsurfacePhotonDFAddedKd.droplevel(0) + return SubsurfacePhotonDFAddedKd + + +# Updated function to apply kd calculation beam-by-beam +def process_kd_calculation(Final_filtered_subsurface_photon_dataset): + # Initialize list to store results for each beam + kd_beam_datasets = [] + + # Group by 'beam_id' to process each beam independently + for beam_id, beam_data in Final_filtered_subsurface_photon_dataset.groupby('beam_id'): + logging.info(f"Calculating Kd for beam: {beam_id}") + + # Apply the calculate_kd function to the current beam's dataset + SubsurfacePhotonDFAddedKd = calculate_kd(beam_data) + + # Add a column to track the beam_id in the results + SubsurfacePhotonDFAddedKd['beam_id'] = beam_id + + # Append the result to the list + 
kd_beam_datasets.append(SubsurfacePhotonDFAddedKd) + + # Combine results from all beams into a single DataFrame + combined_kd_dataset = pd.concat(kd_beam_datasets, ignore_index=True) + + return combined_kd_dataset \ No newline at end of file diff --git a/icesat-2_kdph_py/kd_utils/SeaSurfaceFlattening.py b/icesat-2_kdph_py/kd_utils/SeaSurfaceFlattening.py new file mode 100644 index 0000000..6b3d64f --- /dev/null +++ b/icesat-2_kdph_py/kd_utils/SeaSurfaceFlattening.py @@ -0,0 +1,446 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Jul 25 13:25:53 2024 + +@author: wayne +""" + +from kd_utils import * +import scipy.interpolate + + +path = 'C:/Workstation/ICESat2_HLS/Kd_ComparisionPaper/Dataset/ATL03_ICESat2/' +ATL03_h5_file = "processed_ATL03_20220122044818_04721407_006_01.h5" +ATL03_h5_file_path = path + ATL03_h5_file + + +# Set the shoreline data path +Current_Path='C:/Workstation/ICESat2_HLS/Kd_ComparisionPaper/Dataset/' + +#load the shoreline package +shoreline_data_path = Current_Path+'Shorelines/GeoPkgGlobalShoreline.gpkg' + + + +# speed of light +c = 299792458.0 + +# bin data first based on horizontal resolution +horizontal_res=10 + +# vertical resolution +# vertical_res=0.1 +vertical_res=0.25 +# vertical_res=1 + +# sea subsurface thresh +# determine subsurface by setting how depth below the sea surface +subsurface_thresh = 0.5 + + +# define specific rectangular target zone for batch analysis +# i.e., From subsurface depth 0.5 to specific KdMaxDepth depth (e.g, 6) +Kd_max_depth = 6 + + +######################################## +#readATL03_github +IS2_atl03_mds,IS2_atl03_attrs,IS2_atl03_beams = read_granule(ATL03_h5_file_path,ATTRIBUTES=True) + +# extract parameters from ICESat-2 ATLAS HDF5 file name +rx = re.compile(r'(processed_)?(ATL\d{2})_(\d{4})(\d{2})(\d{2})(\d{2})' + r'(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$') +SUB,PRD,YY,MM,DD,HH,MN,SS,TRK,CYCL,GRAN,RL,VERS,AUX = rx.findall(ATL03_h5_file_path).pop() + + +# initialize data storage 
variables +# variables of interest for generating three types classification +Segment_ID = {} +Segment_Index_begin = {} +Segment_PE_count = {} + +Equator_Segment_Distance = {} +Segment_Length = {} + +Segment_Is_Land={} + +# mean geolocation, height and delta time +Segment_Lon = {} +Segment_Lat = {} +Segment_Elev = {} +Segment_Time = {} + +Segment_ref_elev= {} +Segment_ref_azimuth= {} + +# relative_AT_dist = {} +# relative_seg_dist = {} + +# background photon rate +background_rate = {} +background_counts={} + + + +######################################## +#proc02_manually_select_water_forGitHub +#remove water surface based on simple 0.5 m + + +#raw beam-by-beam photon data after cutoff below the max value 0.5 below peak +IS2_atl03Cut50cmBelowPeak={} + +# for each input beam within the file +for gtx in sorted(IS2_atl03_beams[0:1]): +# for gtx in sorted(IS2_atl03_beams): + # data and attributes for beam gtx + IS2_val = IS2_atl03_mds[gtx] + IS2_attrs = IS2_atl03_attrs[gtx] + + # ATL03 Segment ID + Segment_ID[gtx] = IS2_val['geolocation']['segment_id'] + # number of valid overlapping ATL03 segments + n_seg = len(Segment_ID[gtx]) + # number of photon events + n_pe, = IS2_val['heights']['delta_time'].shape + + # first photon ID (1-based) in each segment (convert to 0-based indexing) + Segment_Index_begin[gtx] = IS2_val['geolocation']['ph_index_beg'] - 1 + + # number of photon events in the segment + Segment_PE_count[gtx] = IS2_val['geolocation']['segment_ph_cnt'] + + # along-track distance from the equator crossing to the start of each ATL03 20 meter geolocation segment + Equator_Segment_Distance[gtx] = IS2_val['geolocation']['segment_dist_x'] + + # along-track length for each ATL03 segment + Segment_Length[gtx] = IS2_val['geolocation']['segment_length'] + + # Transmit time of the reference photon + delta_time = IS2_val['geolocation']['delta_time'] + + # get geolocation lat/lon + segment_lat = IS2_val['geolocation']['reference_photon_lat'][:].copy() + segment_lon= 
IS2_val['geolocation']['reference_photon_lon'][:].copy() + + # get parameters ref elev and azimuth for refraction correction + ref_elev = IS2_val['geolocation']['ref_elev'][:].copy() + ref_azimuth = IS2_val['geolocation']['ref_azimuth'][:].copy() + + #get geoid for converting WGS84 ellipsoid to geoid correction + geoid = IS2_val['geophys_corr']['geoid'][:].copy() + + # photon event heights + h_ph = IS2_val['heights']['h_ph'][:].copy() + lat_ph = IS2_val['heights']['lat_ph'][:].copy() + lon_ph = IS2_val['heights']['lon_ph'][:].copy() +# dist_ph_along = IS2_val['heights']['dist_ph_along'][:].copy() + signal_conf_photon = IS2_val['heights']['signal_conf_ph'][...,0].copy() + + # along-track and across-track distance for photon events + x_atc = IS2_val['heights']['dist_ph_along'][:].copy() + y_atc = IS2_val['heights']['dist_ph_across'][:].copy() + + # photon quality + quality_ph=IS2_val['heights']['quality_ph'] + + #calculate along track distance + for seg_index in range(n_seg): + # index for 20m segment j + idx = Segment_Index_begin[gtx][seg_index] + # number of photons in 20m segment + cnt = Segment_PE_count[gtx][seg_index] + # add segment distance to along-track coordinates + x_atc[idx:idx+cnt] += Equator_Segment_Distance[gtx][seg_index] + + # calculate along track distance relative to the beginning of the cut segment + relative_AT_dist=(x_atc-x_atc[0])/1000 + + + relative_seg_dist=(Equator_Segment_Distance[gtx]-Equator_Segment_Distance[gtx][0])/1000 + + + # this function is a significant slowdown without pygeos + Segment_Is_Land['geometry'] = gpd.points_from_xy(segment_lon, segment_lat) + + # create a geo dataframe + ICESat2_GDF = gpd.GeoDataFrame(Segment_Is_Land,crs="EPSG:4326") + + #Step1: isolate water using landmask + # spatial joint to determine land/sea mask + Segment_Is_Land_Labels=isolate_sea_land_photons(shoreline_data_path,ICESat2_GDF) + +# # set the labels back to ICESat2 geopandas dataframe + ICESat2_GDF.loc[:, 'is_land'] = Segment_Is_Land_Labels + + # 
interpolate is_land labels based on photon lat + island_interp1d_model = scipy.interpolate.interp1d(segment_lat[:], + Segment_Is_Land_Labels[:], + fill_value="extrapolate") + + # Apply interpid model + is_land_label_interp1d = island_interp1d_model(lat_ph[:]) + + # Ref_elev on a per photon level (assign seg ref_elev to photons) + ref_elev_interp1d_model = scipy.interpolate.interp1d(segment_lat[:], + ref_elev[:], + fill_value="extrapolate") + # apply interpid model + ph_ref_elev = ref_elev_interp1d_model(lat_ph[:]) + + # Ref_azimuth on a per photon level (assign seg ref_azimuth to photons) + ref_azimuth_interp1d_model = scipy.interpolate.interp1d(segment_lat[:], + ref_azimuth[:], + fill_value="extrapolate") + + # apply interpid model + ph_ref_azimuth = ref_azimuth_interp1d_model(lat_ph[:]) + + + # interpolate geoid based on segment lat + geoid_interp1d_model = scipy.interpolate.interp1d(segment_lat[:], + geoid[:], + fill_value="extrapolate") + + # Apply interpid model + ph_geoid = geoid_interp1d_model(lat_ph[:]) + + + #geoid correct + h_ph_geoid_cor = h_ph[:] - ph_geoid[:] + + # Find the epsg code + epsg_code = convert_wgs_to_utm(lon_ph[0], lat_ph[0]) + epsg_num = int(epsg_code.split(':')[-1]) + + # Orthometrically correct the data using the epsg code + lat_utm, lon_utm, h_ph_cor = orthometric_correction(lat_ph, lon_ph, h_ph, epsg_code) + + # Aggregate data into dataframe + sea_photon_dataset = \ + pd.DataFrame({'latitude': lat_ph, 'longitude': lon_ph, + 'lat': lat_utm, 'lon': lon_utm, + 'photon_height': h_ph_geoid_cor, + 'quality_ph': quality_ph, + 'is_land_label': is_land_label_interp1d, + 'photon_conf': signal_conf_photon, + 'ref_elevation': ph_ref_elev, + 'ref_azimuth': ph_ref_azimuth, + 'relative_AT_dist':relative_AT_dist}, + columns=['latitude', 'longitude', + 'lat', 'lon', + 'photon_height', + 'quality_ph', + 'is_land_label', 'photon_conf', + 'ref_elevation', 'ref_azimuth', + 'relative_AT_dist']) + + # #remove the saturated photons + # 
sea_photon_dataset=sea_photon_dataset[sea_photon_dataset['quality_ph'] < 1] + + #remove the land photons + sea_photon_dataset=sea_photon_dataset[sea_photon_dataset['is_land_label'] != 1] + + # bin the data by 25 cm + binned_dataset_sea_surface=horizontal_vertical_bin_dataset(sea_photon_dataset, horizontal_res, vertical_res) + + # get sea surface height + # threshold to determine how much depth below sea surface will be accounted + # Output: + # ->final_sea_surface_height + # ->sea_surface_height_abnormal_label + # ->sea_surface_dominated_label + sea_surface_height, sea_surface_label, solo_sea_surface_label, subsurface_photon_dataset = \ + get_sea_surface_height(binned_dataset_sea_surface, subsurface_thresh) + + +sea_surface_height.reverse() + + + + + +#################################################################### +# Tasks: Sea Surface flattening +# I have a list value of sea_surface_height over each 10 m bin, I would like to +# 1) calculate the mean sea_surface_height value along each big bin with 500 m ; +# 2) Within each 10-m along track increment, +# adjust all of the photons (i.e., subsurface_photon_dataset) within that 10-m column either up or down +# based on the deviation of the local sea surface height from the mean sea surface height + +###################### +# Step 1: Creating a DataFrame for sea_surface_height with a corresponding 10 m small bin index +# each element in sea_surface_height corresponds to a 10 m bin +df_sea_surface = pd.DataFrame({ + 'sea_surface_height': sea_surface_height, # sea_surface_height is provided as a list + 'small_bins': np.arange(len(sea_surface_height)) # Index for each 10 m bin +}) + +# # Map 'small_bins'(i.e., 'lat_bins') to 500 m 'big_bins' +df_sea_surface['big_bins'] = df_sea_surface['small_bins'] // 50 # 50 10 m bins in each 500 m bin + + +###################### +# Step 2: Calculate Mean Sea_surface_height value for Each 500-m Big Bin +mean_sea_surface_elevation_bigBin = 
df_sea_surface.groupby('big_bins')['sea_surface_height'].mean().reset_index()
mean_sea_surface_elevation_bigBin.rename(columns={'sea_surface_height': 'mean_sea_surface_height_big_bins'}, inplace=True)

# Number of small bins per 500 m big bin (50 for the 10 m bins used here)
bins_per_big_bin = max(1, int(500 // horizontal_res))

# Prepare subsurface_photon_dataset for merging
subsurface_photon_dataset['big_bins'] = subsurface_photon_dataset['lat_bins'].astype(int) // bins_per_big_bin

# Attach the big-bin mean surface height to every photon
subsurface_photon_dataset = subsurface_photon_dataset.merge(mean_sea_surface_elevation_bigBin,
                                                            on='big_bins', how='left')

# Attach the local (small-bin) surface height to every photon
subsurface_photon_dataset = subsurface_photon_dataset.merge(df_sea_surface[['small_bins', 'sea_surface_height']],
                                                            left_on='lat_bins', right_on='small_bins', how='left')


#######################
# Step 3: Within each small bin, shift the photon heights by the deviation of
# the local sea surface from the big-bin mean.
# A photon lying on the local surface must end up on the mean surface, i.e.
#     adjusted = height + (mean - local).
# Subtracting the offset instead (as this script did before) moves the local
# surface to 2 * local - mean and doubles the deviation.
surface_offset = (subsurface_photon_dataset['mean_sea_surface_height_big_bins']
                  - subsurface_photon_dataset['sea_surface_height'])

subsurface_photon_dataset['adjusted_photon_height'] = subsurface_photon_dataset['photon_height'] + surface_offset
subsurface_photon_dataset['Dif_photon_height'] = surface_offset

######################################
### Visualization

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib.patches import Rectangle
import matplotlib.transforms as mtransforms


# x positions for the per-bin curves: evenly spread over the segment range
sea_surface_x_axis_bins = np.linspace(relative_seg_dist.min(),
                                      relative_seg_dist.max(), len(sea_surface_height))

mean_sea_surface_x_axis_bins = np.linspace(relative_seg_dist.min(),
                                           relative_seg_dist.max(), len(mean_sea_surface_elevation_bigBin))


# Adjusting the global font size
plt.rcParams['font.size'] = 18

# x-axis limits of the zoomed inset
zoom_xlim = (10.5, 11.5)

# photons of the full sea dataset that fall inside the zoom window
x_index_within_zoom = (sea_photon_dataset.relative_AT_dist >= zoom_xlim[0]) & (sea_photon_dataset.relative_AT_dist <= zoom_xlim[1])
zoomed_relative_AT_dist = sea_photon_dataset.relative_AT_dist[x_index_within_zoom]
zoomed_photon_height = sea_photon_dataset.photon_height[x_index_within_zoom]

# same selection for the adjusted subsurface photons
subsurface_x_index_within_zoom = (subsurface_photon_dataset.relative_AT_dist >= zoom_xlim[0]) & (subsurface_photon_dataset.relative_AT_dist <= zoom_xlim[1])
subsurface_zoomed_relative_AT_dist = subsurface_photon_dataset.relative_AT_dist[subsurface_x_index_within_zoom]
zoomed_photon_height_adjusted = subsurface_photon_dataset.adjusted_photon_height[subsurface_x_index_within_zoom]
zoomed_photon_height_Dif = subsurface_photon_dataset.Dif_photon_height[subsurface_x_index_within_zoom]


hlims = [-40, 5]      # height limits of the main axes
sub_hlims = [-10, 3]  # height limits of the inset


#########################
fig = plt.figure(figsize=(16, 8), dpi=300, facecolor='w', edgecolor='k')
ax = fig.add_subplot(111)
ax.set_xlabel('Distance From Start of Track (km)')
ax.set_ylabel('Photon Height (m)')

ax.scatter(sea_photon_dataset.relative_AT_dist, sea_photon_dataset.photon_height,
           s=10, c='k', alpha=0.15, edgecolors='none', label='ATL03 Photons')
ax.plot(sea_surface_x_axis_bins, sea_surface_height, linewidth=0.8, color='b', label='Surface Peak')
ax.plot(sea_surface_x_axis_bins, [x - 0.5 for x in sea_surface_height], linewidth=0.8, color='#DD571C', label='0.5 m Below Surface Peak')
ax.plot(sea_surface_x_axis_bins, [x - 1 for x in sea_surface_height], linewidth=0.8, color='#FDA172', label='1 m Below Surface Peak')
ax.plot(sea_surface_x_axis_bins, [x - 2 for x in sea_surface_height], linewidth=0.8, color='#FCAE1E', label='2 m Below Surface Peak')

# Inset axes for the zoomed area, anchored in figure-x / axes-y coordinates
trans = mtransforms.blended_transform_factory(ax.figure.transFigure, ax.transAxes)
axins = inset_axes(ax, width=5, height=3, loc=3,
                   bbox_to_anchor=(0.2, 0.1, 0.4, 0.4),
                   bbox_transform=trans,
                   borderpad=0)

# Original, adjusted and offset values inside the zoom window
axins.scatter(zoomed_relative_AT_dist, zoomed_photon_height,
              s=10, c='k', alpha=0.15, edgecolors='none', label='Zoomed ATL03 Photons')

axins.scatter(subsurface_zoomed_relative_AT_dist, zoomed_photon_height_adjusted,
              s=10, c='r', alpha=0.15, edgecolors='none', label='Zoomed Adjusted ATL03 Photons')

axins.scatter(subsurface_zoomed_relative_AT_dist, zoomed_photon_height_Dif,
              s=20, c='g', alpha=0.15, edgecolors='none', label='Zoomed Dif ATL03 Photons')

# Add horizontal line at y=0
axins.axhline(y=0, color='blue', alpha=0.15, linestyle='--', linewidth=1)

# First, we need to find the indices of the items in sea_surface_x_axis_bins that fall within the zoomed range.
zoomed_indices = [index for index, x in enumerate(sea_surface_x_axis_bins) if zoom_xlim[0] <= x <= zoom_xlim[1]]

# Then, use these indices to select the corresponding items from both sea_surface_x_axis_bins and sea_surface_height.
zoomed_sea_surface_x = [sea_surface_x_axis_bins[i] for i in zoomed_indices]
zoomed_sea_surface_height = [sea_surface_height[i] for i in zoomed_indices]


#0197f6,#BE5504,#FA8128
axins.plot(zoomed_sea_surface_x, zoomed_sea_surface_height, linewidth=1, color='b', label='Zoomed Surface Peak')
axins.plot(zoomed_sea_surface_x, [x - 0.5 for x in zoomed_sea_surface_height], linewidth=1, color='#DD571C', label='0.5 m Below Zoomed Surface Peak')
axins.plot(zoomed_sea_surface_x, [x - 1 for x in zoomed_sea_surface_height], linewidth=1, color='#FDA172', label='1 m Below Zoomed Surface Peak')
axins.plot(zoomed_sea_surface_x, [x - 2 for x in zoomed_sea_surface_height], linewidth=1, color='#FCAE1E', label='2 m Below Zoomed Surface Peak')

# mean sea surface height at 500m bins
# mean_sea_surface_elevation_bigBin is a two-column frame (big_bins plus the
# mean height); plot only the height column, otherwise the bin index is drawn
# as a second red curve.
axins.plot(mean_sea_surface_x_axis_bins,
           mean_sea_surface_elevation_bigBin['mean_sea_surface_height_big_bins'],
           linewidth=1, color='r', label='Zoomed Surface Peak 500m bin')


# Restrict the inset to the zoom window and its own height range
axins.set_xlim(zoom_xlim)
axins.set_ylim(sub_hlims)

axins.set_title('Zoomed in')


# Adding the rectangle indicating the zoomed area
# Draw a rectangle on the main plot to indicate the zoomed area
# We use the data coordinates for the rectangle, as it needs to correspond to the actual data points on the main axes.
# 1. Function to create an R-tree spatial index for raster bounds
def create_spatial_index(gebco_paths):
    """
    Create an R-tree spatial index for raster bounds to quickly find relevant rasters.

    Every raster is opened, its first band is read into memory and its bounds
    are inserted into the index with the path as payload. The datasets are
    returned open; closing them is the caller's job.

    Parameters:
    gebco_paths (list): List of paths to GEBCO raster files.

    Returns:
    raster_data_dict (dict): Maps each path to a tuple (open dataset, band-1 array).
    spatial_index (rtree.index.Index): R-tree spatial index for raster bounds.

    Raises:
    Whatever rasterio raises when a file cannot be opened or read; datasets
    opened before the failure are closed first.
    """
    idx = index.Index()
    raster_data_dict = {}
    opened_rasters = []
    try:
        for i, path in enumerate(gebco_paths):
            with rasterio.Env(GTIFF_SRS_SOURCE='EPSG'):
                gebco_raster = rasterio.open(path)
                # remember the handle before reading so a failed read cannot leak it
                opened_rasters.append(gebco_raster)
                raster_data = gebco_raster.read(1)
                raster_data_dict[path] = (gebco_raster, raster_data)
                bounds = gebco_raster.bounds
                idx.insert(i, (bounds.left, bounds.bottom, bounds.right, bounds.top), obj=path)
    except Exception:
        # Nothing is handed back on failure, so release what was opened so far
        for opened in opened_rasters:
            opened.close()
        raise
    return raster_data_dict, idx

# 2. 
# 2. Function to determine which rasters are needed for a given set of coordinates using spatial index in a batch manner
def get_relevant_rasters_using_index(lons, lats, raster_data_dict, spatial_index):
    """
    Determine which raster datasets are relevant for the given coordinates using spatial index.
    A single bounding-box query covering all points is issued against the index.

    Parameters:
    lons (numpy.ndarray): Array of longitudes.
    lats (numpy.ndarray): Array of latitudes.
    raster_data_dict (dict): Dictionary containing raster datasets and their data.
    spatial_index (rtree.index.Index): R-tree spatial index for raster bounds.

    Returns:
    relevant_rasters (list): List of (dataset, data) tuples, ordered by raster
        path so the result is reproducible. Empty when no points are given.
    """
    lons = np.asarray(lons)
    lats = np.asarray(lats)

    # min()/max() of an empty array raise; no points means no rasters
    if lons.size == 0 or lats.size == 0:
        return []

    # Bounding box that covers all points: (minx, miny, maxx, maxy)
    query_bounds = (lons.min(), lats.min(), lons.max(), lats.max())

    # Get all rasters that intersect with the bounding box
    matches = spatial_index.intersection(query_bounds, objects=True)

    # De-duplicate and sort: the order decides which raster is sampled first
    relevant_paths = sorted({match.object for match in matches})
    return [(raster_data_dict[path][0], raster_data_dict[path][1]) for path in relevant_paths]

# 3. Function to get seafloor elevation from the relevant rasters in a vectorized manner
def get_seafloor_elevation(lons, lats, relevant_rasters):
    """
    Get seafloor elevations for a batch of points based on longitude and latitude from relevant rasters.

    Each raster is sampled only at points that lie inside its bounds and do not
    have an elevation yet, so a point is never filled from a raster that does
    not cover it. Samples equal to the raster's nodata value are treated as
    missing.

    Parameters:
    lons (numpy.ndarray): Array of longitudes.
    lats (numpy.ndarray): Array of latitudes.
    relevant_rasters (list): List of (dataset, data) tuples; only the dataset
        (its ``bounds``, ``nodata`` and ``sample``) is used here.

    Returns:
    seafloor_elevations (numpy.ndarray): Array of seafloor elevations, NaN
        where no raster provides a value.
    """
    lons = np.asarray(lons, dtype=float)
    lats = np.asarray(lats, dtype=float)
    seafloor_elevations = np.full(len(lons), np.nan)

    for gebco_raster, _raster_data in relevant_rasters:
        unresolved = np.isnan(seafloor_elevations)
        if not unresolved.any():
            break

        # Half-open extent: the right and bottom edges belong to the neighbour
        bounds = gebco_raster.bounds
        covered = (
            unresolved
            & (lons >= bounds.left) & (lons < bounds.right)
            & (lats > bounds.bottom) & (lats <= bounds.top)
        )
        targets = np.flatnonzero(covered)
        if targets.size == 0:
            continue

        # Sample band values for the covered points in one batch
        points = np.column_stack((lons[targets], lats[targets]))
        sampled = np.array([value[0] for value in gebco_raster.sample(points)], dtype=float)

        nodata = getattr(gebco_raster, 'nodata', None)
        if nodata is not None:
            sampled[sampled == nodata] = np.nan

        seafloor_elevations[targets] = sampled

    return seafloor_elevations

# 4. The main process function that ties everything together
def process_seafloor_data(gebco_paths, sea_photon_dataset):
    """
    Main function to process the seafloor data.

    Adds a ``seafloor_elevation`` column to ``sea_photon_dataset`` (the frame
    passed in is modified) and returns the rows lying above the seafloor.

    Parameters:
    gebco_paths (list): List of paths to GEBCO raster files.
    sea_photon_dataset (pandas.DataFrame): Dataset containing longitude, latitude, and photon height.

    Returns:
    filtered_sea_photon_dataset (pandas.DataFrame): Filtered dataset containing points above the seafloor.
        Rows without a seafloor elevation (NaN) are dropped as well, because
        the comparison against NaN is False.
    """
    # Create spatial index and load GEBCO Raster Data
    raster_data_dict, spatial_index = create_spatial_index(gebco_paths)

    try:
        # Get the relevant rasters using spatial index
        lons = sea_photon_dataset['longitude'].values
        lats = sea_photon_dataset['latitude'].values
        relevant_rasters = get_relevant_rasters_using_index(lons, lats, raster_data_dict, spatial_index)

        # Get seafloor elevation for all points
        sea_photon_dataset['seafloor_elevation'] = get_seafloor_elevation(lons, lats, relevant_rasters)
    finally:
        # The datasets were opened for this call only; release the file handles
        for gebco_raster, _raster_data in raster_data_dict.values():
            gebco_raster.close()

    # Filter out points below the seafloor
    above_seafloor = sea_photon_dataset['photon_height'] > sea_photon_dataset['seafloor_elevation']
    filtered_sea_photon_dataset = sea_photon_dataset[above_seafloor]

    return filtered_sea_photon_dataset
b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/atl03_analysis.csv @@ -0,0 +1,126 @@ +file,total_background_rate,solar_elevation,classification,solar_background_rate +processed_ATL03_20200103231215_01260602_006_01.h5,2693.2626911948337,-14.366390228271484,Nighttime, +processed_ATL03_20200120221315_03850602_006_01.h5,13806.160938730458,-3.189083235595707,Twilight,0.0 +processed_ATL03_20200123102047_04230606_006_01.h5,40221.2978331275,-20.771891516072188,Nighttime, +processed_ATL03_20200127101229_04840606_006_01.h5,53756.80956395094,-22.627933479509,Nighttime, +processed_ATL03_20200128215639_05070602_006_01.h5,359099.1436096114,3.0444767585617476,Twilight,227345.78169107437 +processed_ATL03_20200201214818_05680602_006_01.h5,529435.552229354,5.779487614282246,Twilight,329867.47343444824 +processed_ATL03_20200218204918_08270602_006_01.h5,1184736.9505547313,17.405200287505618,Daytime,1666541.374866092 +processed_ATL03_20200221085649_08650606_006_01.h5,22374.124763767093,-32.0131039217493,Nighttime, +processed_ATL03_20200222204059_08880602_006_01.h5,688033.4751943491,20.51920772131919,Daytime,1861048.2517014507 +processed_ATL03_20200225084829_09260606_006_01.h5,38114.72681797276,-33.27572123209635,Nighttime, +processed_ATL03_20200226203238_09490602_006_01.h5,4792078.4466824625,23.537683123633975,Daytime,2049588.014623465 +processed_ATL03_20200229084009_09870606_006_01.h5,26127.755282484995,-34.3192597084936,Nighttime, +processed_ATL03_20200301202417_10100602_006_01.h5,889223.9040683447,26.674097898949583,Daytime,2245494.4969852646 +processed_ATL03_20200305201556_10710602_006_01.h5,3788257.105734832,30.298559347788494,Daytime,2471885.310625827 +processed_ATL03_20200318192523_12690602_006_01.h5,1351106.399095953,37.82273376543905,Daytime,2941859.6757638804 +processed_ATL03_20200322191701_13300602_006_01.h5,4055652.4726818614,40.80553877688595,Daytime,3128171.3976581595 
+processed_ATL03_20200325072428_13680606_006_01.h5,276003.2899430346,-38.01975886027018,Nighttime, +processed_ATL03_20200326190838_00040702_006_01.h5,1214166.848817978,44.136398494099446,Daytime,3336223.2840271275 +processed_ATL03_20200329071609_00420706_006_01.h5,40921.18564560988,-38.085152261671595,Nighttime, +processed_ATL03_20200330190017_00650702_006_01.h5,2897362.4389916896,47.033739194511774,Daytime,3517196.7411376704 +processed_ATL03_20200403185200_01260702_006_03.h5,1422029.572889437,50.41658441740958,Daytime,3728495.7418974275 +processed_ATL03_20200416180123_03240702_006_02.h5,1212471.3817819578,56.88941257443048,Daytime,4132801.003164019 +processed_ATL03_20200419060855_03620706_006_02.h5,11336.364038336214,-36.11201312276911,Nighttime, +processed_ATL03_20200420175305_03850702_006_02.h5,8790328.564517578,59.43912149047851,Daytime,4292060.7122818045 +processed_ATL03_20200423060037_04230706_006_02.h5,13875.464784802263,-36.77773346454934,Nighttime, +processed_ATL03_20200424174447_04460702_006_02.h5,11769916.10352509,62.44312992182514,Daytime,4479696.84045018 +processed_ATL03_20200427055215_04840706_006_02.h5,24467.29998703863,-37.28700614391692,Nighttime, +processed_ATL03_20200428173625_05070702_006_02.h5,5890936.079037134,64.92542139996337,Daytime,4634745.527092652 +processed_ATL03_20200501054353_05450706_006_01.h5,27995.160394601022,-35.154712923723665,Nighttime, +processed_ATL03_20200502172804_05680702_006_01.h5,1858938.9999125663,66.71920158729003,Daytime,4746788.477978858 +processed_ATL03_20200514045317_07430706_006_01.h5,101200.1387867376,-33.12479663476581,Nighttime, +processed_ATL03_20200515163727_07660702_006_01.h5,4029271.209775364,70.89727247756439,Daytime,5007758.799128272 +processed_ATL03_20200518044459_08040706_006_01.h5,21390.274746852945,-32.48489073114458,Nighttime, +processed_ATL03_20200519162909_08270702_006_01.h5,11263527.80896702,71.30119411230662,Daytime,5032988.519136913 
+processed_ATL03_20200522043639_08650706_006_01.h5,27726.586265306785,-30.78756402104017,Nighttime, +processed_ATL03_20200523162049_08880702_006_01.h5,2394186.0851803604,71.34785883815056,Daytime,5035903.287413869 +processed_ATL03_20200526042817_09260706_006_01.h5,192063.31330177086,-30.286723850078406,Nighttime, +processed_ATL03_20200527161228_09490702_006_01.h5,3866876.0375585253,70.91486510319069,Daytime,5008857.668266717 +processed_ATL03_20200530041958_09870706_006_01.h5,14797.137764492889,-28.866483673855846,Nighttime, +processed_ATL03_20200531160408_10100702_006_01.h5,2673537.075175427,69.99713837967872,Daytime,4951534.696959596 +processed_ATL03_20200608033738_11240706_006_01.h5,34564.247899867914,-27.469124403873938,Nighttime, +processed_ATL03_20200609152147_11470702_006_01.h5,1473215.054213498,66.15265682069996,Daytime,4711401.005251505 +processed_ATL03_20200612032917_11850706_006_01.h5,38634.13533752579,-26.62868586505384,Nighttime, +processed_ATL03_20200613151327_12080702_006_01.h5,6435067.604220629,64.68456579542953,Daytime,4619701.224083781 +processed_ATL03_20200616032058_12460706_006_01.h5,16662.513664877562,-24.48468769049549,Nighttime, +processed_ATL03_20200617150508_12690702_006_01.h5,8234690.942282691,62.90583281198949,Daytime,4508598.150412161 +processed_ATL03_20200620031238_13070706_006_01.h5,78310.58091236708,-23.989503564854612,Nighttime, +processed_ATL03_20200621145648_13300702_006_01.h5,2091374.7284403527,60.61197065362462,Daytime,4365319.120481279 +processed_ATL03_20200624030418_13680706_006_01.h5,1450.7135090318498,-23.243765727303217,Nighttime, +processed_ATL03_20200625144828_00040802_006_01.h5,6557029.969762381,58.23505245263165,Daytime,4216852.250830274 +processed_ATL03_20200628025559_00420806_006_01.h5,110170.96579499701,-22.114351442730076,Nighttime, +processed_ATL03_20200629144009_00650802_006_01.h5,1611271.1367206054,55.93990911496594,Daytime,4073493.195983019 
+processed_ATL03_20200703143148_01260802_006_01.h5,1517288.3956790732,53.58790397706497,Daytime,3926582.44403482 +processed_ATL03_20200707021337_01790806_006_01.h5,45946.903156638844,-19.85720067705427,Nighttime, +processed_ATL03_20200711020519_02400806_006_01.h5,37795.75206357141,-18.332053259394385,Nighttime, +processed_ATL03_20200712134930_02630802_006_01.h5,2751507.4577635746,47.60112804428408,Daytime,3552636.9369194973 +processed_ATL03_20200715015701_03010806_006_01.h5,177058.97577769912,-17.245845284873063,Nighttime, +processed_ATL03_20200716134112_03240802_006_01.h5,4225216.975207497,44.99416261486278,Daytime,3389800.876161126 +processed_ATL03_20200719014842_03620806_006_01.h5,84038.4457408923,-16.165504271212004,Nighttime, +processed_ATL03_20200720133252_03850802_006_01.h5,1086128.2890839668,42.32534367015925,Daytime,3223101.326206102 +processed_ATL03_20200723014022_04230806_006_01.h5,79140.35345762987,-15.127771424350552,Nighttime, +processed_ATL03_20200724132432_04460802_006_01.h5,3827796.7701561344,39.720189704580584,Daytime,3060378.412953644 +processed_ATL03_20200727013202_04840806_006_01.h5,62161.319765603504,-14.107249783170584,Nighttime, +processed_ATL03_20200728131612_05070802_006_01.h5,1088091.083936827,36.889148079226366,Daytime,2883546.1232297295 +processed_ATL03_20200801130752_05680802_006_01.h5,4673920.943899507,34.1490156007712,Daytime,2712392.193180585 +processed_ATL03_20200809004122_06820806_006_01.h5,179494.59699320886,-10.638800908295066,Nighttime, +processed_ATL03_20200810122532_07050802_006_01.h5,1068897.9927847406,27.84727919411428,Daytime,2318773.650787634 +processed_ATL03_20200813003303_07430806_006_01.h5,42490.099909907636,-9.501473152301626,Nighttime, +processed_ATL03_20200814121713_07660802_006_01.h5,5752663.929646432,25.0592073406471,Daytime,2144625.335429779 +processed_ATL03_20200817002443_08040806_006_01.h5,16617.049963783167,-8.673657633989055,Nighttime, 
+processed_ATL03_20200818120853_08270802_006_01.h5,1094279.5481663574,22.218095326033673,Daytime,1967164.0298782024 +processed_ATL03_20200821001623_08650806_006_01.h5,88550.37502800443,-7.514235541809514,Nighttime, +processed_ATL03_20200822120034_08880802_006_01.h5,3775479.618464736,19.388708499379472,Daytime,1790435.1020549166 +processed_ATL03_20200825000804_09260806_006_01.h5,89446.81397876945,-6.454590820296701,Nighttime, +processed_ATL03_20200826115214_09490802_006_01.h5,872472.4922581951,16.536142382767128,Daytime,1612258.3513240605 +processed_ATL03_20200830114354_10100802_006_01.h5,754567.2350125939,13.67799809981165,Daytime,1433733.177624752 +processed_ATL03_20200903113534_10710802_006_02.h5,581694.4312176822,10.853443807332303,Daytime,1257306.0991546002 +processed_ATL03_20200906231725_11240806_006_02.h5,33649.48062353326,-3.1481845660217744,Twilight,0.0 +processed_ATL03_20200908110135_11470802_006_02.h5,503253.43356873037,7.203728281765667,Daytime,1029337.867445496 +processed_ATL03_20200912105316_12080802_006_02.h5,341280.07894445007,4.380522070205796,Twilight,277428.3403222561 +processed_ATL03_20200914230046_12460806_006_02.h5,26186.26481486116,-0.9621860267135545,Twilight,77141.9540131092 +processed_ATL03_20200916104456_12690802_006_02.h5,125950.94852182808,1.6256601045111767,Twilight,174158.40681171417 +processed_ATL03_20200918225226_13070806_006_02.h5,125792.44703489641,0.31131358201008213,Twilight,124885.40938806534 +processed_ATL03_20200920103636_13300802_006_02.h5,210471.61413558375,-1.425258747820212,Twilight,59780.94674086571 +processed_ATL03_20200922224407_13680806_006_02.h5,172714.08971251204,1.1941339462355751,Twilight,157981.30216646194 +processed_ATL03_20200924102817_00040902_006_02.h5,5404.008946849824,-4.534453156049597,Twilight,0.0 +processed_ATL03_20200926223548_00420906_006_02.h5,319256.5753720403,2.0764338337093933,Twilight,191056.85539507866 
+processed_ATL03_20200928101958_00650902_006_02.h5,42946.540686569366,-7.493123294715595,Nighttime, +processed_ATL03_20201002101131_01260902_006_01.h5,362.1598100051391,-10.500501094674165,Nighttime, +processed_ATL03_20201005215324_01790906_006_01.h5,295125.674828767,4.415555006044275,Twilight,278741.54800486565 +processed_ATL03_20201009214506_02400906_006_01.h5,331640.74092138046,5.289189014523632,Twilight,311489.24924016 +processed_ATL03_20201011092916_02630902_006_01.h5,3016.8440006209767,-17.102435897122042,Nighttime, +processed_ATL03_20201013213647_03010906_006_01.h5,974888.7458435383,6.2987469021792695,Daytime,972810.9947649338 +processed_ATL03_20201015092057_03240902_006_01.h5,96992.74215494073,-20.071210913659744,Nighttime, +processed_ATL03_20201017212828_03620906_006_01.h5,379385.6656480939,7.3146814146422745,Daytime,1036268.2129278439 +processed_ATL03_20201019091238_03850902_006_01.h5,30239.096275285712,-23.040433986865988,Nighttime, +processed_ATL03_20201021212007_04230906_006_01.h5,539059.9907695217,8.348818287791644,Daytime,1100862.3854504935 +processed_ATL03_20201023090418_04460902_006_01.h5,259884.82496922815,-26.032223481266826,Nighttime, +processed_ATL03_20201025211148_04840906_006_01.h5,2574614.756669485,9.36837614095687,Daytime,1164545.9244230608 +processed_ATL03_20201027085559_05070902_006_01.h5,469484.5235946693,-29.269784732699847,Nighttime, +processed_ATL03_20201029210331_05450906_006_01.h5,1521040.160322414,9.962021725053399,Daytime,1201626.1661899565 +processed_ATL03_20201031084740_05680902_006_01.h5,16340.668559343276,-31.71451310202092,Nighttime, +processed_ATL03_20201107202110_06820906_006_01.h5,674488.877680544,12.858877914241898,Daytime,1382569.3598001893 +processed_ATL03_20201109080520_07050902_006_01.h5,167326.14826736267,-38.60647640952581,Nighttime, +processed_ATL03_20201111201251_07430906_006_01.h5,1169250.8182872327,14.147239563596255,Daytime,1463042.8994357307 
+processed_ATL03_20201113075702_07660902_006_01.h5,224.37694479006203,-41.561702853043876,Nighttime, +processed_ATL03_20201115200432_08040906_006_01.h5,1398742.022021409,15.108680614291737,Daytime,1523096.3514607523 +processed_ATL03_20201117074843_08270902_006_01.h5,18312.102258590567,-44.48965702690624,Nighttime, +processed_ATL03_20201119195613_08650906_006_01.h5,737208.2543954616,16.10340274771491,Daytime,1585228.6037444922 +processed_ATL03_20201121074024_08880902_006_01.h5,28627.621943633458,-47.40913550755529,Nighttime, +processed_ATL03_20201123194754_09260906_006_01.h5,682141.6288944753,17.083277940723377,Daytime,1646433.4876541982 +processed_ATL03_20201125073204_09490902_006_01.h5,45514.46755107935,-50.32221010594868,Nighttime, +processed_ATL03_20201127193934_09870906_006_01.h5,2059743.4233440266,18.298593901893994,Daytime,1722344.453507599 +processed_ATL03_20201129072343_10100902_006_01.h5,156814.4450448852,-53.07099330253307,Nighttime, +processed_ATL03_20201206185715_11240906_006_01.h5,4465255.531292323,20.777068463904033,Daytime,1877154.7282480402 +processed_ATL03_20201208064125_11470902_006_01.h5,36.89948285143027,-59.01391457703154,Nighttime, +processed_ATL03_20201210184856_11850906_006_01.h5,1565148.2148421055,21.865176377134688,Daytime,1945120.035448076 +processed_ATL03_20201212063308_12080902_006_01.h5,31776.705634769776,-61.83763375374974,Nighttime, +processed_ATL03_20201214184038_12460906_006_01.h5,6440840.810459436,22.93182274132584,Daytime,2011744.8131328654 +processed_ATL03_20201216062448_12690902_006_01.h5,7292.979228897971,-64.54162620939094,Nighttime, +processed_ATL03_20201218183218_13070906_006_01.h5,2698236.5121689388,23.989010371281957,Daytime,2077778.7801359287 +processed_ATL03_20201220061628_13300902_006_01.h5,10407.229098666925,-66.92680416509755,Nighttime, +processed_ATL03_20201222182358_13680906_006_01.h5,3179819.8703778996,25.082950329887492,Daytime,2146108.368075667 
+processed_ATL03_20201226181538_00421006_006_01.h5,947034.319638348,26.143279988765048,Daytime,2212338.592231427 +processed_ATL03_20201228055948_00651002_006_01.h5,41673.45850649981,-70.68798426405911,Nighttime, diff --git a/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/atl03_analysis_updated.csv b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/atl03_analysis_updated.csv new file mode 100644 index 0000000..6223ab9 --- /dev/null +++ b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/atl03_analysis_updated.csv @@ -0,0 +1,126 @@ +file,total_background_rate,solar_elevation,classification,solar_background_rate +processed_ATL03_20200103231215_01260602_006_01.h5,2693.2626911948337,-14.366390228271484,Nighttime,0.0 +processed_ATL03_20200120221315_03850602_006_01.h5,13806.160938730458,-3.189083235595707,Twilight,144592.35717088566 +processed_ATL03_20200123102047_04230606_006_01.h5,40221.2978331275,-20.771891516072188,Nighttime,0.0 +processed_ATL03_20200127101229_04840606_006_01.h5,53756.80956395094,-22.627933479509,Nighttime,0.0 +processed_ATL03_20200128215639_05070602_006_01.h5,359099.1436096114,3.0444767585617476,Twilight,145235.43070705683 +processed_ATL03_20200201214818_05680602_006_01.h5,529435.552229354,5.779487614282246,Twilight,129869.34051024831 +processed_ATL03_20200218204918_08270602_006_01.h5,1184736.9505547313,17.405200287505618,Daytime,1748940.2905502943 +processed_ATL03_20200221085649_08650606_006_01.h5,22374.124763767093,-32.0131039217493,Nighttime,0.0 +processed_ATL03_20200222204059_08880602_006_01.h5,688033.4751943491,20.51920772131919,Daytime,1864309.6321896184 +processed_ATL03_20200225084829_09260606_006_01.h5,38114.72681797276,-33.27572123209635,Nighttime,0.0 +processed_ATL03_20200226203238_09490602_006_01.h5,4792078.4466824625,23.537683123633975,Daytime,1983658.3052103017 +processed_ATL03_20200229084009_09870606_006_01.h5,26127.755282484995,-34.3192597084936,Nighttime,0.0 
+processed_ATL03_20200301202417_10100602_006_01.h5,889223.9040683447,26.674097898949583,Daytime,2116042.6390513824 +processed_ATL03_20200305201556_10710602_006_01.h5,3788257.105734832,30.298559347788494,Daytime,2280416.8350887266 +processed_ATL03_20200318192523_12690602_006_01.h5,1351106.399095953,37.82273376543905,Daytime,2664819.338864546 +processed_ATL03_20200322191701_13300602_006_01.h5,4055652.4726818614,40.80553877688595,Daytime,2835030.833578937 +processed_ATL03_20200325072428_13680606_006_01.h5,276003.2899430346,-38.01975886027018,Nighttime,0.0 +processed_ATL03_20200326190838_00040702_006_01.h5,1214166.848817978,44.136398494099446,Daytime,3038283.9830246638 +processed_ATL03_20200329071609_00420706_006_01.h5,40921.18564560988,-38.085152261671595,Nighttime,0.0 +processed_ATL03_20200330190017_00650702_006_01.h5,2897362.4389916896,47.033739194511774,Daytime,3227154.257593272 +processed_ATL03_20200403185200_01260702_006_03.h5,1422029.572889437,50.41658441740958,Daytime,3462884.733692616 +processed_ATL03_20200416180123_03240702_006_02.h5,1212471.3817819578,56.88941257443048,Daytime,3963956.1596805067 +processed_ATL03_20200419060855_03620706_006_02.h5,11336.364038336214,-36.11201312276911,Nighttime,0.0 +processed_ATL03_20200420175305_03850702_006_02.h5,8790328.564517578,59.43912149047851,Daytime,4181022.0474315835 +processed_ATL03_20200423060037_04230706_006_02.h5,13875.464784802263,-36.77773346454934,Nighttime,0.0 +processed_ATL03_20200424174447_04460702_006_02.h5,11769916.10352509,62.44312992182514,Daytime,4452302.924574176 +processed_ATL03_20200427055215_04840706_006_02.h5,24467.29998703863,-37.28700614391692,Nighttime,0.0 +processed_ATL03_20200428173625_05070702_006_02.h5,5890936.079037134,64.92542139996337,Daytime,4689895.710371923 +processed_ATL03_20200501054353_05450706_006_01.h5,27995.160394601022,-35.154712923723665,Nighttime,0.0 +processed_ATL03_20200502172804_05680702_006_01.h5,1858938.9999125663,66.71920158729003,Daytime,4869548.371812116 
+processed_ATL03_20200514045317_07430706_006_01.h5,101200.1387867376,-33.12479663476581,Nighttime,0.0 +processed_ATL03_20200515163727_07660702_006_01.h5,4029271.209775364,70.89727247756439,Daytime,5315491.960643039 +processed_ATL03_20200518044459_08040706_006_01.h5,21390.274746852945,-32.48489073114458,Nighttime,0.0 +processed_ATL03_20200519162909_08270702_006_01.h5,11263527.80896702,71.30119411230662,Daytime,5360737.751993225 +processed_ATL03_20200522043639_08650706_006_01.h5,27726.586265306785,-30.78756402104017,Nighttime,0.0 +processed_ATL03_20200523162049_08880702_006_01.h5,2394186.0851803604,71.34785883815056,Daytime,5365989.999505284 +processed_ATL03_20200526042817_09260706_006_01.h5,192063.31330177086,-30.286723850078406,Nighttime,0.0 +processed_ATL03_20200527161228_09490702_006_01.h5,3866876.0375585253,70.91486510319069,Daytime,5317454.551888833 +processed_ATL03_20200530041958_09870706_006_01.h5,14797.137764492889,-28.866483673855846,Nighttime,0.0 +processed_ATL03_20200531160408_10100702_006_01.h5,2673537.075175427,69.99713837967872,Daytime,5216046.879222733 +processed_ATL03_20200608033738_11240706_006_01.h5,34564.247899867914,-27.469124403873938,Nighttime,0.0 +processed_ATL03_20200609152147_11470702_006_01.h5,1473215.054213498,66.15265682069996,Daytime,4812066.45406627 +processed_ATL03_20200612032917_11850706_006_01.h5,38634.13533752579,-26.62868586505384,Nighttime,0.0 +processed_ATL03_20200613151327_12080702_006_01.h5,6435067.604220629,64.68456579542953,Daytime,4666289.948981485 +processed_ATL03_20200616032058_12460706_006_01.h5,16662.513664877562,-24.48468769049549,Nighttime,0.0 +processed_ATL03_20200617150508_12690702_006_01.h5,8234690.942282691,62.90583281198949,Daytime,4495646.639719844 +processed_ATL03_20200620031238_13070706_006_01.h5,78310.58091236708,-23.989503564854612,Nighttime,0.0 +processed_ATL03_20200621145648_13300702_006_01.h5,2091374.7284403527,60.61197065362462,Daytime,4284884.418061689 
+processed_ATL03_20200624030418_13680706_006_01.h5,1450.7135090318498,-23.243765727303217,Nighttime,0.0 +processed_ATL03_20200625144828_00040802_006_01.h5,6557029.969762381,58.23505245263165,Daytime,4077050.837044342 +processed_ATL03_20200628025559_00420806_006_01.h5,110170.96579499701,-22.114351442730076,Nighttime,0.0 +processed_ATL03_20200629144009_00650802_006_01.h5,1611271.1367206054,55.93990911496594,Daytime,3886076.4388880646 +processed_ATL03_20200703143148_01260802_006_01.h5,1517288.3956790732,53.58790397706497,Daytime,3699787.295922931 +processed_ATL03_20200707021337_01790806_006_01.h5,45946.903156638844,-19.85720067705427,Nighttime,0.0 +processed_ATL03_20200711020519_02400806_006_01.h5,37795.75206357141,-18.332053259394385,Nighttime,0.0 +processed_ATL03_20200712134930_02630802_006_01.h5,2751507.4577635746,47.60112804428408,Daytime,3265520.353254656 +processed_ATL03_20200715015701_03010806_006_01.h5,177058.97577769912,-17.245845284873063,Nighttime,0.0 +processed_ATL03_20200716134112_03240802_006_01.h5,4225216.975207497,44.99416261486278,Daytime,3092994.3084492055 +processed_ATL03_20200719014842_03620806_006_01.h5,84038.4457408923,-16.165504271212004,Nighttime,0.0 +processed_ATL03_20200720133252_03850802_006_01.h5,1086128.2890839668,42.32534367015925,Daytime,2925990.783412471 +processed_ATL03_20200723014022_04230806_006_01.h5,79140.35345762987,-15.127771424350552,Nighttime,0.0 +processed_ATL03_20200724132432_04460802_006_01.h5,3827796.7701561344,39.720189704580584,Daytime,2771845.5833431105 +processed_ATL03_20200727013202_04840806_006_01.h5,62161.319765603504,-14.107249783170584,Nighttime,0.0 +processed_ATL03_20200728131612_05070802_006_01.h5,1088091.083936827,36.889148079226366,Daytime,2613721.4970187624 +processed_ATL03_20200801130752_05680802_006_01.h5,4673920.943899507,34.1490156007712,Daytime,2469462.715856148 +processed_ATL03_20200809004122_06820806_006_01.h5,179494.59699320886,-10.638800908295066,Nighttime,0.0 
+processed_ATL03_20200810122532_07050802_006_01.h5,1068897.9927847406,27.84727919411428,Daytime,2167870.314943968 +processed_ATL03_20200813003303_07430806_006_01.h5,42490.099909907636,-9.501473152301626,Nighttime,0.0 +processed_ATL03_20200814121713_07660802_006_01.h5,5752663.929646432,25.0592073406471,Daytime,2046779.8760011417 +processed_ATL03_20200817002443_08040806_006_01.h5,16617.049963783167,-8.673657633989055,Nighttime,0.0 +processed_ATL03_20200818120853_08270802_006_01.h5,1094279.5481663574,22.218095326033673,Daytime,1930540.7399213663 +processed_ATL03_20200821001623_08650806_006_01.h5,88550.37502800443,-7.514235541809514,Nighttime,0.0 +processed_ATL03_20200822120034_08880802_006_01.h5,3775479.618464736,19.388708499379472,Daytime,1821542.1003826784 +processed_ATL03_20200825000804_09260806_006_01.h5,89446.81397876945,-6.454590820296701,Nighttime,0.0 +processed_ATL03_20200826115214_09490802_006_01.h5,872472.4922581951,16.536142382767128,Daytime,1718080.7773801952 +processed_ATL03_20200830114354_10100802_006_01.h5,754567.2350125939,13.67799809981165,Daytime,1620512.943928909 +processed_ATL03_20200903113534_10710802_006_02.h5,581694.4312176822,10.853443807332303,Daytime,1529735.0857654258 +processed_ATL03_20200906231725_11240806_006_02.h5,33649.48062353326,-3.1481845660217744,Twilight,144776.66851293913 +processed_ATL03_20200908110135_11470802_006_02.h5,503253.43356873037,7.203728281765667,Daytime,1420215.108060837 +processed_ATL03_20200912105316_12080802_006_02.h5,341280.07894445007,4.380522070205796,Twilight,138467.63856118967 +processed_ATL03_20200914230046_12460806_006_02.h5,26186.26481486116,-0.9621860267135545,Twilight,151470.7608601518 +processed_ATL03_20200916104456_12690802_006_02.h5,125950.94852182808,1.6256601045111767,Twilight,150144.69402303346 +processed_ATL03_20200918225226_13070806_006_02.h5,125792.44703489641,0.31131358201008213,Twilight,152119.1293011806 
+processed_ATL03_20200920103636_13300802_006_02.h5,210471.61413558375,-1.425258747820212,Twilight,150614.29497198254 +processed_ATL03_20200922224407_13680806_006_02.h5,172714.08971251204,1.1941339462355751,Twilight,151082.12390992098 +processed_ATL03_20200924102817_00040902_006_02.h5,5404.008946849824,-4.534453156049597,Twilight,137583.17476423405 +processed_ATL03_20200926223548_00420906_006_02.h5,319256.5753720403,2.0764338337093933,Twilight,148877.46058688662 +processed_ATL03_20200928101958_00650902_006_02.h5,42946.540686569366,-7.493123294715595,Nighttime,0.0 +processed_ATL03_20201002101131_01260902_006_01.h5,362.1598100051391,-10.500501094674165,Nighttime,0.0 +processed_ATL03_20201005215324_01790906_006_01.h5,295125.674828767,4.415555006044275,Twilight,138267.9713919565 +processed_ATL03_20201009214506_02400906_006_01.h5,331640.74092138046,5.289189014523632,Twilight,133008.95080971622 +processed_ATL03_20201011092916_02630902_006_01.h5,3016.8440006209767,-17.102435897122042,Nighttime,0.0 +processed_ATL03_20201013213647_03010906_006_01.h5,974888.7458435383,6.2987469021792695,Daytime,1394346.0891712194 +processed_ATL03_20201015092057_03240902_006_01.h5,96992.74215494073,-20.071210913659744,Nighttime,0.0 +processed_ATL03_20201017212828_03620906_006_01.h5,379385.6656480939,7.3146814146422745,Daytime,1423421.0499189848 +processed_ATL03_20201019091238_03850902_006_01.h5,30239.096275285712,-23.040433986865988,Nighttime,0.0 +processed_ATL03_20201021212007_04230906_006_01.h5,539059.9907695217,8.348818287791644,Daytime,1453667.9109723903 +processed_ATL03_20201023090418_04460902_006_01.h5,259884.82496922815,-26.032223481266826,Nighttime,0.0 +processed_ATL03_20201025211148_04840906_006_01.h5,2574614.756669485,9.36837614095687,Daytime,1484145.3995965777 +processed_ATL03_20201027085559_05070902_006_01.h5,469484.5235946693,-29.269784732699847,Nighttime,0.0 +processed_ATL03_20201029210331_05450906_006_01.h5,1521040.160322414,9.962021725053399,Daytime,1502197.345648468 
+processed_ATL03_20201031084740_05680902_006_01.h5,16340.668559343276,-31.71451310202092,Nighttime,0.0 +processed_ATL03_20201107202110_06820906_006_01.h5,674488.877680544,12.858877914241898,Daytime,1593624.7953063964 +processed_ATL03_20201109080520_07050902_006_01.h5,167326.14826736267,-38.60647640952581,Nighttime,0.0 +processed_ATL03_20201111201251_07430906_006_01.h5,1169250.8182872327,14.147239563596255,Daytime,1636128.0126327383 +processed_ATL03_20201113075702_07660902_006_01.h5,224.37694479006203,-41.561702853043876,Nighttime,0.0 +processed_ATL03_20201115200432_08040906_006_01.h5,1398742.022021409,15.108680614291737,Daytime,1668612.0336912915 +processed_ATL03_20201117074843_08270902_006_01.h5,18312.102258590567,-44.48965702690624,Nighttime,0.0 +processed_ATL03_20201119195613_08650906_006_01.h5,737208.2543954616,16.10340274771491,Daytime,1702925.5465857992 +processed_ATL03_20201121074024_08880902_006_01.h5,28627.621943633458,-47.40913550755529,Nighttime,0.0 +processed_ATL03_20201123194754_09260906_006_01.h5,682141.6288944753,17.083277940723377,Daytime,1737442.6729897463 +processed_ATL03_20201125073204_09490902_006_01.h5,45514.46755107935,-50.32221010594868,Nighttime,0.0 +processed_ATL03_20201127193934_09870906_006_01.h5,2059743.4233440266,18.298593901893994,Daytime,1781262.9064303343 +processed_ATL03_20201129072343_10100902_006_01.h5,156814.4450448852,-53.07099330253307,Nighttime,0.0 +processed_ATL03_20201206185715_11240906_006_01.h5,4465255.531292323,20.777068463904033,Daytime,1874209.24194811 +processed_ATL03_20201208064125_11470902_006_01.h5,36.89948285143027,-59.01391457703154,Nighttime,0.0 +processed_ATL03_20201210184856_11850906_006_01.h5,1565148.2148421055,21.865176377134688,Daytime,1916584.987327542 +processed_ATL03_20201212063308_12080902_006_01.h5,31776.705634769776,-61.83763375374974,Nighttime,0.0 +processed_ATL03_20201214184038_12460906_006_01.h5,6440840.810459436,22.93182274132584,Daytime,1959085.7276236292 
+processed_ATL03_20201216062448_12690902_006_01.h5,7292.979228897971,-64.54162620939094,Nighttime,0.0 +processed_ATL03_20201218183218_13070906_006_01.h5,2698236.5121689388,23.989010371281957,Daytime,2002169.7726469506 +processed_ATL03_20201220061628_13300902_006_01.h5,10407.229098666925,-66.92680416509755,Nighttime,0.0 +processed_ATL03_20201222182358_13680906_006_01.h5,3179819.8703778996,25.082950329887492,Daytime,2047781.1142433893 +processed_ATL03_20201226181538_00421006_006_01.h5,947034.319638348,26.143279988765048,Daytime,2093013.3941425597 +processed_ATL03_20201228055948_00651002_006_01.h5,41673.45850649981,-70.68798426405911,Nighttime,0.0 diff --git a/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation.png b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation.png new file mode 100644 index 0000000..75f4454 Binary files /dev/null and b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation.png differ diff --git a/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation_2021.png b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation_2021.png new file mode 100644 index 0000000..5db39f4 Binary files /dev/null and b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation_2021.png differ diff --git a/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation_2022_new.png b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation_2022_new.png new file mode 100644 index 0000000..73c29e5 Binary files /dev/null and b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation_2022_new.png differ diff --git a/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation_exp_fit.png b/icesat-2_kdph_py/kd_utils/SolarBckgrd/Dataset/Results/background_vs_elevation_exp_fit.png new file mode 100644 index 0000000..56c02bf Binary files /dev/null and 
# Candidate curves used when regressing background rate on solar elevation.
def power_model(x, a, b, c):
    """Evaluate the power-law curve ``a * x**b + c``.

    ``x`` may be a scalar, NumPy array or pandas Series; the result has the
    same shape. ``a`` scales the curve, ``b`` is the exponent and ``c`` is a
    constant offset.
    """
    scaled = a * np.power(x, b)
    return scaled + c


def exp_model(x, a, b, c):
    """Evaluate the exponential curve ``a * exp(b * x) + c``."""
    growth = np.exp(b * x)
    return a * growth + c


def twilight_model(x, a, b, c):
    """Evaluate the Gaussian-shaped twilight curve ``a * exp(-b * x**2) + c``."""
    exponent = -b * x**2
    return a * np.exp(exponent) + c
def process_file(args):
    """Summarise one ATL03 granule: mean background rate over land and ocean.

    Parameters
    ----------
    args : tuple
        ``(file, beam, shoreline_data_path)`` packed into a single argument so
        the function can be dispatched through ``multiprocessing.Pool.map``.
        ``file`` is the HDF5 path, ``beam`` the group name prefixed to every
        dataset path, and ``shoreline_data_path`` is forwarded unchanged to
        ``isolate_sea_land_photons``.

    Returns
    -------
    dict
        On success: ``file`` (base name), ``total_background_rate_land``,
        ``total_background_rate_ocean``, ``solar_elevation`` and
        ``mean_photon_height`` (each NaN when no valid samples exist).
        On any failure: ``{'file': <base name>, 'error': <message>}``; the
        function never raises, so one bad granule cannot abort the pool.
    """
    file, beam, shoreline_data_path = args
    try:
        with h5py.File(file, 'r') as f:
            # Every dataset read below must be listed here so a missing one is
            # reported by name. geolocation/delta_time was previously read
            # without being checked. heights/delta_time and solar_azimuth are
            # not read but stay in the list so the set of skipped granules
            # does not change.
            required_keys = [
                f'{beam}/bckgrd_atlas/bckgrd_rate',
                f'{beam}/bckgrd_atlas/delta_time',
                f'{beam}/heights/delta_time',
                f'{beam}/heights/h_ph',
                f'{beam}/geolocation/reference_photon_lat',
                f'{beam}/geolocation/reference_photon_lon',
                f'{beam}/geolocation/delta_time',
                f'{beam}/geolocation/solar_azimuth',
                f'{beam}/geolocation/solar_elevation',
            ]
            missing = [key for key in required_keys if key not in f]
            if missing:
                raise KeyError(f"Missing dataset: {', '.join(missing)}")

            # Pull everything into memory, then release the file handle
            # before the (potentially slow) shoreline classification.
            bckgrd_rate = f[f'{beam}/bckgrd_atlas/bckgrd_rate'][:].astype(np.float64)
            bckgrd_time = f[f'{beam}/bckgrd_atlas/delta_time'][:]
            heights = f[f'{beam}/heights/h_ph'][:]
            segment_lat = f[f'{beam}/geolocation/reference_photon_lat'][:]
            segment_lon = f[f'{beam}/geolocation/reference_photon_lon'][:]
            segment_delta_time = f[f'{beam}/geolocation/delta_time'][:]
            solar_elevations = f[f'{beam}/geolocation/solar_elevation'][:].astype(np.float64)

        segments = pd.DataFrame({
            'latitude': segment_lat,
            'longitude': segment_lon,
            'delta_time': segment_delta_time,
        })
        segments['geometry'] = gpd.points_from_xy(segments['longitude'], segments['latitude'])
        segments_gdf = gpd.GeoDataFrame(segments, crs="EPSG:4326")

        # NOTE(review): presumably returns one boolean-like land flag per
        # segment row -- confirm against noise_utils.data_processing.
        segments_gdf['is_land'] = isolate_sea_land_photons(shoreline_data_path, segments_gdf)
        land_flags = segments_gdf['is_land'].astype(int)

        # Carry the per-segment flag onto the background-rate time base.
        # Linear interpolation followed by rounding picks the nearer segment;
        # times outside the segment range reuse the first/last flag.
        interp_is_land = interp1d(
            segments_gdf['delta_time'],
            land_flags,
            bounds_error=False,
            fill_value=(land_flags.iloc[0], land_flags.iloc[-1]),
        )
        is_land_interpolated = interp_is_land(bckgrd_time).round().astype(int)

        land_mask = is_land_interpolated == 1
        ocean_mask = is_land_interpolated == 0
        total_background_rate_land = np.mean(bckgrd_rate[land_mask]) if land_mask.any() else np.nan
        total_background_rate_ocean = np.mean(bckgrd_rate[ocean_mask]) if ocean_mask.any() else np.nan

        # Fill values sit near the float32 maximum (~3.4e38). After the cast
        # to float64 an equality test against the literal 3.4028235e38 no
        # longer matches, so the magnitude test is what actually rejects them.
        valid_solar = solar_elevations[np.isfinite(solar_elevations) & (np.abs(solar_elevations) < 1e30)]
        solar_elevation = np.mean(valid_solar) if valid_solar.size else np.nan

        mean_photon_height = np.mean(heights) if heights.size > 0 else np.nan

        result = {
            'file': os.path.basename(file),
            'total_background_rate_land': total_background_rate_land,
            'total_background_rate_ocean': total_background_rate_ocean,
            'solar_elevation': solar_elevation,
            'mean_photon_height': mean_photon_height,
        }
        logger.info("Processed %s: %s", file, result)
        return result
    except Exception as e:
        # Deliberately broad: this is the worker boundary, and a single
        # unreadable granule must be reported, not allowed to kill the batch.
        logger.error("Error processing %s: %s", file, e)
        return {'file': os.path.basename(file), 'error': str(e)}
def fit_surface_model(df, classification, surface_type, model_func, p0, bounds,
                      x_col='solar_elevation', y_col='solar_background_rate'):
    """Fit ``model_func`` to the rows of one classification / surface type.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``classification``, ``surface_type``, ``x_col`` and
        ``y_col`` columns.
    classification, surface_type : str
        Values used to select the rows to fit.
    model_func : callable
        ``model_func(x, *params)`` as expected by ``scipy.optimize.curve_fit``.
    p0, bounds
        Initial guess and ``(lower, upper)`` bounds forwarded to ``curve_fit``.
    x_col, y_col : str
        Names of the predictor and response columns.

    Returns
    -------
    tuple
        ``(popt, r2, model)`` where ``model(x)`` evaluates the fitted curve,
        or ``(None, None, None)`` when fewer than three usable points exist
        or the fit cannot be performed. ``r2`` is NaN when the response has
        zero variance, because R² is undefined there.
    """
    selected = df[(df['classification'] == classification) & (df['surface_type'] == surface_type)]
    if len(selected) < 3:
        print(f"Warning: Insufficient data for {classification} {surface_type} model.")
        return None, None, None

    x_all = selected[x_col].to_numpy(dtype=float)
    y_all = selected[y_col].to_numpy(dtype=float)
    usable = np.isfinite(x_all) & np.isfinite(y_all)  # drops NaN and +/-inf
    valid_x = x_all[usable]
    valid_y = y_all[usable]

    if len(valid_x) < 3:
        print(f"Warning: Fewer than 3 valid points for {classification} {surface_type} model.")
        return None, None, None

    try:
        popt, _ = curve_fit(model_func, valid_x, valid_y, p0=p0, bounds=bounds, maxfev=20000)
    except (RuntimeError, ValueError) as e:
        # RuntimeError: the optimiser did not converge.
        # ValueError: infeasible p0, degenerate bounds or non-finite residuals.
        # Both mean "no usable model", so neither should abort the pipeline.
        print(f"Fit failed for {classification} {surface_type}: {e}")
        return None, None, None

    residual_ss = np.sum((valid_y - model_func(valid_x, *popt)) ** 2)
    total_ss = np.sum((valid_y - np.mean(valid_y)) ** 2)
    r2 = 1 - residual_ss / total_ss if total_ss > 0 else np.nan
    return popt, r2, lambda x: model_func(x, *popt)
def load_and_process_files(atl03_directory, beam, shoreline_data_path):
    """Run ``process_file`` over every ATL03 ``.h5`` file in a directory.

    Files are selected by name (ends with ``.h5`` and contains ``ATL03``) and
    processed in a ``multiprocessing.Pool``. The list is sorted because
    ``os.listdir`` returns entries in arbitrary order, which would make the
    row order of the result differ between runs and file systems.

    Returns
    -------
    pandas.DataFrame
        One row per successfully processed file, without the ``error`` column.

    Raises
    ------
    ValueError
        If no matching file exists, or if every file failed.
    """
    atl03_files = sorted(
        os.path.join(atl03_directory, name)
        for name in os.listdir(atl03_directory)
        if name.endswith('.h5') and 'ATL03' in name
    )
    print(f"Found {len(atl03_files)} ATL03 files.")

    if not atl03_files:
        raise ValueError("No ATL03 files found in the directory.")

    jobs = [(path, beam, shoreline_data_path) for path in atl03_files]
    with Pool() as pool:
        results = pool.map(process_file, jobs)

    print("Results from process_file:", results)
    df = pd.DataFrame(results)

    # process_file reports failures as rows carrying an 'error' key; the
    # column only exists when at least one file failed.
    if 'error' in df.columns:
        failed = df['error'].notna()
        error_files = df.loc[failed, 'file'].tolist()
        print(f"Errors in {len(error_files)} files: {error_files}")
        df = df[~failed].drop(columns=['error'])

    if df.empty:
        raise ValueError("No files were successfully processed. Check the error messages above.")

    print("DataFrame columns after processing:", df.columns)
    return df


def assign_surface_type(df):
    """Label each row ``'Land'``, ``'Ocean'`` or ``'Mixed'`` in ``surface_type``.

    A row is ``'Land'`` when its land background rate is positive (even if an
    ocean rate is also present), otherwise ``'Ocean'`` when its ocean rate is
    positive. ``'Mixed'`` is the fallback for rows where neither rate is
    positive or both are missing. The frame is modified in place and returned.

    Raises
    ------
    KeyError
        If either background-rate column is absent.
    """
    required_columns = ['total_background_rate_land', 'total_background_rate_ocean']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"Missing required columns in DataFrame: {missing_columns}")

    # NaN > 0 is False, so missing rates never qualify.
    has_land = df['total_background_rate_land'] > 0
    has_ocean = df['total_background_rate_ocean'] > 0
    # np.select takes the first matching condition, so land wins over ocean.
    df['surface_type'] = np.select([has_land, has_ocean], ['Land', 'Ocean'], default='Mixed')
    return df
def classify_data(df, twilight_threshold=6.0):
    """Classify rows by solar elevation and return the per-class subsets.

    Rows with a missing elevation are ``'Unknown'``; elevation above
    ``twilight_threshold`` is ``'Daytime'``; below ``-twilight_threshold`` is
    ``'Nighttime'``; everything in between, both limits included, is
    ``'Twilight'``. The label is written to ``df['classification']`` in place.

    Returns
    -------
    tuple
        ``(df, daytime_df, nighttime_df, twilight_df, unknown_df)``; the four
        subsets are independent copies.
    """
    elevation = df['solar_elevation']
    df['classification'] = np.select(
        [elevation.isna(), elevation > twilight_threshold, elevation < -twilight_threshold],
        ['Unknown', 'Daytime', 'Nighttime'],
        default='Twilight',
    )
    daytime_df, nighttime_df, twilight_df, unknown_df = (
        df[df['classification'] == label].copy()
        for label in ('Daytime', 'Nighttime', 'Twilight', 'Unknown')
    )
    print(f"Daytime: {len(daytime_df)}, Nighttime: {len(nighttime_df)}, Twilight: {len(twilight_df)}, Unknown: {len(unknown_df)}")
    return df, daytime_df, nighttime_df, twilight_df, unknown_df


def _nanmean_quiet(values):
    """Return the mean of the non-NaN entries, or NaN when there are none.

    Unlike ``np.nanmean`` this emits no RuntimeWarning for empty or all-NaN
    input.
    """
    arr = np.asarray(values, dtype=float)
    kept = arr[~np.isnan(arr)]
    return np.mean(kept) if kept.size else np.nan


def calculate_non_solar_rate(nighttime_df):
    """Estimate the non-solar background rate from nighttime rows.

    The land and ocean nighttime means are computed separately and then
    averaged with equal weight. A side with no valid samples is ignored.

    Returns
    -------
    float
        The combined rate, or NaN when neither side has valid nighttime
        samples. A warning is printed in that case because a NaN rate makes
        the downstream fit bounds invalid.
    """
    non_solar_rate_land = _nanmean_quiet(nighttime_df['total_background_rate_land'])
    non_solar_rate_ocean = _nanmean_quiet(nighttime_df['total_background_rate_ocean'])
    non_solar_rate = _nanmean_quiet([non_solar_rate_land, non_solar_rate_ocean])
    print(f"Non-solar background rate (land): {non_solar_rate_land:.2e} ph/s")
    print(f"Non-solar background rate (ocean): {non_solar_rate_ocean:.2e} ph/s")
    print(f"Non-solar background rate (combined): {non_solar_rate:.2e} ph/s")
    if np.isnan(non_solar_rate):
        print("Warning: no valid nighttime background rates; non-solar rate is NaN.")
    return non_solar_rate


def compute_solar_background_rates(df, non_solar_rate):
    """Subtract the non-solar rate to obtain solar background rates.

    Adds ``solar_background_rate_land`` and ``solar_background_rate_ocean``
    (total minus ``non_solar_rate``, floored at zero) and a combined
    ``solar_background_rate`` that takes the land value for ``'Land'`` rows,
    the ocean value for ``'Ocean'`` rows and NaN otherwise. ``df`` must
    already have a ``surface_type`` column; it is modified in place and
    returned.
    """
    df['solar_background_rate_land'] = (df['total_background_rate_land'] - non_solar_rate).clip(lower=0)
    df['solar_background_rate_ocean'] = (df['total_background_rate_ocean'] - non_solar_rate).clip(lower=0)
    df['solar_background_rate'] = np.select(
        [df['surface_type'] == 'Land', df['surface_type'] == 'Ocean'],
        [df['solar_background_rate_land'], df['solar_background_rate_ocean']],
        default=np.nan,
    )
    return df
def fit_daytime_model(df, daytime_df, non_solar_rate):
    """Fit daytime solar-background models for land and ocean and apply them.

    For each surface type an exponential and a power-law curve are fitted to
    ``solar_background_rate`` against ``solar_elevation`` after discarding
    points more than three standard deviations from the mean in
    ``log10(y + 1)``. The curve with the higher R² is kept. Its predictions,
    floored at zero, overwrite ``df['solar_background_rate']`` for the
    matching daytime rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Full table; modified in place.
    daytime_df : pandas.DataFrame
        Daytime subset used for fitting.
    non_solar_rate : float
        Non-solar background rate; used for the initial guess, the offset
        bounds, and as the fallback value when no model can be applied.

    Returns
    -------
    tuple
        ``(df, daytime_models)`` where ``daytime_models[surface_type]`` holds
        ``'params'``, ``'r2'`` and ``'model'``. Every stored model takes the
        raw solar elevation; the power-law model applies its own x shift.
    """
    daytime_models = {}
    for surface_type in ['Land', 'Ocean']:
        subset = daytime_df[daytime_df['surface_type'] == surface_type]
        if len(subset) < 3:
            print(f"Warning: Insufficient daytime {surface_type} data for modeling.")
            continue

        x_all = subset['solar_elevation']
        y_all = subset['solar_background_rate']
        mask = x_all.notna() & y_all.notna() & np.isfinite(x_all) & np.isfinite(y_all)
        valid_x = x_all[mask]
        valid_y = y_all[mask]

        if len(valid_x) < 3:
            print(f"Warning: Fewer than 3 valid points for daytime {surface_type} model.")
            continue

        # Remove outliers. The comparisons are inclusive so that a
        # zero-variance sample (std == 0) keeps its points instead of losing
        # all of them.
        log_y = np.log10(valid_y + 1)
        mean_log_y = np.mean(log_y)
        std_log_y = np.std(log_y)
        inlier_mask = (log_y >= mean_log_y - 3 * std_log_y) & (log_y <= mean_log_y + 3 * std_log_y)
        valid_x = valid_x[inlier_mask]
        valid_y = valid_y[inlier_mask]

        if len(valid_x) < 3:
            print(f"Warning: Fewer than 3 valid points for daytime {surface_type} model.")
            continue

        print(f"Daytime regression ({surface_type}): {len(valid_x)} valid data points")
        daytime_mask = (df['classification'] == 'Daytime') & (df['surface_type'] == surface_type)
        try:
            amplitude_guess = np.max(valid_y) - non_solar_rate
            total_ss = np.sum((valid_y - np.mean(valid_y)) ** 2)

            lower_exp = [0, 0.01, non_solar_rate * 0.9]
            upper_exp = [1e8, 0.5, non_solar_rate * 1.1]
            # Clip the guess into the bounds: an out-of-range start makes
            # curve_fit raise before it has tried anything.
            p0_exp = np.clip([amplitude_guess, 0.05, non_solar_rate], lower_exp, upper_exp)
            popt_exp, _ = curve_fit(exp_model, valid_x, valid_y, p0=p0_exp,
                                    bounds=(lower_exp, upper_exp), maxfev=20000)
            r2_exp = 1 - np.sum((valid_y - exp_model(valid_x, *popt_exp)) ** 2) / total_ss

            # The power law is fitted on x shifted so that its minimum is 1.
            x_min = valid_x.min()
            x_shifted = valid_x - x_min + 1
            lower_power = [0, 0.1, non_solar_rate * 0.9]
            upper_power = [1e8, 1.0, non_solar_rate * 1.1]
            p0_power = np.clip([amplitude_guess, 0.6, non_solar_rate], lower_power, upper_power)
            popt_power, _ = curve_fit(power_model, x_shifted, valid_y, p0=p0_power,
                                      bounds=(lower_power, upper_power), maxfev=20000)
            r2_power = 1 - np.sum((valid_y - power_model(x_shifted, *popt_power)) ** 2) / total_ss

            # Parameters are bound through default arguments so that each
            # stored model keeps the values of its own loop iteration; a
            # plain closure would see the last iteration's values.
            if r2_power > r2_exp:
                print(f"Daytime model ({surface_type}, power law): rate = {popt_power[0]:.2e} * solar_elevation^{popt_power[1]:.2f} + {popt_power[2]:.2e}, R² = {r2_power:.3f}")
                daytime_models[surface_type] = {
                    'params': popt_power,
                    'r2': r2_power,
                    'model': lambda x, params=popt_power, origin=x_min: power_model(x - origin + 1, *params),
                }
            else:
                print(f"Daytime model ({surface_type}, exponential): rate = {popt_exp[0]:.2e} * exp({popt_exp[1]:.2f} * solar_elevation) + {popt_exp[2]:.2e}, R² = {r2_exp:.3f}")
                daytime_models[surface_type] = {
                    'params': popt_exp,
                    'r2': r2_exp,
                    'model': lambda x, params=popt_exp: exp_model(x, *params),
                }

            # Apply the model to raw elevations. The stored power-law model
            # shifts x itself, so shifting here as well would apply the
            # offset twice.
            valid_daytime_mask = daytime_mask & df['solar_elevation'].notna() & np.isfinite(df['solar_elevation'])
            x_for_model = df.loc[valid_daytime_mask, 'solar_elevation']
            predicted = daytime_models[surface_type]['model'](x_for_model)
            df.loc[valid_daytime_mask, 'solar_background_rate'] = predicted.clip(lower=0)
            # NOTE(review): rows without a usable elevation receive the
            # non-solar rate as their *solar* rate -- confirm this is intended.
            df.loc[daytime_mask & ~valid_daytime_mask, 'solar_background_rate'] = non_solar_rate
        except (RuntimeError, ValueError) as e:
            # RuntimeError: no convergence. ValueError: invalid bounds (for
            # example a zero or NaN non-solar rate) or non-finite residuals.
            print(f"Daytime model fitting failed for {surface_type}: {e}. Using non-solar rate as fallback.")
            df.loc[daytime_mask, 'solar_background_rate'] = non_solar_rate
    return df, daytime_models
({surface_type}): {len(valid_twilight_x)} valid data points") + try: + initial_a = np.max(valid_twilight_y) - non_solar_rate + initial_b = 0.01 + initial_c = non_solar_rate + p0 = [initial_a, initial_b, initial_c] + bounds = ([0, 0.001, non_solar_rate * 0.9], [1e6, 1.0, non_solar_rate * 1.1]) + + popt, _ = curve_fit(twilight_model, valid_twilight_x, valid_twilight_y, p0=p0, bounds=bounds, maxfev=50000) + y_pred = twilight_model(valid_twilight_x, *popt) + r_squared = 1 - np.sum((valid_twilight_y - y_pred)**2) / np.sum((valid_twilight_y - np.mean(valid_twilight_y))**2) + + if r_squared < 0: + mean_rate = np.mean(valid_twilight_y) + twilight_models[surface_type] = { + 'params': [mean_rate], + 'r2': 0.0, + 'model': lambda x: np.full_like(x, mean_rate) + } + print(f"Twilight model ({surface_type}) failed with R² = {r_squared:.3f}. Using constant model: {mean_rate:.2e} ph/s") + + twilight_mask = (df['classification'] == 'Twilight') & (df['surface_type'] == surface_type) + valid_twilight_mask = twilight_mask & df['solar_elevation'].notna() & np.isfinite(df['solar_elevation']) + df.loc[valid_twilight_mask, 'solar_background_rate'] = mean_rate + df.loc[twilight_mask & ~valid_twilight_mask, 'solar_background_rate'] = non_solar_rate + else: + print(f"Twilight model ({surface_type}): rate = {popt[0]:.2e} * exp(-{popt[1]:.2f} * solar_elevation^2) + {popt[2]:.2e}, R² = {r_squared:.3f}") + twilight_models[surface_type] = {'params': popt, 'r2': r_squared, 'model': lambda x: twilight_model(x, *popt)} + + twilight_mask = (df['classification'] == 'Twilight') & (df['surface_type'] == surface_type) + valid_twilight_mask = twilight_mask & df['solar_elevation'].notna() & np.isfinite(df['solar_elevation']) + x_for_model = df.loc[valid_twilight_mask, 'solar_elevation'] + df.loc[valid_twilight_mask, 'solar_background_rate'] = twilight_models[surface_type]['model'](x_for_model).clip(lower=0) + df.loc[twilight_mask & ~valid_twilight_mask, 'solar_background_rate'] = non_solar_rate + except 
RuntimeError as e: + print(f"Twilight model fitting failed for {surface_type}: {e}. Setting twilight rates to non-solar rate.") + df.loc[(df['classification'] == 'Twilight') & (df['surface_type'] == surface_type), 'solar_background_rate'] = non_solar_rate + else: + print("Too few twilight points for modeling. Setting twilight rates to non-solar rate.") + df.loc[df['classification'] == 'Twilight', 'solar_background_rate'] = non_solar_rate + return df, twilight_models + +def create_diagnostic_plot(df, daytime_df, twilight_df, daytime_models, twilight_models, non_solar_rate, output_dir): + """Create diagnostic plot of background rate vs. solar elevation.""" + plt.figure(figsize=(10, 6)) + plt.scatter(df['solar_elevation'], df['total_background_rate'], c='gray', alpha=0.5, label='Total') + plt.scatter(daytime_df[daytime_df['surface_type'] == 'Land']['solar_elevation'], + daytime_df[daytime_df['surface_type'] == 'Land']['solar_background_rate'], + c='blue', alpha=0.5, label='Daytime Land') + plt.scatter(daytime_df[daytime_df['surface_type'] == 'Ocean']['solar_elevation'], + daytime_df[daytime_df['surface_type'] == 'Ocean']['solar_background_rate'], + c='cyan', alpha=0.5, label='Daytime Ocean') + plt.scatter(twilight_df[twilight_df['surface_type'] == 'Land']['solar_elevation'], + twilight_df[twilight_df['surface_type'] == 'Land']['solar_background_rate'], + c='orange', alpha=0.5, label='Twilight Land') + plt.scatter(twilight_df[twilight_df['surface_type'] == 'Ocean']['solar_elevation'], + twilight_df[twilight_df['surface_type'] == 'Ocean']['solar_background_rate'], + c='yellow', alpha=0.5, label='Twilight Ocean') + plt.axhline(non_solar_rate, color='r', linestyle='--', label=f'Non-Solar: {non_solar_rate:.2e} ph/s') + + # Plot daytime fits + for surface_type in daytime_models: + if daytime_models[surface_type]['model'] is not None: + x_fit = np.linspace(min(daytime_df['solar_elevation']), max(daytime_df['solar_elevation']), 100) + y_fit = 
def create_calibration_plot(df, daytime_df, output_dir):
    """Save an illustrative before/after histogram for the first daytime file.

    The first row of ``daytime_df`` supplies a single mean photon height and a
    solar background rate.  A simulated sample of 1000 photons at that height
    is histogrammed over a window of +/- 1 m, a uniform per-bin background
    derived from the solar rate is subtracted, and both distributions are
    written to ``calibration_example.png`` in ``output_dir``.

    Nothing is plotted when ``daytime_df`` is empty or the height is not
    finite.

    Args:
        df: Unused; kept for interface compatibility.
        daytime_df: Daytime rows with 'mean_photon_height',
            'solar_background_rate' and 'file' columns.
        output_dir: Existing directory that receives the PNG.
    """
    if daytime_df.empty:
        return

    sample_file = daytime_df.iloc[0]
    height = sample_file['mean_photon_height']
    if not np.isfinite(height):
        return

    solar_rate = sample_file['solar_background_rate']
    n_simulated = 1000
    bin_size = 0.1
    half_window = 1.0

    # 'mean_photon_height' is a scalar, so the window is centred on it;
    # calling min()/max() on the value itself raises TypeError.
    simulated = np.full(n_simulated, height, dtype=float)
    bin_edges = np.arange(height - half_window, height + half_window, bin_size)
    hist, edges = np.histogram(simulated, bins=bin_edges)

    total = np.sum(hist)
    if total == 0:
        return

    photons_per_bin = solar_rate * (n_simulated / total) * bin_size / (2 * half_window)
    calibrated_counts = np.maximum(hist - photons_per_bin, 0)

    calibrated_heights = []
    for low, high, count in zip(edges[:-1], edges[1:], calibrated_counts):
        if count > 0:  # False for NaN, so a missing solar rate adds nothing
            calibrated_heights.extend(np.random.uniform(low, high, int(count)))

    plt.figure(figsize=(8, 6))
    plt.hist(simulated, bins=100, alpha=0.5, label='Original (Simulated)')
    plt.hist(calibrated_heights, bins=100, alpha=0.5, label='Calibrated')
    plt.xlabel('Photon Height (m)')
    plt.ylabel('Count')
    plt.legend()
    plt.title(f'Calibration Example: {sample_file["file"]}')
    plt.savefig(os.path.join(output_dir, "calibration_example.png"))
    plt.close()
def analyze_icesat2_data(atl03_directory, beam='gt1l', twilight_threshold=6.0, output_dir="output", shoreline_data_path=None):
    """Run the average-background-rate workflow for a directory of ATL03 files.

    Steps: load and process every granule, assign a surface type, pick the
    matching total background rate, classify rows by solar elevation, then
    compute and plot the daytime/nighttime averages for land and ocean.

    Args:
        atl03_directory: Directory containing the ATL03 ``.h5`` files.
        beam: Beam group name inside each file.
        twilight_threshold: Solar-elevation limit (degrees) passed to
            ``classify_data``.
        output_dir: Directory for the generated plot; created if missing.
        shoreline_data_path: Shoreline dataset forwarded to the file loader.

    Returns:
        The dictionary of averages produced by
        ``calculate_background_averages``.

    Raises:
        ValueError: If no file could be processed.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    df = load_and_process_files(atl03_directory, beam, shoreline_data_path)
    if df.empty:
        raise ValueError("No data processed successfully. Check logs for errors.")

    df = assign_surface_type(df)

    # Pick the per-surface total rate; rows that are neither Land nor Ocean get NaN.
    surface = df['surface_type']
    df['total_background_rate'] = np.where(
        surface == 'Land', df['total_background_rate_land'],
        np.where(surface == 'Ocean', df['total_background_rate_ocean'], np.nan)
    )

    df, daytime_df, nighttime_df, _twilight_df, _unknown_df = classify_data(df, twilight_threshold)

    averages = calculate_background_averages(daytime_df, nighttime_df)
    plot_background_averages(averages, output_dir)
    return averages
def process_file(args):
    """Summarise one ATL03 granule: land/ocean background rates and solar elevation.

    Args:
        args: ``(file, beam, shoreline_data_path)`` packed in one tuple so the
            function can be used with ``Pool.map``.

    Returns:
        On success, a dict with 'file', 'total_background_rate_land',
        'total_background_rate_ocean', 'solar_elevation' and
        'mean_photon_height'.  On any failure, ``{'file': ..., 'error': ...}``
        so one bad granule does not abort the batch.
    """
    file, beam, shoreline_data_path = args
    try:
        with h5py.File(file, 'r') as f:
            required_keys = [
                f'{beam}/bckgrd_atlas/bckgrd_rate',
                f'{beam}/bckgrd_atlas/delta_time',
                # Not read below; kept so the set of accepted files is unchanged.
                f'{beam}/heights/delta_time',
                f'{beam}/heights/h_ph',
                f'{beam}/geolocation/reference_photon_lat',
                f'{beam}/geolocation/reference_photon_lon',
                # This dataset IS read below but was missing from the check.
                f'{beam}/geolocation/delta_time',
                # Not read below; kept so the set of accepted files is unchanged.
                f'{beam}/geolocation/solar_azimuth',
                f'{beam}/geolocation/solar_elevation',
            ]
            for key in required_keys:
                if key not in f:
                    raise KeyError(f"Missing dataset: {key}")

            # Materialise everything while the file is open.
            bckgrd_rate = f[f'{beam}/bckgrd_atlas/bckgrd_rate'][:].astype(np.float64)
            bckgrd_time = f[f'{beam}/bckgrd_atlas/delta_time'][:]
            heights = f[f'{beam}/heights/h_ph'][:]
            segment_lat = f[f'{beam}/geolocation/reference_photon_lat'][:]
            segment_lon = f[f'{beam}/geolocation/reference_photon_lon'][:]
            segment_delta_time = f[f'{beam}/geolocation/delta_time'][:]
            solar_elevations = f[f'{beam}/geolocation/solar_elevation'][:].astype(np.float64)

        segments = pd.DataFrame({
            'latitude': segment_lat,
            'longitude': segment_lon,
            'delta_time': segment_delta_time,
        })
        segments['geometry'] = gpd.points_from_xy(segments['longitude'], segments['latitude'])
        segments_gdf = gpd.GeoDataFrame(segments, crs="EPSG:4326")

        # Land/ocean label per geolocation segment.
        segments_gdf['is_land'] = isolate_sea_land_photons(shoreline_data_path, segments_gdf)
        is_land = segments_gdf['is_land'].astype(int)

        # Carry the labels onto the background-rate time base; times outside
        # the segment range take the first/last label.
        interp_is_land = interp1d(
            segments_gdf['delta_time'],
            is_land,
            bounds_error=False,
            fill_value=(is_land.iloc[0], is_land.iloc[-1]),
        )
        is_land_interpolated = interp_is_land(bckgrd_time).round().astype(int)

        land_mask = is_land_interpolated == 1
        ocean_mask = is_land_interpolated == 0
        total_background_rate_land = np.mean(bckgrd_rate[land_mask]) if land_mask.any() else np.nan
        total_background_rate_ocean = np.mean(bckgrd_rate[ocean_mask]) if ocean_mask.any() else np.nan

        # Drop fill values and non-finite entries before averaging.
        FILL_VALUE = 3.4028235e38
        valid_solar = solar_elevations[
            (solar_elevations != FILL_VALUE)
            & (np.abs(solar_elevations) < 1e30)
            & np.isfinite(solar_elevations)
        ]
        solar_elevation = np.nan if valid_solar.size == 0 else np.mean(valid_solar)

        mean_photon_height = np.mean(heights) if heights.size > 0 else np.nan

        result = {
            'file': os.path.basename(file),
            'total_background_rate_land': total_background_rate_land,
            'total_background_rate_ocean': total_background_rate_ocean,
            'solar_elevation': solar_elevation,
            'mean_photon_height': mean_photon_height,
        }
        logger.info("Processed %s: %s", file, result)
        return result
    except Exception as e:
        # Deliberately broad: the failure is reported in the result row and
        # filtered out by the caller.
        logger.error("Error processing %s: %s", file, e)
        return {'file': os.path.basename(file), 'error': str(e)}
def load_and_process_files(atl03_directory, beam, shoreline_data_path):
    """Process every ATL03 ``.h5`` file in a directory in parallel.

    Files are handled in sorted name order so the resulting row order (and
    anything that samples "the first" row) is reproducible; ``os.listdir``
    alone returns entries in arbitrary order.

    Args:
        atl03_directory: Directory to scan (not recursive).
        beam: Beam group name forwarded to ``process_file``.
        shoreline_data_path: Shoreline dataset forwarded to ``process_file``.

    Returns:
        DataFrame with one row per successfully processed file.

    Raises:
        ValueError: If the directory has no ATL03 file or every file failed.
    """
    atl03_files = sorted(
        os.path.join(atl03_directory, name)
        for name in os.listdir(atl03_directory)
        if name.endswith('.h5') and 'ATL03' in name
    )
    print(f"Found {len(atl03_files)} ATL03 files.")

    if not atl03_files:
        raise ValueError("No ATL03 files found in the directory.")

    jobs = [(path, beam, shoreline_data_path) for path in atl03_files]
    with Pool() as pool:
        results = pool.map(process_file, jobs)

    print("Results from process_file:", results)  # Debug output
    df = pd.DataFrame(results)

    # The 'error' column exists only when at least one file failed.
    if 'error' in df.columns:
        failed = df['error'].notna()
        error_files = df.loc[failed, 'file'].tolist()
        print(f"Errors in {len(error_files)} files: {error_files}")
        df = df[~failed].drop(columns=['error'])

    if df.empty:
        raise ValueError("No files were successfully processed. Check the error messages above.")

    print("DataFrame columns after processing:", df.columns)
    return df
def fit_daytime_model(df, daytime_df, non_solar_rate):
    """Fit daytime solar-background models per surface type and apply them.

    For each of 'Land' and 'Ocean', the finite daytime points (after removing
    3-sigma outliers in log10 space) are fitted with both ``exp_model`` and
    ``power_model``; the one with the higher R² is kept.  The power law is
    fitted on elevations shifted so the smallest fitted elevation maps to 1.

    The stored ``'model'`` callable always takes *raw* solar elevations and
    applies that shift itself.

    Args:
        df: Full table with 'classification', 'surface_type',
            'solar_elevation' and 'solar_background_rate' columns.  Its
            daytime rows are overwritten in place with modelled rates.
        daytime_df: The rows used for fitting (those classified 'Daytime').
        non_solar_rate: Baseline background rate (ph/s); the model offset is
            bounded to within 10% of it and it is the fallback value.

    Returns:
        ``(df, daytime_models)``; ``daytime_models`` maps a surface type to
        ``{'params', 'r2', 'model'}``.
    """
    rate_col = 'solar_background_rate'
    daytime_models = {}
    is_daytime = df['classification'] == 'Daytime'

    def r_squared(observed, predicted):
        # Undefined (NaN) when the observations have no variance.
        ss_tot = np.sum((observed - np.mean(observed)) ** 2)
        if not ss_tot > 0:
            return np.nan
        return 1 - np.sum((observed - predicted) ** 2) / ss_tot

    for surface_type in ['Land', 'Ocean']:
        subset = daytime_df[daytime_df['surface_type'] == surface_type]
        if len(subset) < 3:
            print(f"Warning: Insufficient daytime {surface_type} data for modeling.")
            continue

        x_all = subset['solar_elevation'].to_numpy(dtype=float)
        y_all = subset[rate_col].to_numpy(dtype=float)
        finite = np.isfinite(x_all) & np.isfinite(y_all)
        x_fit, y_fit = x_all[finite], y_all[finite]
        if x_fit.size < 3:
            print(f"Warning: Fewer than 3 valid points for daytime {surface_type} model.")
            continue

        # Remove outliers.  The test is inclusive so that identical rates
        # (zero spread) are all kept instead of all being discarded.
        log_y = np.log10(y_fit + 1)
        keep = np.abs(log_y - np.mean(log_y)) <= 3 * np.std(log_y)
        x_fit, y_fit = x_fit[keep], y_fit[keep]

        print(f"Daytime regression ({surface_type}): {len(x_fit)} valid data points")
        surface_rows = is_daytime & (df['surface_type'] == surface_type)

        try:
            offset_low, offset_high = non_solar_rate * 0.9, non_solar_rate * 1.1
            amplitude_guess = np.max(y_fit) - non_solar_rate

            # Starting points are clamped into the bounds; curve_fit raises
            # ValueError for an infeasible p0.
            bounds_exp = ([0, 0.01, offset_low], [1e8, 0.5, offset_high])
            p0_exp = np.clip([amplitude_guess, 0.05, non_solar_rate], *bounds_exp)
            popt_exp, _ = curve_fit(exp_model, x_fit, y_fit, p0=p0_exp,
                                    bounds=bounds_exp, maxfev=20000)
            r2_exp = r_squared(y_fit, exp_model(x_fit, *popt_exp))

            x_offset = np.min(x_fit) - 1  # shifted elevation = x - x_offset
            bounds_power = ([0, 0.1, offset_low], [1e8, 1.0, offset_high])
            p0_power = np.clip([amplitude_guess, 0.6, non_solar_rate], *bounds_power)
            popt_power, _ = curve_fit(power_model, x_fit - x_offset, y_fit, p0=p0_power,
                                      bounds=bounds_power, maxfev=20000)
            r2_power = r_squared(y_fit, power_model(x_fit - x_offset, *popt_power))
        except (RuntimeError, ValueError) as e:
            # RuntimeError: no convergence.  ValueError: empty data after
            # outlier removal, or degenerate bounds from the non-solar rate.
            print(f"Daytime model fitting failed for {surface_type}: {e}. Using non-solar rate as fallback.")
            df.loc[surface_rows, rate_col] = non_solar_rate
            continue

        # Fitted values are bound as default arguments: a plain closure would
        # late-bind and pick up the next surface type's fit.
        if r2_power > r2_exp:
            print(f"Daytime model ({surface_type}, power law): rate = {popt_power[0]:.2e} * solar_elevation^{popt_power[1]:.2f} + {popt_power[2]:.2e}, R² = {r2_power:.3f}")
            # The shifted elevation is floored at 0 so a fractional power of a
            # negative number cannot produce NaN below the fitted range.
            model = lambda x, _p=tuple(popt_power), _x0=x_offset: power_model(np.maximum(x - _x0, 0), *_p)
            daytime_models[surface_type] = {'params': popt_power, 'r2': r2_power, 'model': model}
        else:
            print(f"Daytime model ({surface_type}, exponential): rate = {popt_exp[0]:.2e} * exp({popt_exp[1]:.2f} * solar_elevation) + {popt_exp[2]:.2e}, R² = {r2_exp:.3f}")
            model = lambda x, _p=tuple(popt_exp): exp_model(x, *_p)
            daytime_models[surface_type] = {'params': popt_exp, 'r2': r2_exp, 'model': model}

        # Apply model.  It receives raw elevations: the power-law callable
        # shifts internally, so shifting here as well would apply it twice.
        usable = surface_rows & df['solar_elevation'].notna() & np.isfinite(df['solar_elevation'])
        elevations = df.loc[usable, 'solar_elevation']
        df.loc[usable, rate_col] = model(elevations).clip(lower=0)
        df.loc[surface_rows & ~usable, rate_col] = non_solar_rate

    return df, daytime_models
def create_diagnostic_plot(df, daytime_df, twilight_df, nighttime_df, daytime_models, twilight_models, non_solar_rate, output_dir):
    """Plot background rate against solar elevation with the fitted models.

    Draws the total rate for all rows, the solar rate for daytime and twilight
    rows, the total rate for nighttime rows (split by Land/Ocean), the
    non-solar baseline, and one curve per fitted model, then saves the figure
    as ``background_vs_elevation_with_nighttime_2021.png`` in ``output_dir``.

    Args:
        df: Full table with 'solar_elevation' and 'total_background_rate'.
        daytime_df, twilight_df, nighttime_df: Per-classification subsets with
            'surface_type', 'solar_elevation' and the rate columns.
        daytime_models, twilight_models: Mappings from surface type to a dict
            holding a 'model' callable (taking solar elevations) and 'r2'.
        non_solar_rate: Baseline rate (ph/s) drawn as a horizontal line.
        output_dir: Existing directory that receives the PNG.
    """
    plt.figure(figsize=(10, 6))

    # Total background rate (all data)
    plt.scatter(df['solar_elevation'], df['total_background_rate'], c='gray', alpha=0.5, label='Total')

    # (subset, rate column, legend prefix, colour per surface type).  Nighttime
    # uses the total rate since the solar contribution is minimal.
    scatter_groups = [
        (daytime_df, 'solar_background_rate', 'Daytime', {'Land': 'blue', 'Ocean': 'cyan'}),
        (twilight_df, 'solar_background_rate', 'Twilight', {'Land': 'orange', 'Ocean': 'yellow'}),
        (nighttime_df, 'total_background_rate', 'Nighttime', {'Land': 'purple', 'Ocean': 'lightgreen'}),
    ]
    for frame, rate_col, period, colors in scatter_groups:
        for surface_type, color in colors.items():
            rows = frame[frame['surface_type'] == surface_type]
            plt.scatter(rows['solar_elevation'], rows[rate_col],
                        c=color, alpha=0.5, label=f'{period} {surface_type}')

    # Non-solar background rate line
    plt.axhline(non_solar_rate, color='r', linestyle='--', label=f'Non-Solar: {non_solar_rate:.2e} ph/s')

    # Fitted curves.  The model callables are given raw elevations.  The
    # earlier "'power' in model dict" test could never be true, because the
    # dict keys are 'params', 'r2' and 'model', so no shift was ever applied.
    fit_groups = [('Daytime', daytime_df, daytime_models), ('Twilight', twilight_df, twilight_models)]
    for period, frame, models in fit_groups:
        if not models:
            continue
        x_fit = np.linspace(frame['solar_elevation'].min(), frame['solar_elevation'].max(), 100)
        for surface_type, entry in models.items():
            if entry['model'] is None:
                continue
            plt.plot(x_fit, entry['model'](x_fit),
                     label=f'{period} {surface_type} Fit (R²={entry["r2"]:.3f})',
                     linestyle='-' if surface_type == 'Land' else '--')

    # Set plot properties
    plt.yscale('log')
    plt.ylim(1e3, 1e8)
    plt.xlabel('Solar Elevation (degrees)')
    plt.ylabel('Background Rate (ph/s)')
    plt.legend()
    plt.title('Background Rate vs. Solar Elevation (Land vs. Ocean) with Nighttime Comparison')

    # Save the plot
    plt.savefig(os.path.join(output_dir, "background_vs_elevation_with_nighttime_2021.png"))
    plt.close()
plt.figure(figsize=(8, 6)) + plt.hist([heights] * 1000, bins=100, alpha=0.5, label='Original (Simulated)') + plt.hist(calibrated_heights, bins=100, alpha=0.5, label='Calibrated') + plt.xlabel('Photon Height (m)') + plt.ylabel('Count') + plt.legend() + plt.title(f'Calibration Example: {sample_file["file"]}') + plt.savefig(os.path.join(output_dir, "calibration_example.png")) + plt.close() + +def save_results(df, output_dir): + """Save the results to a CSV file.""" + df.to_csv(os.path.join(output_dir, "atl03_analysis_updated.csv"), index=False) + +def analyze_icesat2_data(atl03_directory, beam='gt1l', twilight_threshold=6.0, output_dir="output", shoreline_data_path=None): + """Main analysis function orchestrating the workflow.""" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + df = load_and_process_files(atl03_directory, beam, shoreline_data_path) + if df.empty: + raise ValueError("No data processed successfully. Check logs for errors.") + + df = assign_surface_type(df) + + # Compute total_background_rate based on surface_type + df['total_background_rate'] = np.where( + df['surface_type'] == 'Land', df['total_background_rate_land'], + np.where(df['surface_type'] == 'Ocean', df['total_background_rate_ocean'], np.nan) + ) + + df, daytime_df, nighttime_df, twilight_df, unknown_df = classify_data(df, twilight_threshold) + + non_solar_rate = calculate_non_solar_rate(nighttime_df) + df = compute_solar_background_rates(df, non_solar_rate) + + # Recreate filtered DataFrames to include the updated columns from compute_solar_background_rates + daytime_df = df[df['classification'] == 'Daytime'].copy() + nighttime_df = df[df['classification'] == 'Nighttime'].copy() + twilight_df = df[df['classification'] == 'Twilight'].copy() + unknown_df = df[df['classification'] == 'Unknown'].copy() + + df, daytime_models = fit_daytime_model(df, daytime_df, non_solar_rate) + df, twilight_models = fit_twilight_model(df, twilight_df, non_solar_rate) + save_results(df, 
output_dir) + + # Updated call to include nighttime_df + create_diagnostic_plot(df, daytime_df, twilight_df, nighttime_df, daytime_models, twilight_models, non_solar_rate, output_dir) + # create_calibration_plot(df, daytime_df, output_dir) + + return df, { + 'daytime_models': daytime_models, + 'twilight_models': twilight_models, + 'non_solar_rate': non_solar_rate + } + +if __name__ == "__main__": + args = get_args() + atl03_directory = os.path.join(args.workspace_path, args.atl03_path) + shoreline_data_path = os.path.join(args.workspace_path, args.shoreline_data) + output_dir = os.path.join(args.workspace_path, args.output_path) + df, models = analyze_icesat2_data(atl03_directory, args.beam, output_dir=output_dir, shoreline_data_path=shoreline_data_path) + print("Analysis complete.") + + + + + \ No newline at end of file diff --git a/icesat-2_kdph_py/kd_utils/SolarBckgrd/analyze_solarbckgrd_icesat2.py b/icesat-2_kdph_py/kd_utils/SolarBckgrd/analyze_solarbckgrd_icesat2.py new file mode 100644 index 0000000..3701a3f --- /dev/null +++ b/icesat-2_kdph_py/kd_utils/SolarBckgrd/analyze_solarbckgrd_icesat2.py @@ -0,0 +1,496 @@ +import h5py +import os +import numpy as np +import pandas as pd +import geopandas as gpd +from shapely.geometry import Point +from scipy.interpolate import interp1d +from scipy.optimize import curve_fit +import matplotlib.pyplot as plt +from multiprocessing import Pool +from config import get_args +from noise_utils.interpolation import geoid_correction, refraction_correction, interpolate_labels, apply_interpolation +from noise_utils.data_processing import isolate_sea_land_photons + +import logging + +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Define model functions +def power_model(x, a, b, c): + return a * np.power(x, b) + c + +def exp_model(x, a, b, c): + """Exponential model: a * exp(b * x) + c""" + return a * np.exp(b * x) + c + +def twilight_model(x, a, b, c): + return a * np.exp(-b * 
x**2) + c # Gaussian form for twilight + +def process_file(args): + """Process a single ATL03 file, skipping files missing required datasets.""" + file, beam, shoreline_data_path = args + try: + with h5py.File(file, 'r') as f: + # Check for required datasets + required_keys = [ + f'{beam}/bckgrd_atlas/bckgrd_rate', + f'{beam}/bckgrd_atlas/delta_time', + f'{beam}/heights/delta_time', + f'{beam}/heights/h_ph', + f'{beam}/geolocation/reference_photon_lat', + f'{beam}/geolocation/reference_photon_lon', + f'{beam}/geolocation/solar_azimuth', + f'{beam}/geolocation/solar_elevation' + ] + for key in required_keys: + if key not in f: + raise KeyError(f"Missing dataset: {key}") + + bckgrd_rate = f[f'{beam}/bckgrd_atlas/bckgrd_rate'][:].astype(np.float64) + bckgrd_time = f[f'{beam}/bckgrd_atlas/delta_time'][:] + heights = f[f'{beam}/heights/h_ph'][:] + segment_lat = f[f'{beam}/geolocation/reference_photon_lat'][:] + segment_lon = f[f'{beam}/geolocation/reference_photon_lon'][:] + segment_delta_time = f[f'{beam}/geolocation/delta_time'][:] + + Segment_Is_Land = pd.DataFrame({ + 'latitude': segment_lat, # Use unique column names to avoid conflicts + 'longitude': segment_lon, + 'delta_time': segment_delta_time + }) + + # Create a GeoDataFrame with geometry + Segment_Is_Land['geometry'] = gpd.points_from_xy(Segment_Is_Land['longitude'], Segment_Is_Land['latitude']) + ICESat2_GDF = gpd.GeoDataFrame(Segment_Is_Land, crs="EPSG:4326") + + # Determine land/ocean classification + Segment_Is_Land_Labels = isolate_sea_land_photons(shoreline_data_path, ICESat2_GDF) + ICESat2_GDF['is_land'] = Segment_Is_Land_Labels + + # Interpolate is_land labels to bckgrd_time + interp_is_land = interp1d( + ICESat2_GDF['delta_time'], + ICESat2_GDF['is_land'].astype(int), + bounds_error=False, + fill_value=(ICESat2_GDF['is_land'].astype(int).iloc[0], ICESat2_GDF['is_land'].astype(int).iloc[-1]) + ) + is_land_interpolated = interp_is_land(bckgrd_time).round().astype(int) + + # 
##########Debug################################# + # # Interpolate latitude and longitude to bckgrd_time + # interp_lat = interp1d( + # ICESat2_GDF['delta_time'], + # ICESat2_GDF['latitude'], + # bounds_error=False, + # fill_value=(ICESat2_GDF['latitude'].iloc[0], ICESat2_GDF['latitude'].iloc[-1]) + # ) + # interp_lon = interp1d( + # ICESat2_GDF['delta_time'], + # ICESat2_GDF['longitude'], + # bounds_error=False, + # fill_value=(ICESat2_GDF['longitude'].iloc[0], ICESat2_GDF['longitude'].iloc[-1]) + # ) + # interpolated_lat = interp_lat(bckgrd_time) + # interpolated_lon = interp_lon(bckgrd_time) + + # # Create a GeoDataFrame for is_land_interpolated with lat/lon + # is_land_df = pd.DataFrame({ + # 'delta_time': bckgrd_time, + # 'latitude': interpolated_lat, + # 'longitude': interpolated_lon, + # 'is_land': is_land_interpolated + # }) + # is_land_gdf = gpd.GeoDataFrame( + # is_land_df, + # geometry=gpd.points_from_xy(is_land_df['longitude'], is_land_df['latitude']), + # crs="EPSG:4326" + # ) + + # # Save to a GeoPackage file + # file_dir = os.path.dirname(file) + # is_land_output_dir = os.path.join(file_dir, "is_land_interpolated") + # if not os.path.exists(is_land_output_dir): + # os.makedirs(is_land_output_dir) + # output_filename = os.path.join(is_land_output_dir, f"{os.path.basename(file)}_is_land.gpkg") + # is_land_gdf.to_file(output_filename, driver="GPKG") + # logger.info(f"Saved is_land_interpolated to {output_filename}") + ############################################## + + # Compute total background rates for land and ocean + land_mask = is_land_interpolated == 1 + ocean_mask = is_land_interpolated == 0 + total_background_rate_land = np.mean(bckgrd_rate[land_mask]) if np.sum(land_mask) > 0 else np.nan + total_background_rate_ocean = np.mean(bckgrd_rate[ocean_mask]) if np.sum(ocean_mask) > 0 else np.nan + + # Solar elevation handling + FILL_VALUE = 3.4028235e38 + solar_elevations = f[f'{beam}/geolocation/solar_elevation'][:].astype(np.float64) + valid_solar = 
def fit_surface_model(df, classification, surface_type, model_func, p0, bounds, x_col='solar_elevation', y_col='solar_background_rate'):
    """Fit ``model_func`` to one classification / surface-type subset of ``df``.

    Args:
        df: DataFrame with ``classification`` and ``surface_type`` columns
            plus the ``x_col`` and ``y_col`` columns.
        classification: Value of ``classification`` to select.
        surface_type: Value of ``surface_type`` to select.
        model_func: Callable ``f(x, *params)`` handed to ``curve_fit``.
        p0: Initial parameter guess for ``curve_fit``.
        bounds: ``(lower, upper)`` parameter bounds for ``curve_fit``.
        x_col: Name of the independent-variable column.
        y_col: Name of the dependent-variable column.

    Returns:
        ``(popt, r2, model)`` where ``model(x)`` evaluates the fitted curve,
        or ``(None, None, None)`` when fewer than three usable points exist
        or the fit cannot be performed. ``r2`` is NaN when the selected
        ``y`` values have zero variance (R² is undefined there).
    """
    selected = df[(df['classification'] == classification) & (df['surface_type'] == surface_type)]
    if len(selected) < 3:
        print(f"Warning: Insufficient data for {classification} {surface_type} model.")
        return None, None, None

    # Work on plain float arrays so the optimiser never sees label-indexed Series.
    x_all = selected[x_col].to_numpy(dtype=float)
    y_all = selected[y_col].to_numpy(dtype=float)
    finite = np.isfinite(x_all) & np.isfinite(y_all)
    valid_x = x_all[finite]
    valid_y = y_all[finite]

    if len(valid_x) < 3:
        print(f"Warning: Fewer than 3 valid points for {classification} {surface_type} model.")
        return None, None, None

    try:
        popt, _ = curve_fit(model_func, valid_x, valid_y, p0=p0, bounds=bounds, maxfev=20000)
    except (RuntimeError, ValueError) as e:
        # RuntimeError: optimiser did not converge.
        # ValueError: infeasible p0, inconsistent bounds, or non-finite input.
        print(f"Fit failed for {classification} {surface_type}: {e}")
        return None, None, None

    residual_ss = np.sum((valid_y - model_func(valid_x, *popt)) ** 2)
    total_ss = np.sum((valid_y - np.mean(valid_y)) ** 2)
    r2 = 1 - residual_ss / total_ss if total_ss > 0 else np.nan
    return popt, r2, lambda x: model_func(x, *popt)
parallel.""" + atl03_files = [os.path.join(atl03_directory, f) for f in os.listdir(atl03_directory) if f.endswith('.h5') and 'ATL03' in f] + print(f"Found {len(atl03_files)} ATL03 files.") + + if not atl03_files: + raise ValueError("No ATL03 files found in the directory.") + + with Pool() as pool: + results = pool.map(process_file, [(f, beam, shoreline_data_path) for f in atl03_files]) + + print("Results from process_file:", results) # Debug output + df = pd.DataFrame(results) + + + if 'error' in df.columns: + error_files = df[df['error'].notna()]['file'].tolist() + print(f"Errors in {len(error_files)} files: {error_files}") + df = df[df['error'].isna()].drop(columns=['error']) + + if df.empty: + raise ValueError("No files were successfully processed. Check the error messages above.") + + print("DataFrame columns after processing:", df.columns) + return df + +def assign_surface_type(df): + """Assign surface type based on background rates.""" + required_columns = ['total_background_rate_land', 'total_background_rate_ocean'] + missing_columns = [col for col in required_columns if col not in df.columns] + if missing_columns: + raise KeyError(f"Missing required columns in DataFrame: {missing_columns}") + + df['surface_type'] = np.where( + df['total_background_rate_land'].notna() & (df['total_background_rate_land'] > 0), + 'Land', + np.where(df['total_background_rate_ocean'].notna() & (df['total_background_rate_ocean'] > 0), 'Ocean', 'Mixed') + ) + return df + +def classify_data(df, twilight_threshold=6.0): + """Classify data into Daytime, Nighttime, Twilight, or Unknown.""" + df['classification'] = np.where( + df['solar_elevation'].isna(), 'Unknown', + np.where(df['solar_elevation'] > twilight_threshold, 'Daytime', + np.where(df['solar_elevation'] < -twilight_threshold, 'Nighttime', 'Twilight')) + ) + daytime_df = df[df['classification'] == 'Daytime'].copy() + nighttime_df = df[df['classification'] == 'Nighttime'].copy() + twilight_df = df[df['classification'] == 
def fit_daytime_model(df, daytime_df, non_solar_rate):
    """Fit daytime solar-background models for land and ocean and apply them.

    For each surface type, an exponential (``exp_model``) and a shifted
    power-law (``power_model``) curve are fitted to
    ``solar_background_rate`` versus ``solar_elevation``; the one with the
    higher R² is kept (exponential on ties). The chosen model then overwrites
    ``solar_background_rate`` for the matching daytime rows of ``df``.

    Args:
        df: Full results DataFrame; modified in place and returned.
        daytime_df: Rows of ``df`` classified as ``'Daytime'``.
        non_solar_rate: Night-time background rate, used to seed and bound
            the constant term and as the fallback value.

    Returns:
        ``(df, daytime_models)`` where ``daytime_models[surface_type]`` is a
        dict with keys ``'params'``, ``'r2'`` and ``'model'``. ``'model'``
        takes *raw* solar elevation in degrees; the power-law variant applies
        its own x-shift internally, so callers must not shift again.
    """
    daytime_models = {}
    for surface_type in ['Land', 'Ocean']:
        subset = daytime_df[daytime_df['surface_type'] == surface_type]
        if len(subset) < 3:
            print(f"Warning: Insufficient daytime {surface_type} data for modeling.")
            continue

        x_all = subset['solar_elevation'].to_numpy(dtype=float)
        y_all = subset['solar_background_rate'].to_numpy(dtype=float)
        finite = np.isfinite(x_all) & np.isfinite(y_all)
        valid_x = x_all[finite]
        valid_y = y_all[finite]

        if len(valid_x) < 3:
            print(f"Warning: Fewer than 3 valid points for daytime {surface_type} model.")
            continue

        # Remove 3-sigma outliers in log space. The comparison is inclusive so
        # that a zero spread (all rates equal) keeps the points instead of
        # discarding every one of them.
        log_y = np.log10(valid_y + 1)
        keep = np.abs(log_y - np.mean(log_y)) <= 3 * np.std(log_y)
        valid_x = valid_x[keep]
        valid_y = valid_y[keep]

        if len(valid_x) < 3:
            print(f"Warning: Fewer than 3 valid points for daytime {surface_type} model.")
            continue

        print(f"Daytime regression ({surface_type}): {len(valid_x)} valid data points")
        daytime_mask = (df['classification'] == 'Daytime') & (df['surface_type'] == surface_type)
        try:
            x_min = float(np.min(valid_x))
            total_ss = np.sum((valid_y - np.mean(valid_y)) ** 2)
            amplitude0 = np.max(valid_y) - non_solar_rate
            # NOTE(review): the constant term is pinned to +/-10% of
            # non_solar_rate although the fitted quantity already has that
            # rate subtracted upstream -- confirm this is intended.
            c_low, c_high = non_solar_rate * 0.9, non_solar_rate * 1.1

            bounds_exp = ([0, 0.01, c_low], [1e8, 0.5, c_high])
            # Clip the initial guess into the bounds; curve_fit rejects an
            # infeasible p0 with ValueError.
            p0_exp = np.clip([amplitude0, 0.05, non_solar_rate], bounds_exp[0], bounds_exp[1])
            popt_exp, _ = curve_fit(exp_model, valid_x, valid_y, p0=p0_exp, bounds=bounds_exp, maxfev=20000)
            ss_exp = np.sum((valid_y - exp_model(valid_x, *popt_exp)) ** 2)
            r2_exp = 1 - ss_exp / total_ss if total_ss > 0 else np.nan

            # The power law is fitted on x shifted so the smallest elevation maps to 1.
            x_shifted = valid_x - x_min + 1
            bounds_power = ([0, 0.1, c_low], [1e8, 1.0, c_high])
            p0_power = np.clip([amplitude0, 0.6, non_solar_rate], bounds_power[0], bounds_power[1])
            popt_power, _ = curve_fit(power_model, x_shifted, valid_y, p0=p0_power, bounds=bounds_power, maxfev=20000)
            ss_power = np.sum((valid_y - power_model(x_shifted, *popt_power)) ** 2)
            r2_power = 1 - ss_power / total_ss if total_ss > 0 else np.nan

            if r2_power > r2_exp:
                print(f"Daytime model ({surface_type}, power law): rate = {popt_power[0]:.2e} * solar_elevation^{popt_power[1]:.2f} + {popt_power[2]:.2e}, R² = {r2_power:.3f}")
                # Default arguments freeze this iteration's parameters and
                # shift; a plain closure would see the values of the last
                # loop iteration. Shifted x is floored at 0 so elevations
                # below the fitted range do not yield NaN from a fractional
                # power of a negative number.
                model = lambda x, _p=tuple(popt_power), _x0=x_min: power_model(np.maximum(x - _x0 + 1, 0), *_p)
                daytime_models[surface_type] = {'params': popt_power, 'r2': r2_power, 'model': model}
            else:
                print(f"Daytime model ({surface_type}, exponential): rate = {popt_exp[0]:.2e} * exp({popt_exp[1]:.2f} * solar_elevation) + {popt_exp[2]:.2e}, R² = {r2_exp:.3f}")
                model = lambda x, _p=tuple(popt_exp): exp_model(x, *_p)
                daytime_models[surface_type] = {'params': popt_exp, 'r2': r2_exp, 'model': model}

            # Apply the model to raw elevations. The power-law model shifts
            # internally, so no extra shift here (the earlier code shifted twice).
            valid_daytime_mask = daytime_mask & df['solar_elevation'].notna() & np.isfinite(df['solar_elevation'])
            x_for_model = df.loc[valid_daytime_mask, 'solar_elevation']
            df.loc[valid_daytime_mask, 'solar_background_rate'] = model(x_for_model).clip(lower=0)
            df.loc[daytime_mask & ~valid_daytime_mask, 'solar_background_rate'] = non_solar_rate
        except (RuntimeError, ValueError) as e:
            # RuntimeError: no convergence. ValueError: unusable bounds, for
            # example when non_solar_rate is NaN, zero or negative.
            print(f"Daytime model fitting failed for {surface_type}: {e}. Using non-solar rate as fallback.")
            df.loc[daytime_mask, 'solar_background_rate'] = non_solar_rate
    return df, daytime_models
non_solar_rate + initial_b = 0.01 # Smaller b for a flatter Gaussian + initial_c = non_solar_rate + p0 = [initial_a, initial_b, initial_c] + bounds = ([0, 0.001, non_solar_rate * 0.9], [1e6, 1.0, non_solar_rate * 1.1]) + + popt, _ = curve_fit(twilight_model, valid_twilight_x, valid_twilight_y, p0=p0, bounds=bounds, maxfev=50000) + y_pred = twilight_model(valid_twilight_x, *popt) + r_squared = 1 - np.sum((valid_twilight_y - y_pred)**2) / np.sum((valid_twilight_y - np.mean(valid_twilight_y))**2) + + if r_squared < 0: + # Fallback to a constant model (mean of the data) + mean_rate = np.mean(valid_twilight_y) + twilight_models[surface_type] = { + 'params': [mean_rate], + 'r2': 0.0, # R² of a constant model is 0 by definition + 'model': lambda x: np.full_like(x, mean_rate) + } + print(f"Twilight model ({surface_type}) failed with R² = {r_squared:.3f}. Using constant model: {mean_rate:.2e} ph/s") + + twilight_mask = (df['classification'] == 'Twilight') & (df['surface_type'] == surface_type) + valid_twilight_mask = twilight_mask & df['solar_elevation'].notna() & np.isfinite(df['solar_elevation']) + df.loc[valid_twilight_mask, 'solar_background_rate'] = mean_rate + df.loc[twilight_mask & ~valid_twilight_mask, 'solar_background_rate'] = non_solar_rate + else: + print(f"Twilight model ({surface_type}): rate = {popt[0]:.2e} * exp(-{popt[1]:.2f} * solar_elevation^2) + {popt[2]:.2e}, R² = {r_squared:.3f}") + twilight_models[surface_type] = {'params': popt, 'r2': r_squared, 'model': lambda x: twilight_model(x, *popt)} + + twilight_mask = (df['classification'] == 'Twilight') & (df['surface_type'] == surface_type) + valid_twilight_mask = twilight_mask & df['solar_elevation'].notna() & np.isfinite(df['solar_elevation']) + x_for_model = df.loc[valid_twilight_mask, 'solar_elevation'] + df.loc[valid_twilight_mask, 'solar_background_rate'] = twilight_models[surface_type]['model'](x_for_model).clip(lower=0) + df.loc[twilight_mask & ~valid_twilight_mask, 'solar_background_rate'] = 
non_solar_rate + except RuntimeError as e: + print(f"Twilight model fitting failed for {surface_type}: {e}. Setting twilight rates to non-solar rate.") + df.loc[(df['classification'] == 'Twilight') & (df['surface_type'] == surface_type), 'solar_background_rate'] = non_solar_rate + else: + print("Too few twilight points for modeling. Setting twilight rates to non-solar rate.") + df.loc[df['classification'] == 'Twilight', 'solar_background_rate'] = non_solar_rate + return df, twilight_models + +def create_diagnostic_plot(df, daytime_df, twilight_df, daytime_models, twilight_models, non_solar_rate, output_dir): + """Create diagnostic plot of background rate vs. solar elevation.""" + plt.figure(figsize=(10, 6)) + plt.scatter(df['solar_elevation'], df['total_background_rate'], c='gray', alpha=0.5, label='Total') + plt.scatter(daytime_df[daytime_df['surface_type'] == 'Land']['solar_elevation'], + daytime_df[daytime_df['surface_type'] == 'Land']['solar_background_rate'], + c='blue', alpha=0.5, label='Daytime Land') + plt.scatter(daytime_df[daytime_df['surface_type'] == 'Ocean']['solar_elevation'], + daytime_df[daytime_df['surface_type'] == 'Ocean']['solar_background_rate'], + c='cyan', alpha=0.5, label='Daytime Ocean') + plt.scatter(twilight_df[twilight_df['surface_type'] == 'Land']['solar_elevation'], + twilight_df[twilight_df['surface_type'] == 'Land']['solar_background_rate'], + c='orange', alpha=0.5, label='Twilight Land') + plt.scatter(twilight_df[twilight_df['surface_type'] == 'Ocean']['solar_elevation'], + twilight_df[twilight_df['surface_type'] == 'Ocean']['solar_background_rate'], + c='yellow', alpha=0.5, label='Twilight Ocean') + plt.axhline(non_solar_rate, color='r', linestyle='--', label=f'Non-Solar: {non_solar_rate:.2e} ph/s') + + # Plot daytime fits + for surface_type in daytime_models: + if daytime_models[surface_type]['model'] is not None: + x_fit = np.linspace(min(daytime_df['solar_elevation']), max(daytime_df['solar_elevation']), 100) + y_fit = 
def create_calibration_plot(df, daytime_df, output_dir):
    """Create a calibration plot using mean photon height as a proxy.

    The first daytime row supplies a single mean photon height. It is
    repeated 1000 times to simulate a height histogram, a uniform per-bin
    background derived from that row's ``solar_background_rate`` is
    subtracted, and both histograms are saved to
    ``calibration_example.png`` in ``output_dir``.

    Args:
        df: Unused; kept for call compatibility.
        daytime_df: Daytime rows with ``mean_photon_height``,
            ``solar_background_rate`` and ``file`` columns.
        output_dir: Directory the PNG is written to.

    Returns:
        None. Nothing is plotted when ``daytime_df`` is empty or the mean
        height is not finite.
    """
    if daytime_df.empty:
        return

    sample_file = daytime_df.iloc[0]
    mean_height = sample_file['mean_photon_height']
    if not np.isfinite(mean_height):
        return

    solar_rate = sample_file['solar_background_rate']
    bin_size = 0.1
    n_samples = 1000

    # mean_photon_height is a scalar, so the +/-1 m histogram window is built
    # around it directly; calling min()/max() on it raised TypeError.
    window_lo = mean_height - 1
    window_hi = mean_height + 1
    simulated = np.full(n_samples, float(mean_height))
    hist, edges = np.histogram(simulated, bins=np.arange(window_lo, window_hi, bin_size))
    total = np.sum(hist)
    if total == 0:
        return

    photons_per_bin = solar_rate * (n_samples / total) * bin_size / (window_hi - window_lo)
    calibrated_counts = np.maximum(hist - photons_per_bin, 0)
    calibrated_heights = []
    for left, right, count in zip(edges[:-1], edges[1:], calibrated_counts):
        if count > 0:
            calibrated_heights.extend(np.random.uniform(left, right, int(count)))

    plt.figure(figsize=(8, 6))
    plt.hist(simulated, bins=100, alpha=0.5, label='Original (Simulated)')
    plt.hist(calibrated_heights, bins=100, alpha=0.5, label='Calibrated')
    plt.xlabel('Photon Height (m)')
    plt.ylabel('Count')
    plt.legend()
    plt.title(f'Calibration Example: {sample_file["file"]}')
    plt.savefig(os.path.join(output_dir, "calibration_example.png"))
    plt.close()
def get_args(argv=None):
    """Parse command-line options for the solar-background analysis.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, as before; passing a list lets
            notebooks, tests and other scripts build a configuration without
            touching the process arguments.

    Returns:
        argparse.Namespace holding every option defined below.
    """
    parser = argparse.ArgumentParser(description="Analyze ICESat-2 ATL03 data for solar background.")

    # Task name
    parser.add_argument("--taskID", type=str, default='2022_granules',
                        help="task name")
    # Workspace path; the other path options are given relative to it.
    parser.add_argument("--workspace_path", type=str, default='/work/users/w/a/wayne128/ICESat2/IS2_kd_py/',
                        help="Root path for all data processing.")

    # Select another region or year with --atl03_path rather than editing the default.
    parser.add_argument("--atl03_path", type=str, default='Dataset/ATL03_ICESat2/Extracted_HDF_Files/CD/2020_granules/h5_files/',
                        help="Base path for ICESat-2 ATL03 data.")

    parser.add_argument("--beam", default="gt1l", help="Beam to process (e.g., gt1l)")

    parser.add_argument("--atl03_file", type=str, default="processed_ATL03_20221001234100_01721701_006_01.h5",
                        help="Name of the ATL03 H5 file to process.")

    parser.add_argument("--output_path", type=str, default='Dataset/Results/',
                        help="Directory for saving results.")

    # Shoreline data
    parser.add_argument("--shoreline_data", type=str,
                        default='Dataset/Shorelines/ne_10m_land/ne_10m_land.shp',
                        help="Path to the global shoreline dataset.")

    # Bathymetry datasets
    # NOTE(review): nargs='+' yields a list when the flag is given, but the
    # default is a plain string -- consumers should handle both, or the
    # default should become a one-element list once callers are checked.
    parser.add_argument("--gebco_path", nargs='+', type=str,
                        default='Dataset/Bathy/gebco_2024_geotiff/',
                        help="Path to GEBCO bathymetry datasets.")

    # Resolution settings
    parser.add_argument("--horizontal_res", type=int, default=500,
                        help="Horizontal resolution for the analysis (in meters).")
    parser.add_argument("--vertical_res", type=float, default=0.25,
                        help="Vertical resolution for the analysis.")

    # Analysis parameters
    parser.add_argument("--subsurface_thresh", type=float, default=0.5,
                        help="Threshold for photons below the sea surface.")
    parser.add_argument("--ignore_subsurface_height_thres", type=float, default=-1,
                        help="Maximum depth thres for Kd calculations (in meters).")

    return parser.parse_args(argv)
# convert_wgs_to_utm function, see https://stackoverflow.com/a/40140326/4556479
def convert_wgs_to_utm(lon: float, lat: float) -> str:
    """Return the EPSG code string of the UTM zone containing a WGS84 point.

    Args:
        lon: Longitude in degrees.
        lat: Latitude in degrees; ``lat >= 0`` selects the northern series.

    Returns:
        ``'epsg:326NN'`` for the northern hemisphere or ``'epsg:327NN'`` for
        the southern hemisphere, where ``NN`` is the zero-padded zone number.
    """
    # Zones are 6 degrees wide starting at -180; the modulo wraps lon == 180 back to zone 1.
    zone = math.floor((lon + 180) / 6) % 60 + 1
    hemisphere_prefix = 'epsg:326' if lat >= 0 else 'epsg:327'
    return hemisphere_prefix + str(zone).zfill(2)
# Set up logging
logger = logging.getLogger(__name__)


def isolate_sea_land_photons(shoreline_data_path, ICESat2_GDF):
    """
    Classify ICESat-2 points as land (1) or ocean (0) using shoreline data.

    The shoreline polygons are read only for the bounding box of the points,
    reprojected to EPSG:4326 when they declare a different CRS, and spatially
    joined with the points (predicate ``within``).

    Args:
        shoreline_data_path (str): Path to the shoreline dataset (e.g., shapefile).
        ICESat2_GDF (gpd.GeoDataFrame): GeoDataFrame with ICESat-2 points. When it
            carries no CRS, EPSG:4326 is assigned in place. When it carries a
            different CRS, a reprojected copy is used for the join and the
            caller's frame is left untouched.

    Returns:
        np.ndarray: Integer array with one entry per row of ICESat2_GDF,
        0 (ocean) or 1 (land). For empty input, or when anything fails, the
        array is all zeros (every point treated as ocean) with the same
        integer dtype as the normal result.
    """
    n_points = len(ICESat2_GDF)

    if ICESat2_GDF.empty:
        logger.error("ICESat2_GDF is empty, cannot classify land/ocean.")
        return np.zeros(n_points, dtype=int)  # Default to ocean

    try:
        points_gdf = ICESat2_GDF
        if points_gdf.crs is None:
            logger.info("Setting CRS of ICESat2_GDF to EPSG:4326")
            points_gdf.set_crs("EPSG:4326", inplace=True)
        elif points_gdf.crs.to_string() != "EPSG:4326":
            # set_crs() refuses to replace an existing, different CRS; the
            # coordinates have to be reprojected instead of relabelled.
            logger.info("Reprojecting ICESat2_GDF to EPSG:4326")
            points_gdf = points_gdf.to_crs("EPSG:4326")

        # Load shoreline dataset with bounding box optimization
        logger.info(f"Loading shoreline data from {shoreline_data_path}")
        try:
            land_polygon_gdf = gpd.read_file(shoreline_data_path, bbox=points_gdf, engine='pyogrio')
        except Exception as e:
            # Best effort: pyogrio may be missing or reject the file.
            logger.warning(f"Failed to load shoreline data with pyogrio: {e}. Falling back to default engine.")
            land_polygon_gdf = gpd.read_file(shoreline_data_path, bbox=points_gdf)

        # Ensure the shoreline data is in the same CRS as the points. A
        # shoreline file without CRS metadata is used as-is.
        if land_polygon_gdf.crs is not None and land_polygon_gdf.crs.to_string() != "EPSG:4326":
            logger.info("Reprojecting shoreline data to EPSG:4326")
            land_polygon_gdf = land_polygon_gdf.to_crs("EPSG:4326")

        # Points that fall within any land polygon get label 1; the rest stay 0.
        pts_in_land = gpd.sjoin(points_gdf, land_polygon_gdf, predicate='within')
        land_point_labels = np.zeros(n_points, dtype=int)
        land_point_labels[points_gdf.index.isin(pts_in_land.index)] = 1

        n_land = int(land_point_labels.sum())
        logger.info(f"Classified {n_land} points as land, {n_points - n_land} as ocean")
        return land_point_labels

    except Exception:
        logger.exception("Error in isolate_sea_land_photons")
        logger.error(f"ICESat2_GDF info: {ICESat2_GDF.head()}")
        logger.error("Returning 0 (ocean) for all points")
        return np.zeros(n_points, dtype=int)  # Default to ocean
refraction_correction(lat_ph, segment_lat, ref_elev, ref_azimuth): + elev_model = interpolate_labels(segment_lat, ref_elev) + azimuth_model = interpolate_labels(segment_lat, ref_azimuth) + return apply_interpolation(elev_model, lat_ph), apply_interpolation(azimuth_model, lat_ph) diff --git a/icesat-2_kdph_py/kd_utils/__init__.py b/icesat-2_kdph_py/kd_utils/__init__.py new file mode 100644 index 0000000..f8f285a --- /dev/null +++ b/icesat-2_kdph_py/kd_utils/__init__.py @@ -0,0 +1,8 @@ +# utils/__init__.py + +from .data_processing import load_data, extract_file_params, Extract_sea_photons, create_photon_dataframe +from .visualization import plot_photon_height, plot_kd_photons +from .Kd_analysis import CalculateKdFromFilteredSubsurfacePhoton +from .interpolation import interpolate_labels, apply_interpolation, geoid_correction, refraction_correction +from .bathy_processing import * +from .sea_photons_analysis import * \ No newline at end of file diff --git a/icesat-2_kdph_py/kd_utils/__pycache__/Kd_analysis.cpython-39.pyc b/icesat-2_kdph_py/kd_utils/__pycache__/Kd_analysis.cpython-39.pyc new file mode 100644 index 0000000..4d16cb6 Binary files /dev/null and b/icesat-2_kdph_py/kd_utils/__pycache__/Kd_analysis.cpython-39.pyc differ diff --git a/icesat-2_kdph_py/kd_utils/__pycache__/__init__.cpython-39.pyc b/icesat-2_kdph_py/kd_utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..c44b25a Binary files /dev/null and b/icesat-2_kdph_py/kd_utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/icesat-2_kdph_py/kd_utils/__pycache__/bathy_processing.cpython-39.pyc b/icesat-2_kdph_py/kd_utils/__pycache__/bathy_processing.cpython-39.pyc new file mode 100644 index 0000000..c9d071c Binary files /dev/null and b/icesat-2_kdph_py/kd_utils/__pycache__/bathy_processing.cpython-39.pyc differ diff --git a/icesat-2_kdph_py/kd_utils/__pycache__/data_processing.cpython-39.pyc b/icesat-2_kdph_py/kd_utils/__pycache__/data_processing.cpython-39.pyc new file mode 
def apply_optional_histogram_quality_filter(
    binned_dataset_sea_surface,
    subsurface_photon_dataset,
    sea_surface_height,
    min_ratio=0.05,
    depth_min=6.0,
    depth_max=7.0
):
    """
    Optional histogram quality check:
    keep along-track bins only if the target depth-band photon ratio is >= min_ratio.

    For every along-track bin with a known sea-surface height, the ratio is
    (photons with depth in [depth_min, depth_max]) / (photons with depth > 0),
    where depth = surface height - photon height. Bins without underwater
    photons get a ratio of 0.

    Parameters:
    binned_dataset_sea_surface (pandas.DataFrame): Binned photons with
        'lat_bins' and 'photon_height' columns.
    subsurface_photon_dataset (pandas.DataFrame): Photons to filter; must
        carry a 'lat_bins' column.
    sea_surface_height (sequence): One surface height per 'lat_bins' group,
        in group order. NaN/None entries exclude the bin.
    min_ratio (float): Minimum depth-band ratio for a bin to be kept.
    depth_min (float): Upper edge of the depth band (metres below surface).
    depth_max (float): Lower edge of the depth band (metres below surface).

    Returns:
    pandas.DataFrame: The input frame itself when it is empty or when the
        number of surface heights does not match the number of bins; an empty
        copy when no bin has a surface height; otherwise a filtered copy.
    """
    if subsurface_photon_dataset.empty:
        return subsurface_photon_dataset

    # Group by the column name (not a one-element list) so that group keys are
    # plain scalars regardless of the pandas version.
    grouped = binned_dataset_sea_surface.groupby('lat_bins', observed=False)
    lat_bin_keys = list(grouped.groups.keys())
    if len(lat_bin_keys) != len(sea_surface_height):
        return subsurface_photon_dataset

    surface_by_bin = {
        lat_bin: surface
        for lat_bin, surface in zip(lat_bin_keys, sea_surface_height)
        if not pd.isna(surface)
    }
    if not surface_by_bin:
        return subsurface_photon_dataset.iloc[0:0].copy()

    # Every bin with a surface starts at ratio 0; a single pass over the
    # groups then fills in the bins that actually hold underwater photons.
    ratio_by_bin = dict.fromkeys(surface_by_bin, 0.0)
    for lat_bin, bin_data in grouped:
        if lat_bin not in surface_by_bin or bin_data.empty:
            continue

        depth = surface_by_bin[lat_bin] - bin_data['photon_height']
        underwater_mask = depth > 0
        total_underwater = int(underwater_mask.sum())
        if total_underwater == 0:
            continue

        band_mask = underwater_mask & (depth >= depth_min) & (depth <= depth_max)
        ratio_by_bin[lat_bin] = float(band_mask.sum()) / float(total_underwater)

    valid_bins = {lat_bin for lat_bin, ratio in ratio_by_bin.items() if ratio >= min_ratio}
    return subsurface_photon_dataset[subsurface_photon_dataset['lat_bins'].isin(valid_bins)].copy()
+ """ + if subsurface_photon_dataset.empty: + return subsurface_photon_dataset + + grouped = binned_dataset_sea_surface.groupby(['lat_bins'], observed=False) + lat_bin_keys = list(grouped.groups.keys()) + if len(lat_bin_keys) != len(sea_surface_height): + return subsurface_photon_dataset + + sigma_rows = [] + for lat_bin, surface in zip(lat_bin_keys, sea_surface_height): + if pd.isna(surface): + continue + bin_data = binned_dataset_sea_surface[binned_dataset_sea_surface['lat_bins'] == lat_bin] + if bin_data.empty: + continue + peak_data = bin_data[ + (bin_data['photon_height'] > surface - 1.0) & + (bin_data['photon_height'] < surface + 1.0) + ] + if len(peak_data) > 10: + _, sigma = norm.fit(peak_data['photon_height']) + else: + sigma = np.nan + sigma_rows.append((lat_bin, sigma)) + + if not sigma_rows: + return subsurface_photon_dataset + + sigma_df = pd.DataFrame(sigma_rows, columns=['lat_bins', 'surface_sigma']) + valid_bins = set(sigma_df.loc[sigma_df['surface_sigma'] <= sigma_max, 'lat_bins'].tolist()) + return subsurface_photon_dataset[subsurface_photon_dataset['lat_bins'].isin(valid_bins)].copy() + + +def apply_optional_refraction_correction( + binned_dataset_sea_surface, + subsurface_photon_dataset, + sea_surface_height, + water_temp_c=20.0, + wavelength_nm=532.0 +): + """ + Optional refraction correction for subsurface photons. + Updates photon positions/heights using Snell-based geometry. 
+ """ + required_cols = {'lat_bins', 'photon_height', 'ref_elevation', 'ref_azimuth', 'lon', 'lat'} + if subsurface_photon_dataset.empty or (not required_cols.issubset(set(subsurface_photon_dataset.columns))): + return subsurface_photon_dataset + + grouped = binned_dataset_sea_surface.groupby(['lat_bins'], observed=False) + lat_bin_keys = list(grouped.groups.keys()) + if len(lat_bin_keys) != len(sea_surface_height): + return subsurface_photon_dataset + + surface_df = pd.DataFrame({'lat_bins': lat_bin_keys, 'sea_surface_height': sea_surface_height}) + corrected = subsurface_photon_dataset.merge(surface_df, on='lat_bins', how='left').copy() + if corrected['sea_surface_height'].isna().all(): + return subsurface_photon_dataset + + corrected['photon_height_pre_refraction'] = corrected['photon_height'] + corrected['lon_pre_refraction'] = corrected['lon'] + corrected['lat_pre_refraction'] = corrected['lat'] + + # Refraction index parameterization from legacy module. + a = -0.000001501562500 + b = 0.000000107084865 + c = -0.000042759374989 + d = -0.000160475520686 + e = 1.398067112092424 + n1 = 1.00029 + n2 = (a * water_temp_c ** 2) + (b * wavelength_nm ** 2) + (c * water_temp_c) + (d * wavelength_nm) + e + + ref_elev = pd.to_numeric(corrected['ref_elevation'], errors='coerce').to_numpy(dtype=float) + ref_az = pd.to_numeric(corrected['ref_azimuth'], errors='coerce').to_numpy(dtype=float) + z = pd.to_numeric(corrected['photon_height'], errors='coerce').to_numpy(dtype=float) + ws = pd.to_numeric(corrected['sea_surface_height'], errors='coerce').to_numpy(dtype=float) + x = pd.to_numeric(corrected['lon'], errors='coerce').to_numpy(dtype=float) + y = pd.to_numeric(corrected['lat'], errors='coerce').to_numpy(dtype=float) + + # Convert to radians if values look like degrees. 
def apply_sea_surface_flattening(
    subsurface_photon_dataset,
    sea_surface_height,
    lat_bin_keys,
    horizontal_res,
    flattening_window_m=500
):
    """
    Flatten small-bin sea-surface variations to the mean sea level of larger along-track windows.
    This follows the standalone SeaSurfaceFlattening module logic, but is optional.

    Each along-track bin is assigned to a "big bin" of
    round(flattening_window_m / horizontal_res) consecutive bins. The offset
    of a bin is (big-bin mean surface - local surface); adding it to the
    photon heights moves the local surface onto the big-bin mean while keeping
    every photon's depth below its local surface unchanged.

    Parameters:
    subsurface_photon_dataset (pandas.DataFrame): Photons with 'lat_bins' and
        'photon_height' columns.
    sea_surface_height (sequence): Local surface height per bin, aligned with
        lat_bin_keys. NaN entries are ignored.
    lat_bin_keys (sequence): Bin identifiers; must be numeric (or convertible
        with pandas.to_numeric) to be assigned to a big bin.
    horizontal_res (float): Along-track size of one bin (values below 1 are
        treated as 1).
    flattening_window_m (float): Along-track size of the averaging window.

    Returns:
    pandas.DataFrame: The input frame itself when nothing can be flattened
        (empty input, length mismatch, no usable surface). Otherwise a new
        frame (fresh index, as produced by the merge) with the columns
        'sea_surface_height_local', 'sea_surface_height_bigbin_mean',
        'sea_surface_flattening_offset' and 'photon_height_original' added
        and 'photon_height' shifted by the offset. Photons whose bin has no
        offset are left at their original height.
    """
    if subsurface_photon_dataset.empty:
        return subsurface_photon_dataset

    if len(sea_surface_height) != len(lat_bin_keys):
        return subsurface_photon_dataset

    surface_df = pd.DataFrame({
        'lat_bins': lat_bin_keys,
        'sea_surface_height_local': sea_surface_height
    }).dropna(subset=['sea_surface_height_local']).copy()
    if surface_df.empty:
        return subsurface_photon_dataset

    surface_df['lat_bins_num'] = pd.to_numeric(surface_df['lat_bins'], errors='coerce')
    surface_df = surface_df.dropna(subset=['lat_bins_num'])
    if surface_df.empty:
        return subsurface_photon_dataset

    big_bin_size = max(1, int(round(flattening_window_m / max(horizontal_res, 1))))
    surface_df['big_bin'] = surface_df['lat_bins_num'].astype(int) // big_bin_size
    surface_df['sea_surface_height_bigbin_mean'] = (
        surface_df.groupby('big_bin')['sea_surface_height_local'].transform('mean')
    )
    surface_df['sea_surface_flattening_offset'] = (
        surface_df['sea_surface_height_bigbin_mean'] - surface_df['sea_surface_height_local']
    )

    helper_columns = [
        'sea_surface_height_local',
        'sea_surface_height_bigbin_mean',
        'sea_surface_flattening_offset',
    ]
    # Drop helper columns left over from an earlier pass; otherwise the merge
    # would suffix them (_x/_y) and the offset lookup below would fail.
    flattened = subsurface_photon_dataset.drop(columns=helper_columns, errors='ignore')
    flattened = flattened.merge(
        surface_df[['lat_bins'] + helper_columns],
        on='lat_bins',
        how='left'
    )
    flattened['photon_height_original'] = flattened['photon_height']
    # offset = mean - local, so it has to be ADDED: a photon sitting on the
    # local surface must end up on the big-bin mean surface.
    flattened['photon_height'] = (
        flattened['photon_height'] + flattened['sea_surface_flattening_offset'].fillna(0.0)
    )
    # Kept for output compatibility: these scratch column names never appear
    # in the result, even if the input happened to carry them.
    return flattened.drop(columns=['lat_bins_num', 'big_bin'], errors='ignore')
def create_spatial_index(gebco_paths):
    """
    Create an R-tree spatial index for raster bounds to quickly find relevant rasters.

    Parameters:
    gebco_paths (list | str | os.PathLike): Paths to GEBCO raster files. A
        single path is accepted as well and treated as a one-element list
        (a bare string would otherwise be iterated character by character).

    Returns:
    raster_data_dict (dict): Maps each path to a tuple
        (open rasterio dataset, band-1 array read from it). The datasets are
        returned open; closing them is the caller's responsibility.
    spatial_index (rtree.index.Index): R-tree spatial index of the raster
        bounds; each entry stores its path as the entry object.

    Raises:
    Whatever rasterio raises while opening or reading a raster. Datasets that
    were already opened by this call are closed before the error propagates.
    """
    if isinstance(gebco_paths, (str, os.PathLike)):
        gebco_paths = [gebco_paths]

    idx = index.Index()
    raster_data_dict = {}
    opened_rasters = []
    try:
        for i, path in enumerate(gebco_paths):
            with rasterio.Env(GTIFF_SRS_SOURCE='EPSG'):
                gebco_raster = rasterio.open(path)
                # Track the handle before reading so a failing read() cannot leak it.
                opened_rasters.append(gebco_raster)
                raster_data = gebco_raster.read(1)
                raster_data_dict[path] = (gebco_raster, raster_data)
                bounds = gebco_raster.bounds
                idx.insert(i, (bounds.left, bounds.bottom, bounds.right, bounds.top), obj=path)
    except Exception:
        for gebco_raster in opened_rasters:
            gebco_raster.close()
        raise
    return raster_data_dict, idx
# 3. Function to get seafloor elevation from the relevant rasters in a vectorized manner
def get_seafloor_elevation(lons, lats, relevant_rasters):
    """
    Get seafloor elevations for a batch of points based on longitude and latitude from relevant rasters.

    Rasters are consulted in the given order. A raster only supplies values
    for points that are still unresolved, lie inside its bounds (when the
    dataset exposes ``bounds``) and do not sample to its nodata value (when
    the dataset exposes ``nodata``). Points no raster can resolve stay NaN.

    Parameters:
    lons (numpy.ndarray): Array of longitudes.
    lats (numpy.ndarray): Array of latitudes.
    relevant_rasters (list): List of (raster dataset, raster data) tuples.
        Only the dataset is used; it must provide ``sample(xy)`` yielding one
        array of band values per coordinate pair.

    Returns:
    seafloor_elevations (numpy.ndarray): Float array of seafloor elevations
        (first band), NaN where no raster provides a valid value.
    """
    lons = np.asarray(lons, dtype=float)
    lats = np.asarray(lats, dtype=float)
    seafloor_elevations = np.full(len(lons), np.nan)

    for gebco_raster, _raster_data in relevant_rasters:
        pending = np.isnan(seafloor_elevations)

        # Sampling outside a raster's extent yields fill values, not real
        # elevations, so restrict each raster to the points it covers.
        bounds = getattr(gebco_raster, 'bounds', None)
        if bounds is not None:
            pending &= (
                (lons >= bounds.left) & (lons <= bounds.right) &
                (lats >= bounds.bottom) & (lats <= bounds.top)
            )

        pending_idx = np.flatnonzero(pending)
        if pending_idx.size == 0:
            continue

        # Sample all pending points of this raster in one batch.
        coords = np.column_stack((lons[pending_idx], lats[pending_idx]))
        sampled = np.array(
            [band_values[0] for band_values in gebco_raster.sample(coords)],
            dtype=float
        )

        # Treat the declared nodata value as "unknown" so a later raster may
        # still resolve the point.
        nodata = getattr(gebco_raster, 'nodata', None)
        if nodata is not None:
            sampled[sampled == nodata] = np.nan

        seafloor_elevations[pending_idx] = sampled

    return seafloor_elevations
def load_atl24_points(atl24_file_path):
    """
    Load ATL24 point dataset and normalize to columns: longitude, latitude, seafloor_elevation_atl24.

    Supported inputs are delimited text (.csv/.txt), .parquet and vector files
    (.gpkg/.shp/.geojson, read through geopandas). Longitude, latitude and
    elevation columns are located by trying a fixed list of candidate names in
    order. A column whose name contains "depth" (and not "elev") is treated as
    positive-down depth and converted to a negative elevation.

    An empty frame with the three output columns is returned when the path is
    missing, the extension is unsupported, the vector file cannot be read, or
    one of the required columns cannot be found.
    """
    output_columns = ['longitude', 'latitude', 'seafloor_elevation_atl24']

    def _no_points():
        return pd.DataFrame(columns=output_columns)

    if not atl24_file_path or not os.path.exists(atl24_file_path):
        return _no_points()

    # Pick the reader from the (case-insensitive) file extension.
    path_lc = atl24_file_path.lower()
    if path_lc.endswith(('.csv', '.txt')):
        raw_table = pd.read_csv(atl24_file_path)
    elif path_lc.endswith('.parquet'):
        raw_table = pd.read_parquet(atl24_file_path)
    elif path_lc.endswith(('.gpkg', '.shp', '.geojson')):
        try:
            import geopandas as gpd
            vector_table = gpd.read_file(atl24_file_path)
            raw_table = pd.DataFrame(vector_table)
            has_lon_lat = 'longitude' in raw_table.columns and 'latitude' in raw_table.columns
            if not has_lon_lat and 'geometry' in vector_table.columns:
                # Fall back to the point geometry for the coordinates.
                raw_table['longitude'] = vector_table.geometry.x
                raw_table['latitude'] = vector_table.geometry.y
        except Exception:
            return _no_points()
    else:
        return _no_points()

    def _first_present(candidates):
        for column_name in candidates:
            if column_name in raw_table.columns:
                return column_name
        return None

    lon_col = _first_present(['longitude', 'lon', 'x', 'LONGITUDE', 'LON'])
    lat_col = _first_present(['latitude', 'lat', 'y', 'LATITUDE', 'LAT'])
    elev_col = _first_present([
        'seafloor_elevation', 'bottom_elevation', 'bathymetry', 'elevation', 'z',
        'depth', 'water_depth'
    ])
    if None in (lon_col, lat_col, elev_col):
        return _no_points()

    points = pd.DataFrame({
        'longitude': pd.to_numeric(raw_table[lon_col], errors='coerce'),
        'latitude': pd.to_numeric(raw_table[lat_col], errors='coerce'),
        'atl24_raw_bathy': pd.to_numeric(raw_table[elev_col], errors='coerce')
    })
    points = points.dropna(subset=['longitude', 'latitude', 'atl24_raw_bathy']).copy()

    elev_name_lc = elev_col.lower()
    is_depth_column = 'depth' in elev_name_lc and 'elev' not in elev_name_lc
    if is_depth_column:
        # Depth is positive down; elevation is negative below sea level.
        points['seafloor_elevation_atl24'] = -points['atl24_raw_bathy'].abs()
    else:
        points['seafloor_elevation_atl24'] = points['atl24_raw_bathy']

    return points[output_columns]
def get_subsurface_photon(
    binned_dataset_sea_surface,
    GEBCO_paths,
    subsurface_thresh,
    Ignore_Subsurface_Height_Thres,
    use_atl24_filter=False,
    atl24_file_path='',
    atl24_max_match_distance_deg=0.01,
    use_gebco_filter=False,
    apply_histogram_quality_filter=False,
    histogram_quality_min_ratio=0.05,
    histogram_quality_depth_min=6.0,
    histogram_quality_depth_max=7.0,
    apply_surface_sigma_filter=False,
    surface_sigma_max=0.5,
    apply_refraction_correction=False,
    refraction_water_temp_c=20.0,
    refraction_wavelength_nm=532.0,
    apply_post_refraction_refit=False,
    apply_flattening=False,
    flattening_window_m=500,
    horizontal_res=500,
    vertical_res=0.25
):
    """
    Derive the sea-surface height and the filtered subsurface photons of one beam.

    Processing order:
      1. Fit the sea surface with the adaptive threshold.
      2. If refraction correction AND post-refraction refit are requested:
         refraction-correct the whole binned dataset, re-bin it and re-fit
         the surface on the corrected data.
      3. Optional histogram-quality and surface-sigma bin filters, evaluated
         against the dataset and surface heights that are current at that
         point (so they also take effect after a refit).
      4. If refraction correction is requested without refit: correct the
         subsurface photons only.
      5. Seafloor filter: ATL24 when requested and at least one photon
         matched, otherwise GEBCO when requested, otherwise none.
      6. Optional sea-surface flattening, using the bin keys of the dataset
         the final surface heights were fitted on.

    Parameters:
    binned_dataset_sea_surface (pandas.DataFrame): Binned photons of one beam
        (needs a 'lat_bins' column).
    GEBCO_paths (list): GEBCO raster paths for the GEBCO seafloor filter.
    subsurface_thresh: Not used while the adaptive surface fit is active;
        kept for interface compatibility with the static-threshold variant.
    Ignore_Subsurface_Height_Thres (float): Depth threshold handed to the
        seafloor filters as an absolute value.
    Remaining keyword arguments switch the optional steps on/off and are
    forwarded unchanged to the corresponding helper.

    Returns:
    tuple: (sea_surface_height, sea_surface_label,
        filtered_seafloor_subsurface_photon_dataset)
    """
    # Dataset the current sea_surface_height values were fitted on; its
    # 'lat_bins' groups are what the heights are aligned with.
    surface_source_dataset = binned_dataset_sea_surface
    sea_surface_height, sea_surface_label, _solo_sea_surface_label, subsurface_photon_dataset = \
        get_sea_surface_height_adaptive(surface_source_dataset)

    refit_after_refraction = apply_refraction_correction and apply_post_refraction_refit
    if refit_after_refraction:
        corrected_full_dataset = apply_optional_refraction_correction(
            binned_dataset_sea_surface,
            binned_dataset_sea_surface.copy(),
            sea_surface_height,
            water_temp_c=refraction_water_temp_c,
            wavelength_nm=refraction_wavelength_nm
        )
        surface_source_dataset = horizontal_vertical_bin_dataset(
            corrected_full_dataset,
            horizontal_res,
            vertical_res
        )
        sea_surface_height, sea_surface_label, _solo_sea_surface_label, subsurface_photon_dataset = \
            get_sea_surface_height_adaptive(surface_source_dataset)

    # The bin filters run after a possible refit; running them before would
    # throw their result away when the subsurface photons are re-derived.
    if apply_histogram_quality_filter:
        subsurface_photon_dataset = apply_optional_histogram_quality_filter(
            surface_source_dataset,
            subsurface_photon_dataset,
            sea_surface_height,
            min_ratio=histogram_quality_min_ratio,
            depth_min=histogram_quality_depth_min,
            depth_max=histogram_quality_depth_max
        )

    if apply_surface_sigma_filter:
        subsurface_photon_dataset = apply_optional_surface_sigma_filter(
            surface_source_dataset,
            subsurface_photon_dataset,
            sea_surface_height,
            sigma_max=surface_sigma_max
        )

    if apply_refraction_correction and not refit_after_refraction:
        subsurface_photon_dataset = apply_optional_refraction_correction(
            surface_source_dataset,
            subsurface_photon_dataset,
            sea_surface_height,
            water_temp_c=refraction_water_temp_c,
            wavelength_nm=refraction_wavelength_nm
        )

    # Seafloor filter: ATL24 first, GEBCO as fallback, otherwise unfiltered.
    filtered_seafloor_subsurface_photon_dataset = None
    if use_atl24_filter:
        atl24_filtered, atl24_match_count = process_seafloor_data_atl24(
            subsurface_photon_dataset,
            atl24_file_path,
            abs(Ignore_Subsurface_Height_Thres),
            max_match_distance_deg=atl24_max_match_distance_deg
        )
        if atl24_match_count > 0:
            filtered_seafloor_subsurface_photon_dataset = atl24_filtered
    if filtered_seafloor_subsurface_photon_dataset is None and use_gebco_filter:
        filtered_seafloor_subsurface_photon_dataset = process_seafloor_data(
            subsurface_photon_dataset, GEBCO_paths, abs(Ignore_Subsurface_Height_Thres)
        )
    if filtered_seafloor_subsurface_photon_dataset is None:
        filtered_seafloor_subsurface_photon_dataset = subsurface_photon_dataset

    if apply_flattening:
        # Keys must come from the dataset the surface was fitted on; after a
        # refit the original bins no longer line up with sea_surface_height.
        lat_bin_keys = list(
            surface_source_dataset.groupby('lat_bins', observed=False).groups.keys()
        )
        filtered_seafloor_subsurface_photon_dataset = apply_sea_surface_flattening(
            filtered_seafloor_subsurface_photon_dataset,
            sea_surface_height,
            lat_bin_keys,
            horizontal_res=horizontal_res,
            flattening_window_m=flattening_window_m
        )
    return sea_surface_height, sea_surface_label, filtered_seafloor_subsurface_photon_dataset
apply_refraction_correction=apply_refraction_correction, + refraction_water_temp_c=refraction_water_temp_c, + refraction_wavelength_nm=refraction_wavelength_nm, + apply_post_refraction_refit=apply_post_refraction_refit, + apply_flattening=apply_flattening, + flattening_window_m=flattening_window_m, + horizontal_res=horizontal_res, + vertical_res=vertical_res + ) + + # Append each result to the lists + sea_surface_heights.append(sea_surface_height) + sea_surface_labels.append(sea_surface_label) + filtered_beam_datasets.append(filtered_seafloor_subsurface_photon_dataset) + + # Combine all filtered beam datasets into a single DataFrame + combined_filtered_dataset = pd.concat(filtered_beam_datasets, ignore_index=True) + + return sea_surface_heights, sea_surface_labels, combined_filtered_dataset diff --git a/icesat-2_kdph_py/kd_utils/data_processing.py b/icesat-2_kdph_py/kd_utils/data_processing.py new file mode 100644 index 0000000..42df0a8 --- /dev/null +++ b/icesat-2_kdph_py/kd_utils/data_processing.py @@ -0,0 +1,733 @@ +# utils/data_processing.py + + +import re +import os +import io +import time +import math +import h5py +import logging +import netCDF4 +import numpy as np +import geopandas as gpd + +import pandas as pd +from datetime import datetime +import matplotlib.pyplot as plt +from sklearn.preprocessing import StandardScaler, MinMaxScaler +from scipy.spatial import ConvexHull + +import argparse +import subprocess +# import fiona +import utm +import pyproj +from pyproj import Transformer, Proj + +# from pyproj import Transformer +from matplotlib.widgets import LassoSelector +from matplotlib.path import Path + +from sklearn.cluster import DBSCAN +from .interpolation import * + + + +#this function from icesat2_toolkit +# PURPOSE: read ICESat-2 ATL03 HDF5 data files +def read_granule(FILENAME, ATTRIBUTES=False, **kwargs): + """ + Reads ICESat-2 ATL03 Global Geolocated Photons data files + + Parameters + ---------- + FILENAME: str + full path to ATL03 file + 
ATTRIBUTES: bool, default False + read file, group and variable attributes + + Returns + ------- + IS2_atl03_mds: dict + ATL03 variables + IS2_atl03_attrs: dict + ATL03 attributes + IS2_atl03_beams: list + valid ICESat-2 beams within ATL03 file + """ + # Open the HDF5 file for reading + if isinstance(FILENAME, io.IOBase): + fileID = h5py.File(FILENAME, 'r') + else: + fileID = h5py.File(os.path.expanduser(FILENAME), 'r') + + # Output HDF5 file information + logging.info(fileID.filename) + logging.info(list(fileID.keys())) + + # allocate python dictionaries for ICESat-2 ATL03 variables and attributes + IS2_atl03_mds = {} + IS2_atl03_attrs = {} + + # read each input beam within the file + IS2_atl03_beams = [] + for gtx in [k for k in fileID.keys() if bool(re.match(r'gt\d[lr]',k))]: + # check if subsetted beam contains data + # check in both the geolocation and heights groups + try: + fileID[gtx]['geolocation']['segment_id'] + fileID[gtx]['heights']['delta_time'] + except KeyError: + pass + else: + IS2_atl03_beams.append(gtx) + + # for each included beam + for gtx in IS2_atl03_beams: + + # ------------------------------------------- + # 1. make sure the beam-level dict exists + IS2_atl03_attrs.setdefault(gtx, {}) + # 2. 
always save the two “must-have” attributes + for key in ('atlas_beam_type', 'atlas_spot_number'): + IS2_atl03_attrs[gtx][key] = fileID[gtx].attrs[key] + + # get each HDF5 variable + IS2_atl03_mds[gtx] = {} + IS2_atl03_mds[gtx]['heights'] = {} + IS2_atl03_mds[gtx]['geolocation'] = {} + IS2_atl03_mds[gtx]['bckgrd_atlas'] = {} + IS2_atl03_mds[gtx]['geophys_corr'] = {} + # ICESat-2 Measurement Group + for key,val in fileID[gtx]['heights'].items(): + IS2_atl03_mds[gtx]['heights'][key] = val[:] + # ICESat-2 Geolocation Group + for key,val in fileID[gtx]['geolocation'].items(): + IS2_atl03_mds[gtx]['geolocation'][key] = val[:] + # ICESat-2 Background Photon Rate Group + for key,val in fileID[gtx]['bckgrd_atlas'].items(): + IS2_atl03_mds[gtx]['bckgrd_atlas'][key] = val[:] + # ICESat-2 Geophysical Corrections Group: Values for tides (ocean, + # solid earth, pole, load, and equilibrium), inverted barometer (IB) + # effects, and range corrections for tropospheric delays + for key,val in fileID[gtx]['geophys_corr'].items(): + IS2_atl03_mds[gtx]['geophys_corr'][key] = val[:] + + # Getting attributes of included variables + if ATTRIBUTES: + # Getting attributes of IS2_atl03_mds beam variables + IS2_atl03_attrs[gtx] = {} + IS2_atl03_attrs[gtx]['heights'] = {} + IS2_atl03_attrs[gtx]['geolocation'] = {} + IS2_atl03_attrs[gtx]['bckgrd_atlas'] = {} + IS2_atl03_attrs[gtx]['geophys_corr'] = {} + + # Global Group Attributes + for att_name,att_val in fileID[gtx].attrs.items(): + IS2_atl03_attrs[gtx][att_name] = att_val + # ICESat-2 Measurement Group + for key,val in fileID[gtx]['heights'].items(): + IS2_atl03_attrs[gtx]['heights'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[gtx]['heights'][key][att_name]=att_val + # ICESat-2 Geolocation Group + for key,val in fileID[gtx]['geolocation'].items(): + IS2_atl03_attrs[gtx]['geolocation'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[gtx]['geolocation'][key][att_name]=att_val + # ICESat-2 
Background Photon Rate Group + for key,val in fileID[gtx]['bckgrd_atlas'].items(): + IS2_atl03_attrs[gtx]['bckgrd_atlas'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[gtx]['bckgrd_atlas'][key][att_name]=att_val + # ICESat-2 Geophysical Corrections Group + for key,val in fileID[gtx]['geophys_corr'].items(): + IS2_atl03_attrs[gtx]['geophys_corr'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[gtx]['geophys_corr'][key][att_name]=att_val + + # ICESat-2 spacecraft orientation at time + IS2_atl03_mds['orbit_info'] = {} + IS2_atl03_attrs['orbit_info'] = {} + for key,val in fileID['orbit_info'].items(): + IS2_atl03_mds['orbit_info'][key] = val[:] + # Getting attributes of group and included variables + if ATTRIBUTES: + # Global Group Attributes + for att_name,att_val in fileID['orbit_info'].attrs.items(): + IS2_atl03_attrs['orbit_info'][att_name] = att_val + # Variable Attributes + IS2_atl03_attrs['orbit_info'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs['orbit_info'][key][att_name] = att_val + + # information ancillary to the data product + # number of GPS seconds between the GPS epoch (1980-01-06T00:00:00Z UTC) + # and ATLAS Standard Data Product (SDP) epoch (2018-01-01T00:00:00Z UTC) + # Add this value to delta time parameters to compute full gps_seconds + # could alternatively use the Julian day of the ATLAS SDP epoch: 2458119.5 + # and add leap seconds since 2018-01-01T00:00:00Z UTC (ATLAS SDP epoch) + IS2_atl03_mds['ancillary_data'] = {} + IS2_atl03_attrs['ancillary_data'] = {} + ancillary_keys = ['atlas_sdp_gps_epoch','data_end_utc','data_start_utc', + 'end_cycle','end_geoseg','end_gpssow','end_gpsweek','end_orbit', + 'end_region','end_rgt','granule_end_utc','granule_start_utc','release', + 'start_cycle','start_geoseg','start_gpssow','start_gpsweek', + 'start_orbit','start_region','start_rgt','version'] + for key in ancillary_keys: + # get each HDF5 variable + 
IS2_atl03_mds['ancillary_data'][key] = fileID['ancillary_data'][key][:] + # Getting attributes of group and included variables + if ATTRIBUTES: + # Variable Attributes + IS2_atl03_attrs['ancillary_data'][key] = {} + for att_name,att_val in fileID['ancillary_data'][key].attrs.items(): + IS2_atl03_attrs['ancillary_data'][key][att_name] = att_val + + # transmit-echo-path (tep) parameters + IS2_atl03_mds['ancillary_data']['tep'] = {} + IS2_atl03_attrs['ancillary_data']['tep'] = {} + for key,val in fileID['ancillary_data']['tep'].items(): + # get each HDF5 variable + IS2_atl03_mds['ancillary_data']['tep'][key] = val[:] + # Getting attributes of group and included variables + if ATTRIBUTES: + # Variable Attributes + IS2_atl03_attrs['ancillary_data']['tep'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs['ancillary_data']['tep'][key][att_name] = att_val + + # channel dead time and first photon bias derived from ATLAS calibration + cal1,cal2 = ('ancillary_data','calibrations') + for var in ['dead_time','first_photon_bias']: + IS2_atl03_mds[cal1][var] = {} + IS2_atl03_attrs[cal1][var] = {} + for key,val in fileID[cal1][cal2][var].items(): + # get each HDF5 variable + if isinstance(val, h5py.Dataset): + IS2_atl03_mds[cal1][var][key] = val[:] + elif isinstance(val, h5py.Group): + IS2_atl03_mds[cal1][var][key] = {} + for k,v in val.items(): + IS2_atl03_mds[cal1][var][key][k] = v[:] + # Getting attributes of group and included variables + if ATTRIBUTES: + # Variable Attributes + IS2_atl03_attrs[cal1][var][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[cal1][var][key][att_name] = att_val + if isinstance(val, h5py.Group): + for k,v in val.items(): + IS2_atl03_attrs[cal1][var][key][k] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[cal1][var][key][k][att_name]=att_val + + # get ATLAS impulse response variables for the transmitter echo path (TEP) + tep1,tep2 = ('atlas_impulse_response','tep_histogram') + 
IS2_atl03_mds[tep1] = {} + IS2_atl03_attrs[tep1] = {} + for pce in ['pce1_spot1','pce2_spot3']: + IS2_atl03_mds[tep1][pce] = {tep2:{}} + IS2_atl03_attrs[tep1][pce] = {tep2:{}} + # for each TEP variable + for key,val in fileID[tep1][pce][tep2].items(): + IS2_atl03_mds[tep1][pce][tep2][key] = val[:] + # Getting attributes of included variables + if ATTRIBUTES: + # Global Group Attributes + for att_name,att_val in fileID[tep1][pce][tep2].attrs.items(): + IS2_atl03_attrs[tep1][pce][tep2][att_name] = att_val + # Variable Attributes + IS2_atl03_attrs[tep1][pce][tep2][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[tep1][pce][tep2][key][att_name] = att_val + + # Global File Attributes + if ATTRIBUTES: + for att_name,att_val in fileID.attrs.items(): + IS2_atl03_attrs[att_name] = att_val + + # Closing the HDF5 file + fileID.close() + # Return the datasets and variables + return (IS2_atl03_mds,IS2_atl03_attrs,IS2_atl03_beams) + + + +# convert_wgs_to_utm function, see https://stackoverflow.com/a/40140326/4556479 +def convert_wgs_to_utm(lon: float, lat: float): + """Based on lat and lng, return best utm epsg-code""" + utm_band = str((math.floor((lon + 180) / 6) % 60) + 1) + if len(utm_band) == 1: + utm_band = '0' + utm_band + if lat >= 0: + epsg_code = 'epsg:326' + utm_band + return epsg_code + epsg_code = 'epsg:327' + utm_band + return epsg_code + + +def orthometric_correction(lat, lon, Z, epsg): + # Define the Proj string + #To transform from WGS84 ellipsoidal height + # to EGM2008 orthometric height using PyProj + # proj_string = '+proj=latlong +ellps=WGS84 +datum=WGS84 +vunits=m +no_defs +geoidgrids=egm2008-1.gtx' + # # Define the Proj string for WGS84 ellipsoidal height + # wgs84_proj_string = '+proj=latlong +ellps=WGS84 +datum=WGS84 +no_defs' + + # # Define the Proj string for EGM2008 orthometric height: egm08_25,egm2008-1 + # egm2008_proj_string = \ + # '+proj=latlong +ellps=WGS84 +datum=WGS84 +no_defs ' \ + # 
'+geoidgrids=C:/Workstation/ICESat2_HLS/Code/Geoids/egm08_25.gtx' + + # transform ellipsoid (WGS84) height to orthometric height + # transformer = Transformer.from_crs(wgs84_proj_string, egm2008_proj_string, always_xy=True) + transformer = Transformer.from_crs("EPSG:4326", "EPSG:3857", always_xy=True) + X_egm08, Y_egm08, Z_egm08 = transformer.transform(lon, lat, Z) + + # transform WGS84 proj to local UTM + myProj = Proj(epsg) + X_utm, Y_utm = myProj(lon, lat) + + return Y_utm, X_utm, Z_egm08 + + + +def load_data(file_path, read_attributes=False): + """ + Wrapper around read_granule that lets you decide + whether to pull the full attribute tree. + + Parameters + ---------- + file_path : str + Path to the ATL03 HDF5 granule. + read_attributes : bool, optional + If True, read_granule returns the full IS2_atl03_attrs tree. + Defaults to False. + """ + + + return read_granule(file_path, ATTRIBUTES = read_attributes) + + + +# requires that the input gdf has ranged index values i +# will need to change if index is changed to time or something +# this currently checks point in polygon for EVERY point +# would be significantly sped up if evaluated at 10m or something similar +# maybe later, fine for now +def isolate_sea_land_photons(shoreline_data_path, ICESat2_GDF): + # try loading the shoreline data + try: + ICESat2_GDF.insert(0, 'lat', ICESat2_GDF.geometry.y, False) + ICESat2_GDF.insert(0, 'lon', ICESat2_GDF.geometry.x, False) + + # allocation of to be used arrays + zero_int_array = np.int64(np.zeros_like(ICESat2_GDF.geometry.x)) + + # Land flag initialized as -1 + # If shorelines downloaded already, will be set to 0 or 1 + ICESat2_GDF.insert(0, 'is_land', + zero_int_array - 1, False) + + # set the projection + ICESat2_GDF.set_crs("EPSG:4326", inplace=True) + + # load shoreline dataset to include only the features that intersect the bounding box + # bbox can be GeoDataFrame or GeoSeries | shapely Geometry, default None + # Filter features by given bounding box, 
GeoSeries, GeoDataFrame or a shapely geometry. + # engine str, 'fiona' or 'pyogrio' + # somtime it gives error if using fiona + # land_polygon_gdf = gpd.read_file(shoreline_data_path, bbox=ICESat2_GDF, engine='fiona') + land_polygon_gdf = gpd.read_file(shoreline_data_path, bbox=ICESat2_GDF, engine='pyogrio') + + # continue with getting a new array of 0-or-1 labels for each photon + land_point_labels = np.zeros_like(ICESat2_GDF.is_land.values) + + # update labels for points in the land polygons + pts_in_land = gpd.sjoin(ICESat2_GDF, land_polygon_gdf, predicate='within') + + # get land or not bool value + land_loc = ICESat2_GDF.index.isin(pts_in_land.index) + + # asigned them to new numpy array + land_point_labels[land_loc] = 1 + land_point_labels[~land_loc] = 0 + + return land_point_labels + + except Exception as e: + + print(e) + + print("Error loading shoreline data, returning -1s for is_land flag") + + # if the shoreline data is not available + # return the original label array + + return -np.ones_like(ICESat2_GDF.is_land.values) + + +def create_photon_dataframe(lat_ph, lon_ph, ref_elev, ref_azimuth, geoid, h_ph, \ + quality_ph, is_land_label_interp1d, signal_conf_photon, x_atc, relative_AT_dist, + solar_elevation=None, background_rate=None): + # Apply geoid correction to the photon heights to convert them from ellipsoidal to orthometric heights + h_ph_geoid_cor = h_ph[:] - geoid[:] + + # Determine the EPSG code for the UTM zone based on the first photon's longitude and latitude + epsg_code = convert_wgs_to_utm(lon_ph[0], lat_ph[0]) + + # Perform orthometric correction to obtain UTM coordinates and corrected heights + lat_utm, lon_utm, h_ph_cor = orthometric_correction(lat_ph, lon_ph, h_ph, epsg_code) + + # Put the data into the dataframe + sea_photon_dataset = pd.DataFrame({ + 'latitude': lat_ph, + 'longitude': lon_ph, + 'lat': lat_utm, + 'lon': lon_utm, + 'photon_height': h_ph_geoid_cor, + 'quality_ph': quality_ph, + 'is_land_label': is_land_label_interp1d, + 
'photon_conf': signal_conf_photon, + 'ref_elevation': ref_elev, + 'ref_azimuth': ref_azimuth, + 'relative_AT_dist': relative_AT_dist + }, columns=['latitude', 'longitude', 'lat', 'lon', 'photon_height', 'quality_ph', 'is_land_label', 'photon_conf', 'ref_elevation', 'ref_azimuth', 'relative_AT_dist']) + + if solar_elevation is not None: + sea_photon_dataset['solar_elevation'] = solar_elevation + if background_rate is not None: + sea_photon_dataset['background_rate'] = background_rate + + return sea_photon_dataset + + +def interpolate_by_time(source_time, source_values, target_time): + """Linearly interpolate source_values from source_time to target_time.""" + source_time = np.asarray(source_time) + source_values = np.asarray(source_values) + target_time = np.asarray(target_time) + + valid_mask = np.isfinite(source_time) & np.isfinite(source_values) + if valid_mask.sum() < 2: + return np.full_like(target_time, np.nan, dtype=float) + + sorted_idx = np.argsort(source_time[valid_mask]) + sorted_time = source_time[valid_mask][sorted_idx] + sorted_values = source_values[valid_mask][sorted_idx] + + unique_time, unique_idx = np.unique(sorted_time, return_index=True) + unique_values = sorted_values[unique_idx] + if unique_time.size < 2: + return np.full_like(target_time, unique_values[0], dtype=float) + + return np.interp(target_time, unique_time, unique_values, left=unique_values[0], right=unique_values[-1]) + + +def apply_optional_solar_background_filter( + sea_photon_dataset, + enabled=False, + day_threshold=6.0, + background_quantile=0.8, + min_signal_conf=2 +): + """ + Optionally filter photons under strong daytime solar background conditions. + Keeps high-confidence photons in high-background segments. 
+ """ + if not enabled: + return sea_photon_dataset + + required_cols = {'solar_elevation', 'background_rate', 'photon_conf'} + if not required_cols.issubset(set(sea_photon_dataset.columns)): + logger.warning("Solar background filter skipped because required columns are missing.") + return sea_photon_dataset + + filtered_dataset = sea_photon_dataset.copy() + daytime_mask = filtered_dataset['solar_elevation'] >= day_threshold + daytime_background = filtered_dataset.loc[daytime_mask, 'background_rate'] + daytime_background = daytime_background[np.isfinite(daytime_background)] + + if daytime_background.empty: + logger.info("Solar background filter enabled, but no valid daytime photons were found.") + return filtered_dataset + + quantile_threshold = daytime_background.quantile(background_quantile) + noisy_daytime_mask = daytime_mask & (filtered_dataset['background_rate'] >= quantile_threshold) + keep_mask = (~noisy_daytime_mask) | (filtered_dataset['photon_conf'] >= min_signal_conf) + + before_count = len(filtered_dataset) + filtered_dataset = filtered_dataset.loc[keep_mask].copy() + after_count = len(filtered_dataset) + logger.info( + "Solar background filter removed %s photons (from %s to %s).", + before_count - after_count, before_count, after_count + ) + return filtered_dataset + + +def apply_optional_ir_ap_filter( + sea_photon_dataset, + enabled=False, + quality_max=0, + min_signal_conf=0 +): + """ + Optionally remove likely flagged photons as an IR/afterpulse proxy. 
+ This uses available ATL03 photon fields: + - quality_ph: keep <= quality_max + - photon_conf: keep >= min_signal_conf + """ + if not enabled: + return sea_photon_dataset + + filtered_dataset = sea_photon_dataset.copy() + keep_mask = np.ones(len(filtered_dataset), dtype=bool) + + if 'quality_ph' in filtered_dataset.columns: + quality_values = pd.to_numeric(filtered_dataset['quality_ph'], errors='coerce') + keep_mask &= quality_values <= quality_max + else: + logger.warning("IR/AP filter enabled, but quality_ph is missing.") + + if 'photon_conf' in filtered_dataset.columns: + conf_values = pd.to_numeric(filtered_dataset['photon_conf'], errors='coerce') + keep_mask &= conf_values >= min_signal_conf + else: + logger.warning("IR/AP filter enabled, but photon_conf is missing.") + + before_count = len(filtered_dataset) + filtered_dataset = filtered_dataset.loc[keep_mask].copy() + after_count = len(filtered_dataset) + logger.info( + "IR/AP proxy filter removed %s photons (from %s to %s).", + before_count - after_count, before_count, after_count + ) + return filtered_dataset + + +def Extract_sea_photons(IS2_atl03_mds, target_strong_beams, shoreline_data_path): + Segment_ID = {} + Segment_Index_begin = {} + Segment_PE_count = {} + Equator_Segment_Distance = {} + Segment_Length = {} + Segment_Is_Land = {} + Segment_Lon = {} + Segment_Lat = {} + Segment_Elev = {} + Segment_Time = {} + Segment_ref_elev = {} + Segment_ref_azimuth = {} + background_rate = {} + background_counts = {} + + # Initialize a list to store data for each beam + beam_datasets = [] + + + # Loop over each strong beam in target_strong_beams + for gtx in target_strong_beams: + print('Processing strong beam ID:', gtx) + + # Access the data for the current beam + IS2_val = IS2_atl03_mds[gtx] + + # Initialize dictionaries to store segment data for the beam + Segment_ID[gtx] = IS2_val['geolocation']['segment_id'] + n_seg = len(Segment_ID[gtx]) + n_pe, = IS2_val['heights']['delta_time'].shape + 
Segment_Index_begin[gtx] = IS2_val['geolocation']['ph_index_beg'] - 1 + Segment_PE_count[gtx] = IS2_val['geolocation']['segment_ph_cnt'] + Equator_Segment_Distance[gtx] = IS2_val['geolocation']['segment_dist_x'] + Segment_Length[gtx] = IS2_val['geolocation']['segment_length'] + delta_time = IS2_val['geolocation']['delta_time'] + segment_lat = IS2_val['geolocation']['reference_photon_lat'][:].copy() + segment_lon = IS2_val['geolocation']['reference_photon_lon'][:].copy() + ref_elev = IS2_val['geolocation']['ref_elev'][:].copy() + ref_azimuth = IS2_val['geolocation']['ref_azimuth'][:].copy() + geoid = IS2_val['geophys_corr']['geoid'][:].copy() + h_ph = IS2_val['heights']['h_ph'][:].copy() + photon_delta_time = IS2_val['heights']['delta_time'][:].copy() + lat_ph = IS2_val['heights']['lat_ph'][:].copy() + lon_ph = IS2_val['heights']['lon_ph'][:].copy() + signal_conf_photon = IS2_val['heights']['signal_conf_ph'][..., 0].copy() + x_atc = IS2_val['heights']['dist_ph_along'][:].copy() + y_atc = IS2_val['heights']['dist_ph_across'][:].copy() + quality_ph = IS2_val['heights']['quality_ph'] + + # Optional variables for solar background sensitivity testing + photon_solar_elevation = np.full_like(photon_delta_time, np.nan, dtype=float) + photon_background_rate = np.full_like(photon_delta_time, np.nan, dtype=float) + if 'solar_elevation' in IS2_val['geolocation']: + segment_solar_elevation = IS2_val['geolocation']['solar_elevation'][:].copy() + photon_solar_elevation = interpolate_by_time( + delta_time, segment_solar_elevation, photon_delta_time + ) + if 'bckgrd_atlas' in IS2_val and 'bckgrd_rate' in IS2_val['bckgrd_atlas'] and 'delta_time' in IS2_val['bckgrd_atlas']: + bckgrd_rate = IS2_val['bckgrd_atlas']['bckgrd_rate'][:].copy() + bckgrd_time = IS2_val['bckgrd_atlas']['delta_time'][:].copy() + photon_background_rate = interpolate_by_time( + bckgrd_time, bckgrd_rate, photon_delta_time + ) + + # Adjust x_atc based on segment distances + for seg_index in range(n_seg): + idx = 
Segment_Index_begin[gtx][seg_index] + cnt = Segment_PE_count[gtx][seg_index] + x_atc[idx:idx + cnt] += Equator_Segment_Distance[gtx][seg_index] + + # Calculate relative distances + relative_AT_dist = (x_atc - x_atc[0]) / 1000 + relative_seg_dist = (Equator_Segment_Distance[gtx] - Equator_Segment_Distance[gtx][0]) / 1000 + + # Create a GeoDataFrame to hold segment data for shoreline check + Segment_Is_Land['geometry'] = gpd.points_from_xy(segment_lon, segment_lat) + ICESat2_GDF = gpd.GeoDataFrame(Segment_Is_Land, crs="EPSG:4326") + + # Determine if it is land by the land/sea mask + Segment_Is_Land_Labels = isolate_sea_land_photons(shoreline_data_path, ICESat2_GDF) + ICESat2_GDF.loc[:, 'is_land'] = Segment_Is_Land_Labels + + # Apply interpolations for required data + is_land_label_interp1d = apply_interpolation(interpolate_labels(segment_lat, Segment_Is_Land_Labels), lat_ph) + ph_ref_elev = apply_interpolation(interpolate_labels(segment_lat, ref_elev), lat_ph) + ph_ref_azimuth = apply_interpolation(interpolate_labels(segment_lat, ref_azimuth), lat_ph) + ph_geoid = apply_interpolation(interpolate_labels(segment_lat, geoid), lat_ph) + + + # Create photon DataFrame for the current beam + sea_photon_dataset = create_photon_dataframe(lat_ph=lat_ph, lon_ph=lon_ph, + ref_elev=ph_ref_elev,ref_azimuth=ph_ref_azimuth, + geoid=ph_geoid,h_ph=h_ph, + quality_ph=quality_ph, + is_land_label_interp1d=is_land_label_interp1d, + signal_conf_photon=signal_conf_photon, + x_atc=x_atc, # Note: x_atc is not used in the function. Remove if unnecessary. 
+ relative_AT_dist=relative_AT_dist, + solar_elevation=photon_solar_elevation, + background_rate=photon_background_rate) + + # Filter out land photons + sea_photon_dataset = sea_photon_dataset[sea_photon_dataset['is_land_label'] != 1] + + # Add a new column to indicate the beam ID + sea_photon_dataset['beam_id'] = gtx + + # Append the processed dataset for the current beam to the list + beam_datasets.append(sea_photon_dataset) + + # Concatenate all beam data into a single DataFrame + all_beams_dataset = pd.concat(beam_datasets, ignore_index=True) + + return all_beams_dataset + + +def filter_photon_dataset_by_hull_area(photon_dataset, hull_area_threshold=3000): + """ + Filters photon dataset based on ConvexHull area threshold and returns the filtered dataset, + convex hull areas, and convex hull points. + """ + lat_bins_grouped = photon_dataset.groupby('lat_bins', observed=False) + filtered_dataset = photon_dataset.copy() + + convex_hulls = {} + convex_hull_areas = {} + + for lat_bin, bin_data in lat_bins_grouped: + if len(bin_data) >= 3: + points = bin_data[['lat', 'photon_height']].to_numpy() + hull = ConvexHull(points) + area = hull.volume + convex_hull_areas[lat_bin] = area + + # Store the ConvexHull points if area meets the threshold + if area >= hull_area_threshold: + convex_hulls[lat_bin] = points[hull.vertices] + else: + # Remove bins with hull area below the threshold + filtered_dataset = filtered_dataset[filtered_dataset['lat_bins'] != lat_bin] + + return filtered_dataset, convex_hull_areas, convex_hulls + + + + + + + + + +########################## +##Discard Functions Below +########################## + + +# Extracts key information from filenames, +# which could include processed status, product ID, timestamps, identifiers, or metadata. 
# Compiled once at import time. The dot of the ".h5" extension is escaped so
# that only a literal ".h5" suffix is accepted.
_GRANULE_NAME_RX = re.compile(
    r'(processed_)?(ATL\d{2})_(\d{4})(\d{2})(\d{2})(\d{2})'
    r'(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?)\.h5$'
)


def extract_file_params(file_path):
    """
    Split a granule file name into the fields captured by the name pattern.

    The pattern is searched anywhere in ``file_path`` (a directory prefix is
    allowed) and must end with a literal ``.h5`` extension.

    Example: input "processed_ATL06_20231115120000_20231115_001_12_data.h5"
    Output: ('processed_', 'ATL06', '2023', '11', '15', '12', '00', '00',
             '2023', '11', '15', '001', '12', '_data')

    Parameters
    ----------
    file_path : str
        File name or path to parse.

    Returns
    -------
    tuple of str
        The fourteen captured fields. An absent optional ``processed_``
        prefix is returned as ``''``.

    Raises
    ------
    IndexError
        If ``file_path`` does not match the pattern. The exception type is
        the one the previous ``findall(...).pop()`` implementation raised.
    """
    match = _GRANULE_NAME_RX.search(file_path)
    if match is None:
        raise IndexError(
            f"file name does not match the expected granule pattern: {file_path!r}"
        )
    # default='' mirrors re.findall, which reports unmatched groups as ''.
    return match.groups(default='')


def create_mask(binned_data, sea_surface_height, seafloor_height, threshold_height):
    """
    Build a boolean mask selecting heights between the sea floor and the sea
    surface that are also at or above ``threshold_height``.

    Parameters
    ----------
    binned_data : mapping
        Must provide a ``'height'`` entry that supports element-wise
        comparison (e.g. a NumPy array or a DataFrame column).
    sea_surface_height, seafloor_height, threshold_height : scalar
        Upper bound, lower bound and additional lower bound on the height.

    Returns
    -------
    Element-wise boolean result of the three comparisons combined with ``&``.
    """
    heights = binned_data['height']
    return (
        (heights <= sea_surface_height)
        & (heights >= seafloor_height)
        & (heights >= threshold_height)
    )


def generate_polygons_from_binned_data(lat, height, mask, lat_interval, height_interval,
                                       height_range=(-12, 2)):
    """
    Bin the masked points on a latitude/height grid and return one convex-hull
    polygon per occupied bin.

    Parameters
    ----------
    lat, height : array-like
        Per-point latitude and height, same length.
    mask : array-like of bool
        Selects the points to be binned.
    lat_interval, height_interval : float
        Bin widths along the latitude and height axes.
    height_range : tuple of (float, float), optional
        Lower and upper limit of the height grid. Defaults to ``(-12, 2)``,
        the range that was previously hard-coded.

    Returns
    -------
    lat_bins : numpy.ndarray
        Latitude bin edges from ``lat.min()`` up to at least ``lat.max()``.
    height_bins : numpy.ndarray
        Height bin edges covering ``height_range``.
    valid_bins : list of tuple
        ``(lat_bin_index, height_bin_index)`` pairs (``np.digitize`` indices)
        that contain at least one masked point and lie inside both grids.
    polygons : list of numpy.ndarray
        For every valid bin holding more than two non-collinear points, the
        ``(k, 2)`` array of its convex-hull vertices as ``(lat, height)``.
    """
    # Work on plain arrays so that indexing is positional even when the
    # caller passes pandas objects with a non-default index.
    lat = np.asarray(lat)
    height = np.asarray(height)
    mask = np.asarray(mask, dtype=bool)

    # Create the bin edges for both axes
    lat_bins = np.arange(lat.min(), lat.max() + lat_interval, lat_interval)
    height_bins = np.arange(height_range[0], height_range[1] + height_interval,
                            height_interval)

    # Apply the mask once instead of once per point
    masked_lat = lat[mask]
    masked_height = height[mask]
    if masked_lat.size == 0:
        return lat_bins, height_bins, [], []

    # Identify which bin each masked latitude and height value belongs to
    lat_bin_indices = np.digitize(masked_lat, lat_bins)
    height_bin_indices = np.digitize(masked_height, height_bins)

    # Unique (lat bin, height bin) combinations, in lexicographic order
    unique_bins = np.unique(
        np.column_stack((lat_bin_indices, height_bin_indices)), axis=0
    )

    # Index 0 and index len(bins) are np.digitize's "outside the grid" values
    valid_bins = [
        (bin_lat, bin_height)
        for bin_lat, bin_height in unique_bins
        if 1 <= bin_lat < len(lat_bins) and 1 <= bin_height < len(height_bins)
    ]

    polygons = []
    for bin_lat, bin_height in valid_bins:
        in_bin = (lat_bin_indices == bin_lat) & (height_bin_indices == bin_height)
        bin_points = np.column_stack((masked_lat[in_bin], masked_height[in_bin]))
        if len(bin_points) <= 2:
            continue
        # A hull needs points spanning two dimensions; skip collinear bins
        # instead of letting Qhull raise on them.
        if np.linalg.matrix_rank(bin_points - bin_points[0]) < 2:
            continue
        hull = ConvexHull(bin_points)
        polygons.append(bin_points[hull.vertices])

    return lat_bins, height_bins, valid_bins, polygons


# Patch metadata kept verbatim (commented so this span stays valid Python):
# diff --git a/icesat-2_kdph_py/kd_utils/interpolation.py b/icesat-2_kdph_py/kd_utils/interpolation.py
# new file mode 100644
# index 0000000..59b04b4
# --- /dev/null
# +++ b/icesat-2_kdph_py/kd_utils/interpolation.py
# @@ -0,0 +1,19 @@
# utils/interpolation.py

import scipy.interpolate


def interpolate_labels(segment_lat, labels):
    """
    Build a 1-D interpolator of ``labels`` over ``segment_lat``.

    Uses ``scipy.interpolate.interp1d`` with its default (linear) kind and
    ``fill_value="extrapolate"``, so queries outside the range of
    ``segment_lat`` are extrapolated rather than rejected.

    Returns
    -------
    callable
        The interpolator; call it with the latitudes to evaluate.
    """
    model = scipy.interpolate.interp1d(segment_lat, labels, fill_value="extrapolate")
    return model


def apply_interpolation(model, lat_ph):
    """Evaluate an interpolator from ``interpolate_labels`` at ``lat_ph``."""
    return model(lat_ph)


def geoid_correction(lat_ph, segment_lat, geoid):
    """
    Interpolate the per-segment ``geoid`` values to the latitudes ``lat_ph``.

    Only the interpolated values are returned; subtracting them from the
    heights is left to the caller.
    """
    model = interpolate_labels(segment_lat, geoid)
    return apply_interpolation(model, lat_ph)


def refraction_correction(lat_ph, segment_lat, ref_elev, ref_azimuth):
    """
    Interpolate the per-segment ``ref_elev`` and ``ref_azimuth`` values to
    the latitudes ``lat_ph``.

    Returns
    -------
    tuple
        ``(elevation_at_lat_ph, azimuth_at_lat_ph)``. No refraction
        computation is performed here.
    """
    elev_model = interpolate_labels(segment_lat, ref_elev)
    azimuth_model = interpolate_labels(segment_lat, ref_azimuth)
    return apply_interpolation(elev_model, lat_ph), apply_interpolation(azimuth_model, lat_ph)


# Patch metadata kept verbatim (commented so this span stays valid Python):
# diff --git a/icesat-2_kdph_py/kd_utils/kd_utils.py b/icesat-2_kdph_py/kd_utils/kd_utils.py
# new file mode 100644
# index 0000000..39c3687
# --- /dev/null
# +++ b/icesat-2_kdph_py/kd_utils/kd_utils.py
# @@ -0,0 +1,1181 @@
#!/usr/bin/env python
# coding: utf-8

#####
# This group of functions processes ICESat-2 data and creates a bathymetric model.
# To do this, it follows a number of steps in the form of functions, including:
# 1.
Reading data (ReadATL03()) +# 2. Orthometrically correcting the dataset (OrthometricCorrection()) +# 3. Pulling down the data segment ID (getAtl03SegID()) +# 4. Bin the data along latitudinal and height gradients (bin_data()) +# 5. Calculate sea height (get_sea_height()) +# 6. Get water temperature (get_water_temp()) +# 7. Correct bathymetric surface for refraction (RefractionCorrection()) +# 8. Calculate bathymetric height (get_bath_height()) +# 9. Produce figures (produce_figures()) +##### +import os +# os.environ["PROJ_LIB"] = r"C:\Users\wayne\anaconda3\Library\share\proj" + +import io +import os +import re +import time +import math +import h5py +import logging +import netCDF4 +import numpy as np +import geopandas as gpd + +import pandas as pd +from datetime import datetime +import matplotlib.pyplot as plt +import hdbscan +from sklearn.preprocessing import StandardScaler, MinMaxScaler + +import argparse +import subprocess +# import fiona +import utm +import pyproj +from pyproj import Transformer, Proj + +# from pyproj import Transformer +from matplotlib.widgets import LassoSelector +from matplotlib.path import Path + +from sklearn.cluster import DBSCAN + +#this function from icesat2_toolkit +# PURPOSE: read ICESat-2 ATL03 HDF5 data files +def read_granule(FILENAME, ATTRIBUTES=False, **kwargs): + """ + Reads ICESat-2 ATL03 Global Geolocated Photons data files + + Parameters + ---------- + FILENAME: str + full path to ATL03 file + ATTRIBUTES: bool, default False + read file, group and variable attributes + + Returns + ------- + IS2_atl03_mds: dict + ATL03 variables + IS2_atl03_attrs: dict + ATL03 attributes + IS2_atl03_beams: list + valid ICESat-2 beams within ATL03 file + """ + # Open the HDF5 file for reading + if isinstance(FILENAME, io.IOBase): + fileID = h5py.File(FILENAME, 'r') + else: + fileID = h5py.File(os.path.expanduser(FILENAME), 'r') + + # Output HDF5 file information + logging.info(fileID.filename) + logging.info(list(fileID.keys())) + + # allocate 
python dictionaries for ICESat-2 ATL03 variables and attributes + IS2_atl03_mds = {} + IS2_atl03_attrs = {} + + # read each input beam within the file + IS2_atl03_beams = [] + for gtx in [k for k in fileID.keys() if bool(re.match(r'gt\d[lr]',k))]: + # check if subsetted beam contains data + # check in both the geolocation and heights groups + try: + fileID[gtx]['geolocation']['segment_id'] + fileID[gtx]['heights']['delta_time'] + except KeyError: + pass + else: + IS2_atl03_beams.append(gtx) + + # for each included beam + for gtx in IS2_atl03_beams: + # get each HDF5 variable + IS2_atl03_mds[gtx] = {} + IS2_atl03_mds[gtx]['heights'] = {} + IS2_atl03_mds[gtx]['geolocation'] = {} + IS2_atl03_mds[gtx]['bckgrd_atlas'] = {} + IS2_atl03_mds[gtx]['geophys_corr'] = {} + # ICESat-2 Measurement Group + for key,val in fileID[gtx]['heights'].items(): + IS2_atl03_mds[gtx]['heights'][key] = val[:] + # ICESat-2 Geolocation Group + for key,val in fileID[gtx]['geolocation'].items(): + IS2_atl03_mds[gtx]['geolocation'][key] = val[:] + # ICESat-2 Background Photon Rate Group + for key,val in fileID[gtx]['bckgrd_atlas'].items(): + IS2_atl03_mds[gtx]['bckgrd_atlas'][key] = val[:] + # ICESat-2 Geophysical Corrections Group: Values for tides (ocean, + # solid earth, pole, load, and equilibrium), inverted barometer (IB) + # effects, and range corrections for tropospheric delays + for key,val in fileID[gtx]['geophys_corr'].items(): + IS2_atl03_mds[gtx]['geophys_corr'][key] = val[:] + + # Getting attributes of included variables + if ATTRIBUTES: + # Getting attributes of IS2_atl03_mds beam variables + IS2_atl03_attrs[gtx] = {} + IS2_atl03_attrs[gtx]['heights'] = {} + IS2_atl03_attrs[gtx]['geolocation'] = {} + IS2_atl03_attrs[gtx]['bckgrd_atlas'] = {} + IS2_atl03_attrs[gtx]['geophys_corr'] = {} + # Global Group Attributes + for att_name,att_val in fileID[gtx].attrs.items(): + IS2_atl03_attrs[gtx][att_name] = att_val + # ICESat-2 Measurement Group + for key,val in 
fileID[gtx]['heights'].items(): + IS2_atl03_attrs[gtx]['heights'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[gtx]['heights'][key][att_name]=att_val + # ICESat-2 Geolocation Group + for key,val in fileID[gtx]['geolocation'].items(): + IS2_atl03_attrs[gtx]['geolocation'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[gtx]['geolocation'][key][att_name]=att_val + # ICESat-2 Background Photon Rate Group + for key,val in fileID[gtx]['bckgrd_atlas'].items(): + IS2_atl03_attrs[gtx]['bckgrd_atlas'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[gtx]['bckgrd_atlas'][key][att_name]=att_val + # ICESat-2 Geophysical Corrections Group + for key,val in fileID[gtx]['geophys_corr'].items(): + IS2_atl03_attrs[gtx]['geophys_corr'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs[gtx]['geophys_corr'][key][att_name]=att_val + + # ICESat-2 spacecraft orientation at time + IS2_atl03_mds['orbit_info'] = {} + IS2_atl03_attrs['orbit_info'] = {} + for key,val in fileID['orbit_info'].items(): + IS2_atl03_mds['orbit_info'][key] = val[:] + # Getting attributes of group and included variables + if ATTRIBUTES: + # Global Group Attributes + for att_name,att_val in fileID['orbit_info'].attrs.items(): + IS2_atl03_attrs['orbit_info'][att_name] = att_val + # Variable Attributes + IS2_atl03_attrs['orbit_info'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs['orbit_info'][key][att_name] = att_val + + # information ancillary to the data product + # number of GPS seconds between the GPS epoch (1980-01-06T00:00:00Z UTC) + # and ATLAS Standard Data Product (SDP) epoch (2018-01-01T00:00:00Z UTC) + # Add this value to delta time parameters to compute full gps_seconds + # could alternatively use the Julian day of the ATLAS SDP epoch: 2458119.5 + # and add leap seconds since 2018-01-01T00:00:00Z UTC (ATLAS SDP epoch) + IS2_atl03_mds['ancillary_data'] = {} + 
IS2_atl03_attrs['ancillary_data'] = {} + ancillary_keys = ['atlas_sdp_gps_epoch','data_end_utc','data_start_utc', + 'end_cycle','end_geoseg','end_gpssow','end_gpsweek','end_orbit', + 'end_region','end_rgt','granule_end_utc','granule_start_utc','release', + 'start_cycle','start_geoseg','start_gpssow','start_gpsweek', + 'start_orbit','start_region','start_rgt','version'] + for key in ancillary_keys: + # get each HDF5 variable + IS2_atl03_mds['ancillary_data'][key] = fileID['ancillary_data'][key][:] + # Getting attributes of group and included variables + if ATTRIBUTES: + # Variable Attributes + IS2_atl03_attrs['ancillary_data'][key] = {} + for att_name,att_val in fileID['ancillary_data'][key].attrs.items(): + IS2_atl03_attrs['ancillary_data'][key][att_name] = att_val + + # transmit-echo-path (tep) parameters + IS2_atl03_mds['ancillary_data']['tep'] = {} + IS2_atl03_attrs['ancillary_data']['tep'] = {} + for key,val in fileID['ancillary_data']['tep'].items(): + # get each HDF5 variable + IS2_atl03_mds['ancillary_data']['tep'][key] = val[:] + # Getting attributes of group and included variables + if ATTRIBUTES: + # Variable Attributes + IS2_atl03_attrs['ancillary_data']['tep'][key] = {} + for att_name,att_val in val.attrs.items(): + IS2_atl03_attrs['ancillary_data']['tep'][key][att_name] = att_val + + # channel dead time and first photon bias derived from ATLAS calibration + cal1,cal2 = ('ancillary_data','calibrations') + for var in ['dead_time','first_photon_bias']: + IS2_atl03_mds[cal1][var] = {} + IS2_atl03_attrs[cal1][var] = {} + for key,val in fileID[cal1][cal2][var].items(): + # get each HDF5 variable + if isinstance(val, h5py.Dataset): + IS2_atl03_mds[cal1][var][key] = val[:] + elif isinstance(val, h5py.Group): + IS2_atl03_mds[cal1][var][key] = {} + for k,v in val.items(): + IS2_atl03_mds[cal1][var][key][k] = v[:] + # Getting attributes of group and included variables + if ATTRIBUTES: + # Variable Attributes + IS2_atl03_attrs[cal1][var][key] = {} + for 
# convert_wgs_to_utm function, see https://stackoverflow.com/a/40140326/4556479
def convert_wgs_to_utm(lon: float, lat: float):
    """Return the EPSG code string of the UTM zone containing a lon/lat point.

    Parameters
    ----------
    lon: float
        longitude in degrees; shifted by 180 and split into 6-degree zones
    lat: float
        latitude in degrees; only its sign is used

    Returns
    -------
    str
        'epsg:326NN' when ``lat >= 0`` and 'epsg:327NN' otherwise, where NN
        is the zero-padded zone number (01-60)
    """
    # Zone numbers run 1..60; the modulo wraps lon == 180 back to zone 1
    zone_number = math.floor((lon + 180) / 6) % 60 + 1
    zone_text = str(zone_number).zfill(2)

    prefix = 'epsg:326' if lat >= 0 else 'epsg:327'
    return prefix + zone_text
# Snippet by Eric Guenther (via Amy N.) for assigning photons to a segment
def get_atl03_seg_id(atl03_ph_index_beg, atl03_segment_id, atl03_heights_len):
    """Expand per-segment IDs into a per-photon array of segment IDs.

    Parameters
    ----------
    atl03_ph_index_beg: numpy array
        1-based index of the first photon of each segment; entries equal
        to 0 are dropped (the original author notes 0 marks an error)
    atl03_segment_id: numpy array
        segment ID for each entry of ``atl03_ph_index_beg``
    atl03_heights_len: int
        number of photons in the photon-level arrays

    Returns
    -------
    numpy array
        float array of length ``atl03_heights_len`` holding the segment ID
        of every photon. Photons located before the first retained start
        index keep the initial value 0.
    """
    # Keep only segments with a non-zero start, shifting to 0-based indexing
    has_photons = atl03_ph_index_beg != 0
    seg_ids = atl03_segment_id[has_photons]
    seg_starts = atl03_ph_index_beg[has_photons] - 1

    # Closing the boundary list with the photon count lets the last
    # segment extend to the end of the photon array
    bounds = np.append(seg_starts, atl03_heights_len)

    ph_segment_id = np.zeros(atl03_heights_len)

    # Photons in [start, stop) belong to the segment that begins at start
    for seg_id, start, stop in zip(seg_ids, bounds[:-1], bounds[1:]):
        ph_segment_id[start:stop] = seg_id

    return ph_segment_id
# thinking about grid searching to detect bathymetric directly
# rather than bin and then search
def horizontal_vertical_grid_density_cal(dataset, lat_res, vertical_res, density_threshold):
    """Count photons on a lat/height grid and report the dense cells.

    Parameters
    ----------
    dataset: pandas.DataFrame
        photon table with 'lat' and 'photon_height' columns
    lat_res: float
        grid cell size along 'lat' (same units as the column)
    vertical_res: float
        grid cell size along 'photon_height' (same units as the column)
    density_threshold: float
        cells whose photon count is strictly greater than this are reported

    Returns
    -------
    dataset_copy: pandas.DataFrame
        copy of ``dataset`` with a reset index and two added columns,
        'lat_bins' and 'height_bins', produced by ``pd.cut``
    high_density_cells: numpy array
        (k, 2) array of [lat_index, height_index] pairs of the dense cells

    Notes
    -----
    Grid cells are ``lat_res`` / ``vertical_res`` wide starting at the column
    minimum, whereas ``pd.cut`` splits the full range into equal-width bins,
    so the grid indices and the bin labels can differ slightly.
    """
    lat_min = dataset['lat'].min()
    height_min = dataset['photon_height'].min()

    # At least one bin per axis, so a zero-width range still gives a valid grid
    lat_bin_number = max(
        1, round(abs(lat_min - dataset['lat'].max()) / lat_res))
    height_bin_number = max(
        1, round(abs(height_min - dataset['photon_height'].max()) / vertical_res))

    # Cell index of every photon (offsets are non-negative, so truncation
    # is the same as floor)
    lat_index = ((dataset['lat'].to_numpy() - lat_min) / lat_res).astype(int)
    height_index = ((dataset['photon_height'].to_numpy() - height_min)
                    / vertical_res).astype(int)

    # The photon(s) at the column maximum can land one past the last cell
    # whenever round() above did not round up; fold them into the last cell
    # instead of raising IndexError
    lat_index = np.minimum(lat_index, lat_bin_number - 1)
    height_index = np.minimum(height_index, height_bin_number - 1)

    # Accumulate counts; np.add.at handles repeated (row, col) pairs correctly
    grid = np.zeros((lat_bin_number, height_bin_number))
    np.add.at(grid, (lat_index, height_index), 1)

    # Identify high-density cells
    high_density_cells = np.argwhere(grid > density_threshold)

    # Work on a copy so the caller's frame is left untouched
    dataset_copy = dataset.copy()
    dataset_copy['lat_bins'] = pd.cut(dataset['lat'], bins=lat_bin_number,
                                      labels=np.arange(lat_bin_number))
    dataset_copy['height_bins'] = pd.cut(dataset['photon_height'],
                                         bins=height_bin_number,
                                         labels=np.arange(height_bin_number))
    dataset_copy = dataset_copy.reset_index(drop=True)

    return dataset_copy, high_density_cells
# Bin data first, and then throw away only the surface bin
def get_rm_sea_surface_bin(binned_dataset):
    """Return the photons that lie below the most populated height bin.

    For every 'lat_bins' group, the 'height_bins' category holding the most
    photons is taken as the sea-surface bin, and only rows whose height bin
    is strictly below it are kept.

    Parameters
    ----------
    binned_dataset: pandas.DataFrame
        photon table with 'lat', 'lat_bins' and 'height_bins' columns;
        'height_bins' must support ``<`` comparison (e.g. an ordered
        categorical as produced by ``pd.cut``)

    Returns
    -------
    pandas.DataFrame
        the retained rows of all groups, concatenated in group order with
        their original index; an empty frame with the input columns when
        nothing is retained
    """
    below_peak_parts = []

    for _, group in binned_dataset.groupby('lat_bins', observed=False):
        # Unobserved categories produce empty groups with nothing to keep
        if group.empty:
            continue

        # Photon count per height bin within this latitude bin
        counts = group.groupby('height_bins', observed=False)['lat'].count()
        if counts.empty:
            continue

        # Label of the bin with the highest count (first one on ties)
        peak_bin = counts.index[counts.argmax()]

        # Keep only the photons in bins below the peak bin
        below_peak_parts.append(group.loc[group['height_bins'] < peak_bin])

    if not below_peak_parts:
        return binned_dataset.iloc[0:0]

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    return pd.concat(below_peak_parts)
lat_utm + lat_bin_average = v['lat'].mean() + + # Create new dataframe based on occurrence of photons per height bin + new_df = pd.DataFrame(v.groupby(['height_bins'], observed=False).count()) + + # Return the bin with the highest count + largest_h_bin = new_df['lat'].argmax() + + # Select the index of the bin with the highest count + largest_h_index = new_df.index[largest_h_bin] + + # Calculate the median value of all photon height values within this bin + photons_sea_surface = v.loc[v['height_bins'] == largest_h_index, 'photon_height'] + lat_bin_sea_median = photons_sea_surface.median() + + # Append to sea height list + sea_surface_height.append(lat_bin_sea_median) + mean_lat_bins_seq.append(lat_bin_average) + del new_df + + # Get all photons below sea surface + # to determine segment type of each subsurface water column + # Use calculated sea height to determine photons at 0.5m below peak + photons_sea_surface_up = \ + v.loc[(v['photon_height'] > (lat_bin_sea_median - threshold)) & + (v['photon_height'] < (lat_bin_sea_median + 2*threshold))] + + # Calculate the photon ratio between surface and whole photons + if v['photon_height'].shape[0] > 0: + new_photons_ratio_sea_surface = \ + photons_sea_surface_up.shape[0] / v['photon_height'].shape[0] + else: + new_photons_ratio_sea_surface = np.nan + + sea_surface_subsurface_photons_ratio.append((1 - new_photons_ratio_sea_surface)) + + # Filter out sea height bin values outside 2 SD of mean. 
#
# Arbitrary cutoff below the max value - 0.5 m below peak
# (we may also use 1 m but we can add that later) (apply to raw photon data, each of 6 beams)
def get_photon_below_sea_surface(binned_data, threshold):
    """Return the photons lying more than ``threshold`` below the sea surface.

    For every 'lat_bins' group the sea-surface height is estimated as the
    median 'photon_height' of the most populated 'height_bins' category;
    rows with ``photon_height < surface - threshold`` are kept.

    Parameters
    ----------
    binned_data: pandas.DataFrame
        photon table with 'lat_utm', 'photon_height', 'lat_bins' and
        'height_bins' columns
    threshold: float
        vertical offset below the estimated surface, in the units of
        'photon_height'

    Returns
    -------
    pandas.DataFrame
        the retained rows of all groups, concatenated in group order with
        their original index; an empty frame with the input columns when
        nothing is retained
    """
    # NOTE(review): the previous version computed a 2-SD outlier filter on the
    # per-bin surface heights but never applied it; the unfiltered per-bin
    # median is still used here. get_sea_surface_height does apply such a
    # filter - confirm which behaviour is intended.
    below_surface_parts = []

    for _, group in binned_data.groupby('lat_bins', observed=False):
        # Unobserved categories produce empty groups with nothing to keep
        if group.empty:
            continue

        # Photon count per height bin within this latitude bin
        counts = group.groupby('height_bins', observed=False)['lat_utm'].count()
        if counts.empty:
            continue

        # The most populated height bin is taken as the sea surface
        peak_bin = counts.index[counts.argmax()]
        surface_height = group.loc[group['height_bins'] == peak_bin,
                                   'photon_height'].median()

        # Each group is paired with its own surface height directly, rather
        # than looking it up in a list by group key (which breaks when the
        # key is a tuple or the labels are not 0..N-1)
        below_surface_parts.append(
            group.loc[group['photon_height'] < (surface_height - threshold)])

    if not below_surface_parts:
        return binned_data.iloc[0:0]

    # DataFrame.append was removed in pandas 2.0; concatenate once instead
    return pd.concat(below_surface_parts)
def get_water_temp(date_year, date_month, date_day, latitude, longitude):
    """
    Pull down surface water temperature along the track from the JPL GHRSST opendap website.

    The GHRSST data are gridded tiles with dimension 17998 x 35999.
    To get the specific grid tile of the SST, the mean latitude/longitude of
    the IS2 track is rescaled linearly from degrees to a grid row/column.

    Parameters
    ----------
    date_year, date_month, date_day: str
        date parts that concatenate to a 'YYYYMMDD' string
    latitude, longitude: array-like with a ``mean()`` method
        track coordinates in degrees; only their means are used

    Returns
    -------
    'analysed_sst' at the selected grid cell minus 273.15
    (Kelvin converted to Celsius)
    """
    # Date string and zero-padded day of year, both used to build the URL
    date = date_year + date_month + date_day
    zero_day_of_year = str(
        datetime.strptime(date, '%Y%m%d').timetuple().tm_yday).zfill(3)

    # Rescale the track mid-point latitude from [-90, 90] to [0, 17998]
    old_lat_min, old_lat_max = -90, 90
    new_lat_min, new_lat_max = 0, 17998
    new_lat = round(((latitude.mean() - old_lat_min) / (old_lat_max - old_lat_min)) *
                    (new_lat_max - new_lat_min) + new_lat_min)

    # Rescale the track mid-point longitude from [-180, 180] to [0, 35999]
    old_lon_min, old_lon_max = -180, 180
    new_lon_min, new_lon_max = 0, 35999
    new_lon = round(((longitude.mean() - old_lon_min) / (old_lon_max - old_lon_min)) *
                    (new_lon_max - new_lon_min) + new_lon_min)

    # Access the SST data using the JPL OpenDap interface
    url = 'https://opendap.jpl.nasa.gov/opendap/OceanTemperature/ghrsst/data/GDS2/L4/GLOB/JPL/MUR/v4.1/' \
          + str(date_year) + '/' + str(zero_day_of_year) + '/' + str(date) \
          + '090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1.nc'

    # The context manager closes the remote dataset even if the read fails;
    # the previous version left the handle open
    with netCDF4.Dataset(url) as dataset:
        # Access the data and convert the temperature from K to C
        water_temp = dataset['analysed_sst'][0, new_lat, new_lon] - 273.15

    return water_temp
Ph_ref_azimuth, + PhotonZ, PhotonX, PhotonY, Ph_Conf): + """ + WTemp; there is python library that pulls water temp data + WSmodel is the value surface height + Wavelength is fixed + """ + + # Only process photons below water surface model + PhotonX = PhotonX[PhotonZ <= WSmodel] + PhotonY = PhotonY[PhotonZ <= WSmodel] + Photon_ref_elev = Photon_ref_elev[PhotonZ <= WSmodel] + Ph_ref_azimuth = Ph_ref_azimuth[PhotonZ <= WSmodel] + Ph_Conf = Ph_Conf[PhotonZ <= WSmodel] + PhotonZ = PhotonZ[PhotonZ <= WSmodel] + + # water temp for refraction correction + WaterTemp = WTemp + + # Refraction coefficient # + a = -0.000001501562500 + b = 0.000000107084865 + c = -0.000042759374989 + d = -0.000160475520686 + e = 1.398067112092424 + wl = Wavelength + + # refractive index of air + n1 = 1.00029 + + # refractive index of water + n2 = (a * WaterTemp ** 2) + (b * wl ** 2) + (c * WaterTemp) + (d * wl) + e + + # assumption is 0.25416 + # This example is refractionCoef = 0.25449 + # 1.00029 is refraction of air constant + correction_coef = (1 - (n1 / n2)) + + # read photon ref_elev to get theta1 + theta1 = np.pi / 2 - Photon_ref_elev + + # eq 1. Theta2 + theta2 = np.arcsin(((n1 * np.sin(theta1)) / n2)) + + # eq 3. S + # Approximate water Surface = 1.5 + # D = raw uncorrected depth + D = WSmodel - PhotonZ + + # For Triangle DTS + S = D / np.cos(theta1) + + # eq 2. 
def get_bath_height_percentile_thresh(binned_data, percentile_thresh, sea_surface_height, vertical_res):
    """Detect bathymetric level per lat bin based on a photon-count percentile.

    Parameters
    ----------
    binned_data: pandas.DataFrame
        photon table with 'lat', 'lon', 'photon_height',
        'cor_photon_height', 'lat_bins' and 'height_bins' columns
    percentile_thresh: float
        percentile (0-100) of the per-lat-bin maximum cell counts used as
        the minimum photon count for a bathymetry detection
    sea_surface_height: float
        sea-surface height; photons within two vertical bins below it are
        excluded (assumed to be a scalar - TODO confirm against callers)
    vertical_res: float
        vertical bin size, in the units of 'photon_height'

    Returns
    -------
    bath_height: list
        one entry per lat bin: the median 'cor_photon_height' of the
        densest sub-surface height bin, or NaN when its count is below
        the threshold
    geo_df: pandas.DataFrame
        columns 'lon', 'lat', 'photon_height', 'depth' for every photon
        in a detected bathymetry bin; empty when nothing was detected
    """
    bath_height = []
    geo_photon_height = []
    geo_longitude = []
    geo_latitude = []

    # Filter out surface data that are within two bins of the sea surface
    binned_data_bath = binned_data[(binned_data['photon_height'] <
                                    sea_surface_height - (vertical_res * 2))]

    # Percentile threshold over the largest cell count of each lat bin,
    # computed on ALL photons (surface included)
    cell_counts = binned_data.groupby(
        ['lat_bins', 'height_bins'], observed=False).size().reset_index()
    count_threshold = np.percentile(
        cell_counts.groupby('lat_bins', observed=False)[[0]].max(),
        percentile_thresh)

    # Loop through lat bins and look for a dense sub-surface height bin
    for _, group in binned_data_bath.groupby('lat_bins', observed=False):
        counts = group.groupby('height_bins', observed=False)['lat'].count()

        # No height bin at all, or the densest one is too sparse: no bathymetry
        if counts.empty:
            bath_height.append(np.nan)
            continue
        peak_pos = counts.argmax()
        if counts.iloc[peak_pos] < count_threshold:
            bath_height.append(np.nan)
            continue

        in_peak_bin = group['height_bins'] == counts.index[peak_pos]
        peak_heights = group.loc[in_peak_bin, 'cor_photon_height']

        geo_photon_height.append(peak_heights.values)
        geo_longitude.append(group.loc[in_peak_bin, 'lon'].values)
        geo_latitude.append(group.loc[in_peak_bin, 'lat'].values)
        bath_height.append(peak_heights.median())

    def _flatten(chunks):
        # np.concatenate raises on an empty list, which happens whenever
        # no lat bin passes the count threshold
        return np.concatenate(chunks).ravel() if chunks else np.array([])

    geo_photon = _flatten(geo_photon_height)

    # Array arithmetic: a plain float minus a Python list raises TypeError
    geo_depth = sea_surface_height - geo_photon

    geo_df = pd.DataFrame(
        {'lon': _flatten(geo_longitude).tolist(),
         'lat': _flatten(geo_latitude).tolist(),
         'photon_height': geo_photon.tolist(),
         'depth': geo_depth})

    return bath_height, geo_df
+ binned_data_bath = lat_binned_data[(lat_binned_data['photon_height'] < + sea_surface_height - (vertical_res * 2))] + + grouped_data = binned_data_bath.groupby(['lat_bins'], group_keys=True) + data_groups = dict(list(grouped_data)) + + # Loop through groups and return average bathymetric height + for k, v in data_groups.items(): + + # assign each group of dataset to a new dataframe + new_df = pd.DataFrame(v) + + # Check if the DataFrame is empty + if new_df.empty: + # If the DataFrame is empty, append null values to the lists and skip to the next iteration + bath_height.append(np.nan) + geo_photon_height.append([]) + geo_longitude.append([]) + geo_latitude.append([]) + del new_df + continue + + lat_height_pairs = list(zip(new_df['lat_utm'], new_df['cor_photon_height'])) + + # Convert to a numpy array for sklearn + lat_height_pairs_array = np.array(lat_height_pairs) + + # Perform HDBSCAN clustering on the photons below sea surface for each lat bin + # scaler = StandardScaler() + scaler = MinMaxScaler() + data_scaled = scaler.fit_transform(lat_height_pairs_array) + + min_cluster_size = 4 + + # Check the number of data points is greater than the initial min_cluster_size + if len(data_scaled) < min_cluster_size: + min_cluster_size = len(data_scaled) // 2 + if min_cluster_size < 2: + bath_height.append(np.nan) + geo_photon_height.append([]) + geo_longitude.append([]) + geo_latitude.append([]) + del new_df + continue + + # cluster = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size, leaf_size=20) + cluster = hdbscan.HDBSCAN(min_cluster_size=min_cluster_size, + min_samples=3, cluster_selection_epsilon=20, + leaf_size=25, core_dist_n_jobs=-1) + cluster.fit(data_scaled) + + # Get the labels assigned to each point by the HDBSCAN model + labels = cluster.labels_ + + # Count the number of points assigned to each cluster + unique, counts = np.unique(labels, return_counts=True) + + # Create a dictionary that maps each cluster label to the count of points assigned to it + 
# requires that the input gdf has ranged index values i
# will need to change if index is changed to time or something
# this currently checks point in polygon for EVERY point
# would be significantly sped up if evaluated at 10m or something similar
# maybe later, fine for now
def isolate_sea_land_photons(shoreline_data_path, ICESat2_GDF):
    """Label every photon as land (1) or not land (0) using shoreline polygons.

    Side effects: inserts 'lat', 'lon' and 'is_land' columns at the front of
    ``ICESat2_GDF`` and sets its CRS to EPSG:4326 in place.

    Parameters
    ----------
    shoreline_data_path: str
        path of the land-polygon dataset, read with the 'pyogrio' engine
        and restricted to features intersecting the photon bounding box
    ICESat2_GDF: geopandas.GeoDataFrame
        photon points; its index is used to match the spatial-join result

    Returns
    -------
    numpy array
        int64 array with one entry per photon: 1 when the point is within a
        land polygon, 0 otherwise, or all -1 when anything failed (the
        error is printed, not raised)
    """
    # Build the fallback up front so the error path never depends on a
    # column that may not have been inserted yet
    unknown_labels = np.full(len(ICESat2_GDF), -1, dtype=np.int64)

    try:
        ICESat2_GDF.insert(0, 'lat', ICESat2_GDF.geometry.y, False)
        ICESat2_GDF.insert(0, 'lon', ICESat2_GDF.geometry.x, False)

        # Land flag initialized as -1
        # If shorelines downloaded already, will be set to 0 or 1
        ICESat2_GDF.insert(0, 'is_land', unknown_labels.copy(), False)

        # set the projection
        ICESat2_GDF.set_crs("EPSG:4326", inplace=True)

        # load only the shoreline features that intersect the bounding box;
        # the original author notes the 'fiona' engine sometimes errors here
        land_polygon_gdf = gpd.read_file(shoreline_data_path, bbox=ICESat2_GDF, engine='pyogrio')

        # points that fall within any land polygon
        pts_in_land = gpd.sjoin(ICESat2_GDF, land_polygon_gdf, predicate='within')

        # boolean land mask per photon, converted to 0/1 integer labels
        land_loc = ICESat2_GDF.index.isin(pts_in_land.index)
        return land_loc.astype(np.int64)

    except Exception as e:
        # Deliberate best-effort: report the problem and flag every photon
        # as unknown instead of aborting the workflow
        print(e)

        print("Error loading shoreline data, returning -1s for is_land flag")

        return unknown_labels
def produce_figures(binned_data, bath_height, sea_height, solo_sea_surface_label,
                    y_limit_top, y_limit_bottom, percentile, file, geo_df,
                    ref_y, ref_z, beam, epsg_num,
                    output_dir="C:/Workstation/ICESat2_HLS/"):
    """Plot classified photons with median bathymetry / sea surface and save a PDF.

    Parameters
    ----------
    binned_data : pandas.DataFrame
        Only ``binned_data.lat`` is read (x-axis extent).
    bath_height, sea_height : sequence of float
        Per-bin median bathymetry and sea-surface heights (NaN allowed).
    solo_sea_surface_label : sequence
        Per-bin 0/1 label; plotted in separate colours.
    y_limit_top, y_limit_bottom : float
        y-axis limits.
    percentile, beam, epsg_num :
        Only used to build the output file name (``epsg_num`` is also the
        source CRS for the back-conversion to WGS84).
    file : str
        Granule file name; a ``.h5`` suffix is removed for the output name.
    geo_df : pandas.DataFrame
        Classified photons with ``lon``, ``lat``, ``photon_height``; gains
        ``lon_wgs84`` / ``lat_wgs84`` columns in place.
    ref_y, ref_z : array-like
        Background photon cloud plotted in black.
    output_dir : str, optional
        Directory the PDF is written to (defaults to the previous hard-coded
        location).

    Returns
    -------
    geopandas.GeoDataFrame
        ``geo_df`` with WGS84 point geometry (EPSG:4326).
    """
    import os

    lat_min = binned_data.lat.min()
    lat_max = binned_data.lat.max()

    # Create bins for latitude (the +20 / +10 shifts are kept from the
    # original implementation).
    bath_x_axis_bins = np.linspace(lat_min, lat_max, len(bath_height)) + 20
    sea_surface_x_axis_bins = np.linspace(lat_min, lat_max, len(sea_height)) + 10

    bath_median_df = pd.DataFrame({'x': bath_x_axis_bins, 'y': bath_height})

    # Uniform sea surface: every valid bin gets the overall median, NaN bins
    # stay NaN (``h == h`` is False only for NaN).
    valid_heights = [h for h in sea_height if h == h]
    overall_median = np.median(valid_heights) if valid_heights else np.nan
    sea_height1 = [overall_median if h == h else np.nan for h in sea_height]
    sea_median_df = pd.DataFrame({'x': sea_surface_x_axis_bins, 'y': sea_height1})

    sea_surface_label_df = pd.DataFrame({'x': sea_surface_x_axis_bins,
                                         'y': solo_sea_surface_label})
    solo_bins = sea_surface_label_df[sea_surface_label_df.y == 1]
    non_solo_bins = sea_surface_label_df[sea_surface_label_df.y == 0]

    # The global default is still updated (as before), but an explicit figure
    # is now created so repeated calls do not draw on top of each other.
    plt.rcParams["figure.figsize"] = (40, 25)
    fig = plt.figure()

    plt.scatter(ref_y, ref_z, s=0.5, alpha=0.1, c='black')
    plt.scatter(geo_df.lat, geo_df.photon_height, s=0.8, marker='o',
                alpha=0.1, c='red', label='Classified Photons')

    # Plot median values
    plt.scatter(bath_median_df.x, bath_median_df.y,
                marker='o', c='r', alpha=0.8, s=2, label='Median bathymetry')
    plt.scatter(sea_median_df.x, sea_median_df.y,
                marker='o', c='b', alpha=1, s=2, label='Median sea surface')
    plt.scatter(solo_bins.x, solo_bins.y,
                marker='o', c='pink', alpha=1, s=3, label='solo_sea_surface')
    plt.scatter(non_solo_bins.x, non_solo_bins.y,
                marker='o', c='g', alpha=1, s=3, label='non_solo_sea_surface')

    # Titles and axes
    plt.title('Icesat2 Bathymetry\n' + file)
    plt.xlabel('Latitude', fontsize=25)
    plt.ylabel('Photon Height (m)', fontsize=25)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    plt.legend(loc="upper left", prop={'size': 20})
    plt.xlim(left=lat_min, right=lat_max)
    plt.ylim(top=y_limit_top, bottom=y_limit_bottom)

    timestr = time.strftime("%Y%m%d_%H%M%S")
    file = file.replace('.h5', '')
    out_name = (file + '_gt' + str(beam) + '_' + str(percentile)
                + '_EPSG' + str(epsg_num) + '_' + timestr + ".pdf")
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, out_name))
    # Release the figure; it was previously left open on every call.
    plt.close(fig)

    # convert corrected locations back to wgs84 (useful to contain)
    transformer = Transformer.from_crs("EPSG:" + str(epsg_num),
                                       "EPSG:4326", always_xy=True)
    print(transformer)
    lon_wgs84, lat_wgs84 = transformer.transform(
        geo_df.lon.values, geo_df.lat.values)

    geo_df['lon_wgs84'] = lon_wgs84
    geo_df['lat_wgs84'] = lat_wgs84

    geodf = gpd.GeoDataFrame(geo_df,
                             geometry=gpd.points_from_xy(geo_df.lon_wgs84,
                                                         geo_df.lat_wgs84))
    geodf.set_crs(epsg=4326, inplace=True)

    return geodf
def process_sea_photon_binning(sea_photon_dataset, horizontal_res, vertical_res):
    """Bin the photons of every beam separately and stack the results.

    Parameters
    ----------
    sea_photon_dataset : pandas.DataFrame
        Must contain ``beam_id``, ``lat`` and ``photon_height``.
    horizontal_res, vertical_res : float
        Bin sizes forwarded to ``horizontal_vertical_bin_dataset``.

    Returns
    -------
    pandas.DataFrame
        All beams concatenated with a fresh index, with ``lat_bins`` and
        ``height_bins`` added. Empty (same columns plus the two bin columns)
        when the input holds no beam.
    """
    binned_beam_datasets = []

    for beam_id, beam_data in sea_photon_dataset.groupby('beam_id'):
        print(f'Processing binning for beam: {beam_id}')
        binned_beam_datasets.append(
            horizontal_vertical_bin_dataset(beam_data, horizontal_res, vertical_res))

    if not binned_beam_datasets:
        # pd.concat raises on an empty list; return an empty frame instead.
        return pd.DataFrame(
            columns=[*sea_photon_dataset.columns, 'lat_bins', 'height_bins'])

    return pd.concat(binned_beam_datasets, ignore_index=True)


def horizontal_vertical_bin_dataset(dataset, lat_res, vertical_res,
                                    valid_range=(-70, 5)):
    """Assign every photon to a horizontal (``lat``) and a vertical bin.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain ``lat`` and ``photon_height``.
    lat_res, vertical_res : float
        Target bin sizes, in the units of ``lat`` and ``photon_height``.
    valid_range : tuple of float, optional
        Exclusive ``(low, high)`` limits on ``photon_height``; photons outside
        are discarded as noise.

    Returns
    -------
    pandas.DataFrame
        Filtered copy with a reset index and two categorical columns:
        ``lat_bins`` (labels 0..n-1) and ``height_bins`` (labels are ``n``
        evenly spaced heights from the minimum to the maximum photon height,
        rounded to 0.1).
    """
    heights = dataset['photon_height']
    valid_mask = (heights > valid_range[0]) & (heights < valid_range[1])

    # copy to avoid SettingWithCopyWarning when adding the bin columns
    filtered_dataset = dataset[valid_mask].copy()

    if filtered_dataset.empty:
        # min()/max() would be NaN and round(NaN) raises; nothing to bin.
        filtered_dataset['lat_bins'] = pd.Categorical([])
        filtered_dataset['height_bins'] = pd.Categorical([])
        return filtered_dataset.reset_index(drop=True)

    height_min = filtered_dataset['photon_height'].min()
    height_max = filtered_dataset['photon_height'].max()
    height_bin_number = max(1, round(abs(height_max - height_min) / vertical_res))

    lat_range = abs(filtered_dataset['lat'].max() - filtered_dataset['lat'].min())
    lat_bin_number = max(1, round(lat_range / lat_res))

    lat_bins = pd.cut(filtered_dataset['lat'], bins=lat_bin_number,
                      labels=np.arange(lat_bin_number))

    # NOTE: labels rounded to one decimal must stay unique, so pd.cut will
    # raise if vertical_res is so small that two labels round to the same value.
    height_labels = np.round(
        np.linspace(height_min, height_max, num=height_bin_number), decimals=1)
    height_bins = pd.cut(filtered_dataset['photon_height'],
                         bins=height_bin_number, labels=height_labels)

    filtered_dataset.loc[:, 'lat_bins'] = lat_bins
    filtered_dataset.loc[:, 'height_bins'] = height_bins
    return filtered_dataset.reset_index(drop=True)
def get_sea_surface_height_static(binned_data, threshold):
    """Detect the sea surface per latitude bin and return the photons below it.

    For every ``lat_bins`` group the most populated ``height_bins`` bin is
    taken as the surface and the median photon height inside it as the surface
    height. Heights further than two standard deviations from the mean of all
    bins are replaced by NaN.

    Parameters
    ----------
    binned_data : pandas.DataFrame
        Needs ``lat_bins``, ``height_bins``, ``lat`` and ``photon_height``.
    threshold : float
        Vertical offset (same unit as ``photon_height``). Photons lower than
        ``surface - threshold`` count as subsurface; the surface layer used
        for the ratio spans ``surface - threshold`` to
        ``surface + 2 * threshold``.

    Returns
    -------
    final_sea_surface_height : list of float
        Surface height per latitude bin (NaN when missing or an outlier).
    sea_surface_height_abnormal_label : numpy.ndarray
        1 where a surface height is available, 0 where it is NaN.
    sea_surface_dominated_label : numpy.ndarray
        0 where at least 20 % of the bin's photons lie outside the surface
        layer, otherwise 1 (also 1 when the ratio is NaN).
    PhotonDFBelowThresholdPeak : pandas.DataFrame
        Photons below ``surface - threshold`` from all bins with a valid
        surface height.
    """
    import warnings

    sea_surface_height = []
    subsurface_ratio = []

    # Group dataset along the horizontal (latitude bins)
    grouped_data = binned_data.groupby(['lat_bins'], group_keys=True, observed=False)
    data_groups = dict(list(grouped_data))

    for v in data_groups.values():
        # photon count per height bin
        counts = v.groupby(['height_bins'], observed=False).count()
        if counts.empty:
            sea_surface_height.append(np.nan)
            subsurface_ratio.append(np.nan)
            continue

        # label of the most populated height bin
        peak_label = counts.index[counts['lat'].argmax()]
        surface = v.loc[v['height_bins'] == peak_label, 'photon_height'].median()
        sea_surface_height.append(surface)

        n_total = v['photon_height'].shape[0]
        if n_total > 0:
            in_surface_layer = ((v['photon_height'] > (surface - threshold)) &
                                (v['photon_height'] < (surface + 2 * threshold)))
            surface_ratio = in_surface_layer.sum() / n_total
        else:
            surface_ratio = np.nan
        subsurface_ratio.append(1 - surface_ratio)

    # Filter out sea height values outside 2 SD of the mean.
    heights = np.asarray(sea_surface_height, dtype=float)
    with warnings.catch_warnings():
        # empty / all-NaN input only yields NaN statistics, which is fine here
        warnings.simplefilter("ignore", category=RuntimeWarning)
        mean = np.nanmean(heights)
        sd = np.nanstd(heights)

    final_sea_surface_height = np.where(
        (heights > (mean + 2 * sd)) | (heights < (mean - 2 * sd)),
        np.nan, heights).tolist()

    sea_surface_height_abnormal_label = np.where(
        np.isnan(final_sea_surface_height), 0, 1)
    sea_surface_dominated_label = np.where(
        np.array(subsurface_ratio) >= 0.2, 0, 1)

    # Collect the subsurface photons per bin and concatenate once (a concat
    # inside the loop copies the accumulated frame on every iteration).
    frames = [
        v.loc[v['photon_height'] < (surface - threshold)]
        for surface, v in zip(final_sea_surface_height, data_groups.values())
        if not np.isnan(surface)
    ]
    PhotonDFBelowThresholdPeak = pd.concat(frames) if frames else pd.DataFrame()

    return (final_sea_surface_height,
            sea_surface_height_abnormal_label,
            sea_surface_dominated_label,
            PhotonDFBelowThresholdPeak)
def _fit_surface_gaussian(heights, center, half_window=1.0, min_count=10):
    """Fit a normal distribution to the photons within ``half_window`` of ``center``.

    Returns ``(mu, sigma)``, or ``None`` when the window holds ``min_count``
    photons or fewer.
    """
    window = heights[(heights > center - half_window) &
                     (heights < center + half_window)]
    if len(window) > min_count:
        return norm.fit(window)
    return None


def get_sea_surface_height_adaptive(binned_data):
    """Detect the sea surface per latitude bin with a histogram-peak / Gaussian fit.

    For each ``lat_bins`` group with at least 20 photons a 100-bin height
    histogram is built, its strongest qualifying peak is taken as the surface,
    and a normal distribution is fitted to the photons within +/-1.0 of that
    peak. The fitted mean is the surface height. Photons below
    ``min(mu - 3*sigma, mu - 1.0)`` are treated as subsurface, so wave-affected
    returns near the surface are excluded.

    Parameters
    ----------
    binned_data : pandas.DataFrame
        Needs ``lat_bins`` and ``photon_height``.

    Returns
    -------
    final_sea_surface_height : list of float
        Surface height per latitude bin; NaN when the bin has too few photons,
        no peak was found, or the height is more than 2 SD from the mean.
    sea_surface_height_abnormal_label : numpy.ndarray
        1 where a surface height is available, 0 where it is NaN.
    sea_surface_dominated_label : numpy.ndarray
        0 where at least 20 % of the bin's photons lie outside the surface
        layer, otherwise 1 (also 1 when the ratio is NaN).
    PhotonDFBelowSurface : pandas.DataFrame
        Subsurface photons of all bins with a valid surface height.
    """
    import warnings

    sea_surface_height = []
    subsurface_ratio = []

    grouped_data = binned_data.groupby(['lat_bins'], group_keys=True, observed=False)
    data_groups = dict(list(grouped_data))

    for v in data_groups.values():
        heights = v['photon_height']

        if len(v) < 20:  # too few photons for a robust histogram
            sea_surface_height.append(np.nan)
            subsurface_ratio.append(np.nan)
            continue

        hist, bin_edges = np.histogram(heights, bins=100, density=True,
                                       range=(heights.min(), heights.max()))
        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

        # NOTE: find_peaks never reports the first/last histogram bin, so a
        # surface sitting in the topmost bin yields no peak (NaN for the bin).
        peaks, _ = find_peaks(hist,
                              height=np.max(hist) * 0.3,
                              distance=10,
                              prominence=np.max(hist) * 0.1)
        if len(peaks) == 0:
            sea_surface_height.append(np.nan)
            subsurface_ratio.append(np.nan)
            continue

        # strongest peak = surface
        surface_height = bin_centers[peaks[np.argmax(hist[peaks])]]

        fit = _fit_surface_gaussian(heights, surface_height)
        # conservative default spread when the window is too sparse to fit
        mu, sigma = fit if fit is not None else (surface_height, 0.2)

        # 3 sigma below the mean, but never less than 1.0 below it
        adaptive_threshold = min(mu - 3.0 * sigma, mu - 1.0)

        in_surface_layer = ((heights > adaptive_threshold) &
                            (heights < mu + 3.0 * sigma))
        ratio = in_surface_layer.sum() / len(v)

        sea_surface_height.append(mu)
        subsurface_ratio.append(1 - ratio)

    # Outlier filtering: drop heights outside 2 SD of the mean.
    heights_arr = np.asarray(sea_surface_height, dtype=float)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        mean = np.nanmean(heights_arr)
        sd = np.nanstd(heights_arr)
    final_sea_surface_height = np.where(
        (heights_arr > mean + 2 * sd) | (heights_arr < mean - 2 * sd),
        np.nan, heights_arr).tolist()

    sea_surface_height_abnormal_label = np.where(
        np.isnan(final_sea_surface_height), 0, 1)
    sea_surface_dominated_label = np.where(
        np.array(subsurface_ratio) >= 0.2, 0, 1)

    # Second pass: re-fit around the accepted surface height and keep the
    # photons below the adaptive threshold. Frames are concatenated once.
    frames = []
    for surface, v in zip(final_sea_surface_height, data_groups.values()):
        if np.isnan(surface):
            continue
        fit = _fit_surface_gaussian(v['photon_height'], surface)
        if fit is None:
            adaptive_threshold = surface - 1.0
        else:
            mu, sigma = fit
            adaptive_threshold = min(mu - 3.0 * sigma, mu - 1.0)
        frames.append(v[v['photon_height'] < adaptive_threshold])

    PhotonDFBelowSurface = pd.concat(frames) if frames else pd.DataFrame()

    return (final_sea_surface_height,
            sea_surface_height_abnormal_label,
            sea_surface_dominated_label,
            PhotonDFBelowSurface)
def get_water_temp(date_year, date_month, date_day, latitude, longitude):
    """Read the sea-surface temperature at the track mid-point from GHRSST MUR.

    The mean of ``latitude`` / ``longitude`` is converted to a grid index by
    linear scaling (lat -90..90 -> 0..17998, lon -180..180 -> 0..35999) and
    ``analysed_sst`` is read at that cell through the JPL OPeNDAP endpoint.

    Parameters
    ----------
    date_year, date_month, date_day : str or int
        Acquisition date; zero-padded to ``YYYYMMDD`` here.
    latitude, longitude : array-like with ``.mean()``
        Photon coordinates in degrees.

    Returns
    -------
    Temperature in degrees Celsius (the product value minus 273.15), as
    returned by netCDF4 indexing (may be a masked value).

    NOTE(review): requires network access; confirm the OPeNDAP URL below is
    still served before relying on this function.
    """
    def _to_grid_index(value, old_min, old_max, new_max, new_min=0):
        # linear rescale of a coordinate to the nearest grid index
        return round(((value - old_min) / (old_max - old_min)) *
                     (new_max - new_min) + new_min)

    # Accept both '01' and 1 for month/day; strptime needs fixed-width fields.
    date = f"{int(date_year):04d}{int(date_month):02d}{int(date_day):02d}"
    year = date[:4]
    day_of_year = datetime.strptime(date, '%Y%m%d').timetuple().tm_yday
    zero_day_of_year = str(day_of_year).zfill(3)

    # grid cell of the track mid-point
    new_lat = _to_grid_index(latitude.mean(), -90, 90, 17998)
    new_lon = _to_grid_index(longitude.mean(), -180, 180, 35999)

    # Access the SST data using the JPL OpenDap interface
    url = 'https://opendap.jpl.nasa.gov/opendap/OceanTemperature/ghrsst/data/GDS2/L4/GLOB/JPL/MUR/v4.1/' \
          + str(year) + '/' + str(zero_day_of_year) + '/' + str(date) \
          + '090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1.nc'

    dataset = netCDF4.Dataset(url)
    try:
        # convert the temperature from K to C
        water_temp = dataset['analysed_sst'][0, new_lat, new_lon] - 273.15
    finally:
        # the remote handle was previously never released
        dataset.close()
    return water_temp
def refraction_correction(WTemp, WSmodel, Wavelength,
                          Photon_ref_elev, Ph_ref_azimuth,
                          PhotonZ, PhotonX, PhotonY, Ph_Conf):
    """Correct subsurface photon positions for refraction at the air/water interface.

    Only photons with ``PhotonZ <= WSmodel`` are processed; all per-photon
    inputs are filtered with that mask first.

    Parameters
    ----------
    WTemp : float
        Water temperature used in the refractive-index polynomial
        (presumably degrees C -- confirm against the caller).
    WSmodel : float
        Water-surface height.
    Wavelength : float
        Laser wavelength used in the polynomial (presumably nm, e.g. 532 --
        confirm).
    Photon_ref_elev, Ph_ref_azimuth : array-like
        Per-photon reference elevation and azimuth in radians (they are
        passed directly to trigonometric functions).
    PhotonZ, PhotonX, PhotonY : array-like
        Raw photon height and horizontal coordinates.
    Ph_Conf : array-like
        Per-photon confidence, only filtered and passed through.

    Returns
    -------
    tuple
        ``(outX, outY, outZ, Ph_Conf, PhotonX, PhotonY, PhotonZ,
        Ph_ref_azimuth, Photon_ref_elev)`` -- corrected coordinates followed
        by the filtered inputs.
    """
    # Only process photons at or below the water surface model
    below_surface = PhotonZ <= WSmodel
    PhotonX = PhotonX[below_surface]
    PhotonY = PhotonY[below_surface]
    Photon_ref_elev = Photon_ref_elev[below_surface]
    Ph_ref_azimuth = Ph_ref_azimuth[below_surface]
    Ph_Conf = Ph_Conf[below_surface]
    PhotonZ = PhotonZ[below_surface]

    # Refractive-index polynomial coefficients
    a = -0.000001501562500
    b = 0.000000107084865
    c = -0.000042759374989
    d = -0.000160475520686
    e = 1.398067112092424

    # refractive index of air and of water
    n1 = 1.00029
    n2 = (a * WTemp ** 2) + (b * Wavelength ** 2) + (c * WTemp) + (d * Wavelength) + e

    # incidence angle from the reference elevation, refracted angle via Snell
    theta1 = np.pi / 2 - Photon_ref_elev
    theta2 = np.arcsin((n1 * np.sin(theta1)) / n2)

    # D = raw uncorrected depth, S = slant range along the unrefracted path
    D = WSmodel - PhotonZ
    S = D / np.cos(theta1)

    # R = slant range along the refracted path
    R = (S * n1) / n2
    Gamma = (np.pi / 2) - theta1

    # angle between the raw and the refracted path
    phi = theta1 - theta2

    # P = distance between raw and corrected location (law of cosines)
    P = np.sqrt(R ** 2 + S ** 2 - 2 * R * S * np.cos(phi))

    # A photon exactly at the surface has S = R = P = 0, which made this 0/0
    # and returned NaN coordinates; such photons need no correction.
    with np.errstate(invalid='ignore', divide='ignore'):
        alpha = np.arcsin((R * np.sin(phi)) / P)
    alpha = np.where(P > 0, alpha, 0.0)

    Beta = Gamma - alpha

    # horizontal (along the look direction) and vertical offsets
    DY = P * np.cos(Beta)
    DZ = P * np.sin(Beta)

    # split the horizontal offset into easting / northing
    DE = DY * np.sin(Ph_ref_azimuth)
    DN = DY * np.cos(Ph_ref_azimuth)

    outX = PhotonX + DE
    outY = PhotonY + DN
    outZ = PhotonZ + DZ

    return (outX, outY, outZ, Ph_Conf, PhotonX, PhotonY, PhotonZ, Ph_ref_azimuth,
            Photon_ref_elev)  # outX, outY, outZ are the corrected coordinates
# Numeric ground-track codes -> beam names.
# NOTE(review): values follow SlideRule's icesat2 GT1L..GT3R constants
# (10..60); confirm against the installed sliderule version.
_SLIDERULE_GT_CODES = {
    10: 'gt1l', 20: 'gt1r',
    30: 'gt2l', 40: 'gt2r',
    50: 'gt3l', 60: 'gt3r',
}


def _get_beam_id_from_track_pair(df: pd.DataFrame) -> pd.Series:
    """Build a per-row beam identifier such as ``'gt1l'``.

    Resolution order:

    1. a ``gt`` column -- known numeric codes are translated to beam names,
       anything else is kept as its string form;
    2. ``track`` + ``pair`` columns -- ``'gt' + track + ('l' | 'r')``, with
       ``'x'`` as suffix for pair values other than 0/1;
    3. otherwise the constant ``'unknown'``.
    """
    if 'gt' in df.columns:
        gt = df['gt']
        # Numeric codes stringified as "10", "20", ... can never match the
        # 'l' / 'r' suffix test used for strong-beam selection, so map them.
        named = pd.to_numeric(gt, errors='coerce').map(_SLIDERULE_GT_CODES)
        return named.where(named.notna(), gt.astype(str))
    if 'track' in df.columns and 'pair' in df.columns:
        suffix = df['pair'].map({0: 'l', 1: 'r'}).fillna('x')
        return 'gt' + df['track'].astype(int).astype(str) + suffix
    return pd.Series(['unknown'] * len(df), index=df.index, dtype='object')
_infer_strong_beam_suffix(df: pd.DataFrame) -> Optional[str]: + if 'sc_orient' not in df.columns or df.empty: + return None + orient = df['sc_orient'].dropna() + if orient.empty: + return None + # Same logic as your Florida script. + return 'l' if int(orient.iloc[0]) == 0 else 'r' + + +def _normalize_signal_conf(values: pd.Series) -> pd.Series: + def to_scalar(v): + if isinstance(v, (list, tuple, np.ndarray)): + if len(v) == 0: + return np.nan + return float(v[0]) + try: + return float(v) + except Exception: + return np.nan + return values.apply(to_scalar) + + +def _relative_distance_km_per_beam(df: pd.DataFrame) -> pd.Series: + out = pd.Series(np.nan, index=df.index, dtype=float) + for beam_id, beam_df in df.groupby('beam_id'): + beam_df = beam_df.sort_values('delta_time') + if 'segment_dist' in beam_df.columns: + rel_km = (beam_df['segment_dist'] - beam_df['segment_dist'].min()) / 1000.0 + out.loc[beam_df.index] = rel_km.values + continue + + lons = beam_df['longitude'].to_numpy(dtype=float) + lats = beam_df['latitude'].to_numpy(dtype=float) + if len(lons) == 0: + continue + geod = Geod(ellps='WGS84') + cumulative_m = np.zeros(len(lons), dtype=float) + for i in range(1, len(lons)): + _, _, dist_m = geod.inv(lons[i - 1], lats[i - 1], lons[i], lats[i]) + cumulative_m[i] = cumulative_m[i - 1] + max(dist_m, 0.0) + out.loc[beam_df.index] = cumulative_m / 1000.0 + return out + + +def _project_to_utm(df: pd.DataFrame) -> pd.DataFrame: + projected = df.copy() + projected['lon'] = np.nan + projected['lat'] = np.nan + for beam_id, beam_df in projected.groupby('beam_id'): + if beam_df.empty: + continue + lon0 = float(beam_df['longitude'].iloc[0]) + lat0 = float(beam_df['latitude'].iloc[0]) + epsg = convert_wgs_to_utm(lon0, lat0) + proj = Proj(epsg) + x, y = proj(beam_df['longitude'].to_numpy(dtype=float), beam_df['latitude'].to_numpy(dtype=float)) + projected.loc[beam_df.index, 'lon'] = x + projected.loc[beam_df.index, 'lat'] = y + return projected + + +def 
def sliderule_to_framework_dataset(
    atl03_gdf,
    strong_beams_only: bool = True,
) -> pd.DataFrame:
    """
    Convert SlideRule ATL03 photon GeoDataFrame to the framework's sea_photon_dataset schema.

    Parameters
    ----------
    atl03_gdf : GeoDataFrame-like or None
        Photon table. Columns read when present: ``lat_ph``, ``lon_ph``,
        ``h_ph``, ``quality_ph``, ``signal_conf_ph``, ``ref_elev``,
        ``ref_azimuth``, ``solar_elevation``, ``bckgrd_rate``, ``delta_time``,
        ``segment_dist`` plus the beam / orientation columns used by the
        helper functions. Missing columns become NaN.
    strong_beams_only : bool
        Keep only beams whose id ends with the strong-beam suffix, when that
        suffix can be inferred.

    Returns
    -------
    pandas.DataFrame
        Rows with valid latitude, longitude and height, on a fresh 0..n-1
        index, with projected ``lon`` / ``lat``, ``relative_AT_dist`` (km)
        and ``is_land_label`` set to 0. Empty input yields an empty frame
        with the schema columns.
    """
    if atl03_gdf is None or len(atl03_gdf) == 0:
        return pd.DataFrame(columns=[
            'beam_id', 'latitude', 'longitude', 'lat', 'lon', 'photon_height',
            'quality_ph', 'photon_conf', 'ref_elevation', 'ref_azimuth',
            'relative_AT_dist', 'solar_elevation', 'background_rate', 'is_land_label'
        ])

    src = pd.DataFrame(atl03_gdf).copy()
    src['beam_id'] = _get_beam_id_from_track_pair(src)
    if strong_beams_only:
        strong_suffix = _infer_strong_beam_suffix(src)
        if strong_suffix in ('l', 'r'):
            src = src[src['beam_id'].str.endswith(strong_suffix)]

    def _numeric(column):
        # absent columns fall back to a NaN scalar, broadcast by DataFrame()
        return pd.to_numeric(src.get(column, np.nan), errors='coerce')

    out = pd.DataFrame({
        'beam_id': src['beam_id'].astype(str),
        'latitude': _numeric('lat_ph'),
        'longitude': _numeric('lon_ph'),
        'photon_height': _numeric('h_ph'),
        'quality_ph': _numeric('quality_ph'),
        'ref_elevation': _numeric('ref_elev'),
        'ref_azimuth': _numeric('ref_azimuth'),
        'solar_elevation': _numeric('solar_elevation'),
        'background_rate': _numeric('bckgrd_rate'),
        'delta_time': _numeric('delta_time'),
        'segment_dist': _numeric('segment_dist'),
    })

    if 'signal_conf_ph' in src.columns:
        out['photon_conf'] = _normalize_signal_conf(src['signal_conf_ph'])
    else:
        out['photon_conf'] = np.nan

    # The helpers below assign per-beam results by index label; the source
    # index is not guaranteed to be unique, so switch to a fresh range index.
    out = out.dropna(subset=['latitude', 'longitude', 'photon_height'])
    out = out.reset_index(drop=True)

    out = _project_to_utm(out)
    out['relative_AT_dist'] = _relative_distance_km_per_beam(out)
    out['is_land_label'] = 0

    return out.drop(columns=['delta_time', 'segment_dist'], errors='ignore')
def fetch_sliderule_atl03(
    region: List[Dict[str, float]],
    t0: str,
    t1: str,
    rgt: Optional[int] = None,
    ph_fields: Optional[List[str]] = None,
    sliderule_url: str = "slideruleearth.io",
    verbose: bool = False,
):
    """
    Fetch ATL03 photons from SlideRule.

    Parameters
    ----------
    region : list of dict
        Polygon passed as the ``poly`` request parameter.
    t0, t1 : str
        Start / end of the time window (``t0`` / ``t1`` request parameters).
    rgt : int, optional
        Reference ground track; only sent when given.
    ph_fields : list of str, optional
        Overrides the default ``atl03_ph_fields`` list.
    sliderule_url : str
        Service domain passed to ``sliderule.init``.
    verbose : bool
        Forwarded to ``sliderule.init``.

    Returns
    -------
    The result of ``icesat2.atl03sp`` for the request.

    Raises
    ------
    ImportError
        When the optional ``sliderule`` package cannot be imported.

    NOTE(review): the default list also requests non-photon-rate fields
    (e.g. ``ref_elev``, ``bckgrd_rate``) through ``atl03_ph_fields``; confirm
    that SlideRule serves them under this parameter.
    """
    try:
        from sliderule import sliderule, icesat2
    except Exception as e:
        # Any failure while importing the optional dependency is reported
        # uniformly as ImportError, with the original error chained.
        raise ImportError("sliderule package is required for SlideRule ingestion.") from e

    sliderule.init(sliderule_url, verbose=verbose)
    fields = ph_fields or [
        "delta_time", "lat_ph", "lon_ph", "h_ph", "quality_ph", "signal_conf_ph",
        "segment_dist", "ref_elev", "ref_azimuth", "solar_elevation", "bckgrd_rate",
    ]
    params = {"poly": region, "t0": t0, "t1": t1, "atl03_ph_fields": fields}
    if rgt is not None:
        params["rgt"] = int(rgt)
    return icesat2.atl03sp(params)


def fetch_and_prepare_sliderule_dataset(
    region: List[Dict[str, float]],
    t0: str,
    t1: str,
    rgt: Optional[int] = None,
    strong_beams_only: bool = True,
    sliderule_url: str = "slideruleearth.io",
    verbose: bool = False,
    ph_fields: Optional[List[str]] = None,
) -> pd.DataFrame:
    """
    One-call helper:
    fetch ATL03 photons from SlideRule and convert to framework dataset schema.

    All arguments are forwarded to ``fetch_sliderule_atl03``, except
    ``strong_beams_only`` which goes to ``sliderule_to_framework_dataset``.
    ``ph_fields`` is optional and keeps the fetch default when omitted.
    """
    atl03_gdf = fetch_sliderule_atl03(
        region=region,
        t0=t0,
        t1=t1,
        rgt=rgt,
        ph_fields=ph_fields,
        sliderule_url=sliderule_url,
        verbose=verbose,
    )
    return sliderule_to_framework_dataset(atl03_gdf, strong_beams_only=strong_beams_only)
def log_model(z, kd, e0):
    """Log-linear attenuation model: ``ln(e0) - kd * z``."""
    return np.log(e0) - kd * z


def CalculateKdFromFilteredSubsurfacePhoton(df, vertical_res=0.8):
    """Estimate Kd for one latitude bin from a photon-height histogram.

    The photon heights are histogrammed between their minimum and maximum,
    depth is measured downward from the shallowest bin centre, and
    ``log_model`` is fitted to the log photon counts of the non-empty bins.

    Parameters
    ----------
    df : pandas.DataFrame
        Photons of a single latitude bin; needs ``lat_bins`` and
        ``photon_height``.
    vertical_res : float
        Target vertical bin size. ``n = round(range / vertical_res)`` edges
        are generated, i.e. ``n - 1`` histogram bins.

    Returns
    -------
    pandas.DataFrame
        One row with ``lat_bins``, ``kd`` and ``e0``. ``kd`` / ``e0`` are NaN
        when there is too little data (fewer than 5 edges or fewer than 4
        non-empty bins) or the fit fails; a negative ``kd`` is replaced by NaN.
    """
    def _row(lat_bin, kd=np.nan, e0=np.nan):
        return pd.DataFrame({'lat_bins': [lat_bin], 'kd': [kd], 'e0': [e0]})

    if df.empty or 'lat_bins' not in df.columns:
        return _row(np.nan)

    lat_bin_value = df['lat_bins'].iloc[0]
    photon_height_min = df['photon_height'].min()
    photon_height_max = df['photon_height'].max()
    if (np.isnan(photon_height_min) or np.isnan(photon_height_max)
            or photon_height_min == photon_height_max):
        return _row(lat_bin_value)

    height_bins_number = round(abs(photon_height_max - photon_height_min) / vertical_res)
    if height_bins_number < 5:
        return _row(lat_bin_value)

    bin_edges = np.linspace(photon_height_min, photon_height_max, num=height_bins_number)
    counts, _ = np.histogram(df['photon_height'], bins=bin_edges)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    # x value for the model: depth below the shallowest bin centre
    zdepth = bin_centers.max() - bin_centers

    # Empty bins have no logarithm. Depth and count are dropped TOGETHER so
    # the two arrays stay aligned (dropping NaNs from each column separately
    # left x and y with different lengths and made every such fit fail).
    occupied = counts > 0
    if occupied.sum() <= 3:
        return _row(lat_bin_value)

    try:
        popt, _ = curve_fit(log_model, zdepth[occupied],
                            np.log(counts[occupied]), p0=[1, np.exp(1)])
        kd, e0 = popt
        if kd < 0:
            kd = np.nan
    except Exception as e:
        print(f"Error in curve fitting: {e}")
        kd, e0 = np.nan, np.nan

    return _row(lat_bin_value, kd, e0)
# another solution is to calculate kd without hist
def CalculateKdFromFilteredSubsurfacePhoton(df, vertical_res=0.8):
    """Estimate Kd for one latitude bin from the existing ``height_bins`` labels.

    Photons are counted per ``height_bins`` value, depth is measured downward
    from the largest label, and a straight line is fitted to log(count)
    against depth: ``kd`` is the negated slope, ``e0`` is ``exp(intercept)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Photons of a single latitude bin. Needs ``lat_bins`` and
        ``height_bins`` (labels convertible to float); ``latitude`` /
        ``longitude`` (or ``lat`` / ``lon``) are averaged when present.
    vertical_res : float
        Unused here; kept for interface compatibility.

    Returns
    -------
    pandas.DataFrame
        One row with ``lat_bins``, ``kd``, ``e0``, ``latitude``,
        ``longitude``. ``kd`` and ``e0`` are NaN when fewer than 4 non-empty
        height bins exist; a negative ``kd`` is replaced by NaN.
    """
    def _row(lat_bin, kd, e0, latitude, longitude):
        return pd.DataFrame({
            'lat_bins': [lat_bin],
            'kd': [kd],
            'e0': [e0],
            'latitude': [latitude],
            'longitude': [longitude]
        })

    def _mean_of_first(*columns):
        # mean of the first listed column that exists, else NaN
        for column in columns:
            if column in df.columns:
                return df[column].mean()
        return np.nan

    # Early exit if DataFrame is empty or missing required column
    if df.empty or 'lat_bins' not in df.columns:
        return _row(np.nan, np.nan, np.nan, np.nan, np.nan)

    lat_bin_value = df['lat_bins'].iloc[0]
    latitude = _mean_of_first('latitude', 'lat')
    longitude = _mean_of_first('longitude', 'lon')

    # 'height_bins' is required as well; report "no result" instead of
    # failing with a KeyError.
    if 'height_bins' not in df.columns:
        return _row(lat_bin_value, np.nan, np.nan, latitude, longitude)

    # photon count per height bin, ordered by bin label
    height_counts = df['height_bins'].value_counts().sort_index()
    bin_centers = np.asarray(height_counts.index.astype(float), dtype=float)
    photon_counts = np.asarray(height_counts.values, dtype=float)

    # depth below the largest label
    zdepth = bin_centers.max() - bin_centers

    # only non-empty bins with a finite depth have a usable logarithm
    valid = (photon_counts > 0) & np.isfinite(zdepth)

    if valid.sum() > 3:
        # Ordinary least squares of log(count) on depth; np.polyfit returns
        # [slope, intercept] for degree 1.
        slope, intercept = np.polyfit(zdepth[valid], np.log(photon_counts[valid]), 1)
        kd = -slope
        e0 = np.exp(intercept)

        # a negative attenuation coefficient is not physical
        if kd < 0:
            kd = np.nan
    else:
        kd, e0 = np.nan, np.nan

    return _row(lat_bin_value, kd, e0, latitude, longitude)
def calculate_kd(filtered_seafloor_subsurface_photon_dataset):
    """Compute one Kd row per ``lat_bins`` group.

    Each group is passed, including its ``lat_bins`` column, to
    ``CalculateKdFromFilteredSubsurfacePhoton`` and the one-row results are
    stacked. The groups are iterated explicitly rather than through
    ``GroupBy.apply(..., include_groups=True)``, whose keyword only exists in
    some pandas versions.

    Returns an empty DataFrame when there is no group.
    """
    logging.info("Calculating Kd from filtered subsurface photon dataset")

    kd_rows = [
        CalculateKdFromFilteredSubsurfacePhoton(lat_bin_data)
        for _, lat_bin_data in filtered_seafloor_subsurface_photon_dataset
        .groupby('lat_bins', observed=False)
    ]
    if not kd_rows:
        return pd.DataFrame()

    # every partial result keeps its own index, matching the previous
    # apply(...).droplevel(0) output
    return pd.concat(kd_rows)


def process_kd_calculation(Final_filtered_subsurface_photon_dataset):
    """Run ``calculate_kd`` beam by beam and combine the results.

    Parameters
    ----------
    Final_filtered_subsurface_photon_dataset : pandas.DataFrame
        Needs ``beam_id`` and the columns required by ``calculate_kd``.

    Returns
    -------
    pandas.DataFrame
        Kd rows of all beams with a ``beam_id`` column and a fresh index;
        empty when the input contains no beam.
    """
    kd_beam_datasets = []

    for beam_id, beam_data in Final_filtered_subsurface_photon_dataset.groupby('beam_id'):
        logging.info("Calculating Kd for beam: %s", beam_id)

        beam_kd = calculate_kd(beam_data)
        # keep track of the beam each row belongs to
        beam_kd['beam_id'] = beam_id
        kd_beam_datasets.append(beam_kd)

    if not kd_beam_datasets:
        # pd.concat raises on an empty list
        return pd.DataFrame()

    return pd.concat(kd_beam_datasets, ignore_index=True)
def plot_photon_height(sea_photon_dataset, hlims=(-25, 10)):
    """Show a scatter plot of photon height against latitude.

    Parameters:
    - sea_photon_dataset: DataFrame with 'latitude' and 'photon_height' columns.
    - hlims: (bottom, top) limits applied to the y-axis.
    """
    fig, ax = plt.subplots()
    ax.scatter(sea_photon_dataset['latitude'], sea_photon_dataset['photon_height'],
               s=1, c='k', alpha=0.15, edgecolors='none', label='ATL03 Photons')
    # The x data is latitude, so label it as such.
    ax.set_xlabel('Latitude')
    ax.set_ylabel('Height (h_ph)')
    ax.set_title('Scatter Plot of ATL03 Photons')
    ax.set_ylim(hlims)
    ax.legend()
    plt.show()
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)


def plot_filtered_seafloor_photons(filtered_seafloor_subsurface_dataset, sea_photon_dataset, sea_surface_height, output_path):
    """
    Plots the filtered seafloor photon data along with sea surface and seafloor elevation data.

    The figure is written to ``output_path`` as a 400-dpi JPEG and then shown.

    Parameters:
    - filtered_seafloor_subsurface_dataset: DataFrame containing the filtered subsurface photon data
      (reads 'relative_AT_dist' and 'seafloor_elevation').
    - sea_photon_dataset: DataFrame containing the original sea photon data
      (reads 'relative_AT_dist' and 'photon_height').
    - sea_surface_height: Array of sea surface height values.
    - output_path: Path to save the output plot.
    """
    along_track = filtered_seafloor_subsurface_dataset['relative_AT_dist']

    # The surface heights carry no x coordinate of their own: spread them
    # evenly over the along-track extent of the filtered dataset.
    sea_surface_x_axis_bins = np.linspace(along_track.min(), along_track.max(),
                                          len(sea_surface_height))
    below_surface = np.asarray(sea_surface_height, dtype=float) - 0.5

    hlims = [-25, 10]
    fig, ax = plt.subplots()

    # Scatter plot of subsurface photons
    ax.scatter(sea_photon_dataset['relative_AT_dist'], sea_photon_dataset['photon_height'],
               s=1, c='k', alpha=0.15, edgecolors='none', label='Subsurface ATL03 Photons')

    # Overlay the seafloor elevation data
    ax.plot(along_track, filtered_seafloor_subsurface_dataset['seafloor_elevation'],
            linewidth=0.8, c='b', alpha=0.4, label='Seafloor Elevation')

    ax.plot(sea_surface_x_axis_bins, below_surface,
            linewidth=0.8, color='#DD571C', alpha=0.4, label='0.5 m Below Surface Peak')

    # Set labels and title
    ax.set_xlabel('Distance From Start of Track (km)')
    ax.set_ylabel('Photon Height (m)')
    ax.set_title('Scatter Plot of Filtered Sea Photon Dataset')
    ax.set_ylim(hlims)

    # One legend call is enough; a second call would only replace the first.
    ax.legend(loc='upper right')

    # Save the plot
    fig.savefig(output_path, dpi=400, format='jpeg')

    # Show the plot, then release the figure so repeated calls do not pile up.
    plt.show()
    plt.close(fig)
# Overlay per-bin Kd estimates on the subsurface photon cloud
def plot_kd_photons(OutputPath, timestamp, target_beam_ids, subsurface_photon_dataset, Kd_DF_MergedDistance):
    """Save and show photon heights with Kd values on a twin y-axis.

    Both inputs are first restricted to rows whose 'beam_id' is in
    ``target_beam_ids``.  Photon heights and the seafloor elevation are
    drawn on the left axis and Kd values on the right axis, all against
    'relative_AT_dist'.  The figure is written to
    ``<OutputPath>/<timestamp>_IS2_subsurface_kd_2.jpg`` and then shown.
    """
    photons = subsurface_photon_dataset.loc[
        subsurface_photon_dataset['beam_id'].isin(target_beam_ids)
    ]
    kd_rows = Kd_DF_MergedDistance.loc[
        Kd_DF_MergedDistance['beam_id'].isin(target_beam_ids)
    ]
    along_track = photons['relative_AT_dist']
    label_size = 18

    # Left axis: photon cloud first, then the seafloor line on top of it.
    fig, height_ax = plt.subplots(figsize=(10, 6))
    height_ax.scatter(
        along_track,
        photons['photon_height'],
        s=1.5,
        c='k',
        alpha=0.2,
        edgecolors='none',
        label='Subsurface ATL03 Photon Height',
    )
    height_ax.plot(
        along_track,
        photons['seafloor_elevation'],
        linewidth=0.8,
        c='b',
        alpha=0.4,
        label='Seafloor Elevation',
    )
    height_ax.set_xlabel('Relative Along-Track Distance', fontsize=label_size)
    height_ax.set_ylabel('Photon Height', color='b', fontsize=label_size)
    height_ax.tick_params(axis='x', labelsize=label_size)
    height_ax.tick_params(axis='y', labelcolor='b', labelsize=label_size)

    # Right axis: Kd values sharing the same x-axis.
    kd_ax = height_ax.twinx()
    kd_ax.scatter(
        kd_rows['relative_AT_dist'],
        kd_rows['kd'],
        label='Kd values',
        color='r',
        alpha=0.6,
    )
    kd_ax.set_ylabel('Kd Value', color='r', fontsize=label_size)
    kd_ax.tick_params(axis='y', labelcolor='r', labelsize=label_size)

    # One figure-level legend collecting the entries of both axes.
    legend_handles, legend_labels = [], []
    for axis in (height_ax, kd_ax):
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        legend_handles = legend_handles + axis_handles
        legend_labels = legend_labels + axis_labels
    fig.legend(legend_handles, legend_labels, loc='upper right', bbox_to_anchor=(0.85, 0.85))

    fig.tight_layout()
    target_file = os.path.join(OutputPath, f'{timestamp}_IS2_subsurface_kd_2.jpg')
    plt.savefig(target_file, dpi=400, format='jpeg')
    plt.show()
def produce_figures(binned_data, bath_height, sea_height, solo_sea_surface_label,
                    y_limit_top, y_limit_bottom, percentile, file, geo_df,
                    ref_y, ref_z, beam, epsg_num,
                    output_dir="C:/Workstation/ICESat2_HLS/"):
    """Plot classified photons with bathymetry / sea-surface series and save a PDF.

    A dedicated figure is created for every call and closed after saving,
    so repeated calls neither draw on top of each other nor change the
    global matplotlib configuration.

    Parameters:
    - binned_data: object whose ``lat`` attribute gives the x extent of the plot.
    - bath_height: sequence of bathymetry heights, spread evenly over that extent.
    - sea_height: sequence of sea-surface heights; non-NaN entries are replaced
      by their common median.
    - solo_sea_surface_label: per-bin labels; values equal to 1 and 0 are drawn
      as two separate series.
    - y_limit_top, y_limit_bottom: y-axis limits.
    - percentile, beam, epsg_num: used to build the output file name;
      ``epsg_num`` is also the source CRS for the WGS84 conversion.
    - file: input file name; shown in the title and, with '.h5' removed,
      used in the output file name.
    - geo_df: DataFrame with ``lat``, ``lon`` and ``photon_height``.  It is
      modified in place: 'lon_wgs84' and 'lat_wgs84' columns are added.
    - ref_y, ref_z: reference points drawn in black behind the classified photons.
    - output_dir: directory for the PDF; defaults to the location this
      function has always written to.

    Returns:
    - GeoDataFrame built from ``geo_df`` with WGS84 point geometry (EPSG:4326).
    """
    lat_min = binned_data.lat.min()
    lat_max = binned_data.lat.max()

    # Create bins for latitude
    # NOTE(review): the +20 / +10 shifts are kept exactly as they were; their
    # purpose is not visible from this function -- confirm they are intended.
    bath_x_axis_bins = np.linspace(lat_min, lat_max, len(bath_height)) + 20
    sea_surface_x_axis_bins = np.linspace(lat_min, lat_max, len(sea_height)) + 10

    # Create new dataframes for median values
    bath_median_df = pd.DataFrame({'x': bath_x_axis_bins, 'y': bath_height})

    # Uniform sea surface: every non-NaN bin gets the overall median, computed
    # once instead of once per element.
    sea_height_values = np.asarray(sea_height, dtype=float)
    has_value = ~np.isnan(sea_height_values)
    sea_level = np.median(sea_height_values[has_value]) if has_value.any() else np.nan
    sea_median_df = pd.DataFrame({
        'x': sea_surface_x_axis_bins,
        'y': np.where(has_value, sea_level, np.nan),
    })

    # Split the solo sea surface labels into their two classes
    sea_surface_label_df = pd.DataFrame({'x': sea_surface_x_axis_bins,
                                         'y': solo_sea_surface_label})
    solo_rows = sea_surface_label_df[sea_surface_label_df.y == 1]
    non_solo_rows = sea_surface_label_df[sea_surface_label_df.y == 0]

    timestr = time.strftime("%Y%m%d_%H%M%S")
    file_stem = file.replace('.h5', '')
    pdf_name = (file_stem + '_gt' + str(beam) + '_' + str(percentile)
                + '_EPSG' + str(epsg_num) + '_' + timestr + ".pdf")

    # Own figure with an explicit size instead of mutating plt.rcParams.
    fig = plt.figure(figsize=(40, 25))
    try:
        # Plot raw points
        plt.scatter(ref_y, ref_z, s=0.5, alpha=0.1, c='black')
        plt.scatter(geo_df.lat, geo_df.photon_height, s=0.8, marker='o',
                    alpha=0.1, c='red', label='Classified Photons')

        # Plot median values
        plt.scatter(bath_median_df.x, bath_median_df.y,
                    marker='o', c='r', alpha=0.8, s=2, label='Median bathymetry')
        plt.scatter(sea_median_df.x, sea_median_df.y,
                    marker='o', c='b', alpha=1, s=2, label='Median sea surface')

        plt.scatter(solo_rows.x, solo_rows.y,
                    marker='o', c='pink', alpha=1, s=3, label='solo_sea_surface')
        plt.scatter(non_solo_rows.x, non_solo_rows.y,
                    marker='o', c='g', alpha=1, s=3, label='non_solo_sea_surface')

        # Insert titles and subtitles
        plt.title('Icesat2 Bathymetry\n' + file)
        plt.xlabel('Latitude', fontsize=25)
        plt.ylabel('Photon Height (m)', fontsize=25)
        plt.xticks(fontsize=16)
        plt.yticks(fontsize=16)

        plt.legend(loc="upper left", prop={'size': 20})

        # Limit the x and y axes using parameters
        plt.xlim(left=lat_min, right=lat_max)
        plt.ylim(top=y_limit_top, bottom=y_limit_bottom)

        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, pdf_name))
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    # convert corrected locations back to wgs84 (useful to contain)
    transformer = Transformer.from_crs("EPSG:" + str(epsg_num),
                                       "EPSG:4326", always_xy=True)
    print(transformer)
    lon_wgs84, lat_wgs84 = transformer.transform(
        geo_df.lon.values, geo_df.lat.values)

    geo_df['lon_wgs84'] = lon_wgs84
    geo_df['lat_wgs84'] = lat_wgs84

    geodf = gpd.GeoDataFrame(geo_df,
                             geometry=gpd.points_from_xy(geo_df.lon_wgs84,
                                                         geo_df.lat_wgs84))

    geodf.set_crs(epsg=4326, inplace=True)

    # Previously this frame was built and dropped; hand it to the caller.
    return geodf
# Ground-track beam name -> identifier shared by the left/right beams of a pair.
PAIR_ID_MAP = {
    'gt1l': 'gt1_pair',
    'gt1r': 'gt1_pair',
    'gt2l': 'gt2_pair',
    'gt2r': 'gt2_pair',
    'gt3l': 'gt3_pair',
    'gt3r': 'gt3_pair',
}


def get_target_beams(all_beams, beam_attrs, target_beams_arg):
    """Return the strong beams to process, optionally narrowed by a CLI list.

    Parameters
    ----------
    all_beams : iterable of str
        Beam names available in the granule.
    beam_attrs : mapping
        ``beam_attrs[beam]['atlas_beam_type']`` holds the beam type, either
        as bytes or as text.
    target_beams_arg : str or None
        Comma-separated beam names.  Empty or None selects every strong beam.

    Returns
    -------
    list of str
        Strong beams in ``all_beams`` order, or the requested beams (in the
        requested order) that are strong.
    """
    def beam_type(beam):
        raw = beam_attrs[beam]['atlas_beam_type']
        # The attribute may arrive as bytes or as str; accept both.
        return raw.decode('utf-8') if isinstance(raw, bytes) else str(raw)

    strong_beams = [gtx for gtx in all_beams if beam_type(gtx) == 'strong']
    if not target_beams_arg:
        return strong_beams

    requested = [b.strip() for b in target_beams_arg.split(',') if b.strip()]
    return [b for b in requested if b in strong_beams]


def optionally_combine_paired_beams_for_kd(dataset, horizontal_res, enabled=False):
    """
    Optionally combine left/right beams into pair groups before Kd fitting.
    Uses shared along-track bins from relative_AT_dist when available.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Photon table with a 'beam_id' column.
    horizontal_res : number
        Along-track bin width; values below 1 are treated as 1.
    enabled : bool, optional
        When False (default), or when ``dataset`` is empty, ``dataset`` is
        returned unchanged.

    Returns
    -------
    pandas.DataFrame
        A copy with the original beam kept in 'source_beam_id', 'beam_id'
        replaced by its pair identifier (beams missing from PAIR_ID_MAP keep
        their name) and, when 'relative_AT_dist' exists, 'lat_bins' replaced
        by an integer along-track bin index (-1 where the distance is
        missing or not numeric).
    """
    if (not enabled) or dataset.empty:
        return dataset

    combined = dataset.copy()
    combined['source_beam_id'] = combined['beam_id']
    combined['beam_id'] = combined['beam_id'].map(PAIR_ID_MAP).fillna(combined['beam_id'])

    if 'relative_AT_dist' in combined.columns:
        # relative_AT_dist is scaled by 1000 before binning, as before.
        rel_m = pd.to_numeric(combined['relative_AT_dist'], errors='coerce') * 1000.0
        bin_width = max(horizontal_res, 1)
        # Floor-divide so every value is already a whole number: casting a
        # fractional float straight to an integer dtype is refused by pandas.
        bin_index = rel_m // bin_width
        combined['lat_bins'] = bin_index.fillna(-1).astype(int)

    return combined
Check input file and --target_beams.") + sys.exit(1) + logger.info("Strong beams selected: %s", target_strong_beams) + plot_target_beam = [target_strong_beams[0]] + + sea_photon_dataset = Extract_sea_photons(is2_mds, target_strong_beams, shoreline_data_path) + sea_photon_dataset = apply_optional_ir_ap_filter( + sea_photon_dataset, + enabled=args.enable_ir_ap_filter, + quality_max=args.ir_ap_quality_max, + min_signal_conf=args.ir_ap_min_signal_conf, + ) + sea_photon_dataset = apply_optional_solar_background_filter( + sea_photon_dataset, + enabled=args.enable_solar_background_filter, + day_threshold=args.solar_elevation_day_threshold, + background_quantile=args.solar_background_quantile, + min_signal_conf=args.solar_background_min_signal_conf, + ) + + binned_dataset_sea_surface = process_sea_photon_binning( + sea_photon_dataset, horizontal_res=args.horizontal_res, vertical_res=args.vertical_res + ) + + post_refraction_refit_enabled = args.enable_post_refraction_refit + if post_refraction_refit_enabled and (not args.enable_refraction_correction): + logger.warning( + "--enable_post_refraction_refit requested without --enable_refraction_correction. " + "The post-refraction refit step will be skipped." 
+ ) + post_refraction_refit_enabled = False + + sea_surface_height, sea_surface_label, filtered_seafloor_subsurface_photon_dataset = \ + process_subsurface_photon_filtering( + binned_dataset_sea_surface, + gebco_file_path_lists, + args.subsurface_thresh, + args.ignore_subsurface_height_thres, + use_atl24_filter=args.enable_atl24_filter, + atl24_file_path=atl24_file_path, + atl24_max_match_distance_deg=args.atl24_max_match_distance_deg, + use_gebco_filter=args.enable_gebco_filter, + apply_histogram_quality_filter=args.enable_histogram_quality_filter, + histogram_quality_min_ratio=args.histogram_quality_min_ratio, + histogram_quality_depth_min=args.histogram_quality_depth_min, + histogram_quality_depth_max=args.histogram_quality_depth_max, + apply_surface_sigma_filter=args.enable_surface_sigma_filter, + surface_sigma_max=args.surface_sigma_max, + apply_refraction_correction=args.enable_refraction_correction, + refraction_water_temp_c=args.refraction_water_temp_c, + refraction_wavelength_nm=args.refraction_wavelength_nm, + apply_post_refraction_refit=post_refraction_refit_enabled, + apply_flattening=args.enable_sea_surface_flattening, + flattening_window_m=args.sea_surface_flattening_window_m, + horizontal_res=args.horizontal_res, + vertical_res=args.vertical_res, + ) + + final_filtered_subsurface_photon_dataset = filtered_seafloor_subsurface_photon_dataset[ + filtered_seafloor_subsurface_photon_dataset['beam_id'].isin(target_strong_beams) + ].copy() + + if args.enable_convex_hull_filter: + final_filtered_subsurface_photon_dataset, convex_hull_areas, convex_hulls = \ + filter_photon_dataset_by_hull_area( + final_filtered_subsurface_photon_dataset, + hull_area_threshold=args.convex_hull_area_threshold + ) + if plot_target_beam: + plot_convex_hulls( + final_filtered_subsurface_photon_dataset, + plot_target_beam, + convex_hulls, + convex_hull_areas + ) + + subsurface_output_path = os.path.join( + output_path, + 
f"{timestamp}_strongBeam_{'_'.join(target_strong_beams)}_subsurface_photons.csv", + ) + final_filtered_subsurface_photon_dataset.to_csv(subsurface_output_path, index=False) + + kd_input_dataset = optionally_combine_paired_beams_for_kd( + final_filtered_subsurface_photon_dataset, + horizontal_res=args.horizontal_res, + enabled=args.enable_paired_beam_combine + ) + subsurface_photon_df_added_kd = process_kd_calculation(kd_input_dataset) + kd_output_path = os.path.join(output_path, f"{timestamp}_AddedKdDataset_strongBeams_Further.csv") + subsurface_photon_df_added_kd.to_csv(kd_output_path, index=False) + + if 'relative_AT_dist' in kd_input_dataset.columns: + unique_photon_dataset = kd_input_dataset[ + ['relative_AT_dist', 'lat_bins', 'photon_height'] + ].drop_duplicates() + unique_photon_dataset['relative_AT_dist_center'] = unique_photon_dataset.groupby( + 'lat_bins', observed=False + )['relative_AT_dist'].transform('mean') + + def find_closest(group): + center = group['relative_AT_dist_center'].iloc[0] + group = group.copy() + group['dist_to_center'] = abs(group['relative_AT_dist'] - center) + return group.loc[[group['dist_to_center'].idxmin()]] + + closest_to_center = unique_photon_dataset.groupby('lat_bins', observed=False).apply( + find_closest, include_groups=True + ) + closest_to_center.index = closest_to_center.index.droplevel(0) + kd_df_merged_distance = closest_to_center.merge( + subsurface_photon_df_added_kd, + on='lat_bins', + how='left' + ).drop(columns=['relative_AT_dist_center', 'dist_to_center'], errors='ignore') + + plot_kd_photons( + output_path, + timestamp, + plot_target_beam, + kd_input_dataset, + kd_df_merged_distance, + ) + + logger.info("SUCCESS! 
Kd output: %s", kd_output_path) + + +if __name__ == '__main__': + cli_args = get_args() + try: + run_pipeline(cli_args) + except Exception as e: + logger.error("An error occurred: %s", e) + raise diff --git a/icesat-2_kdph_py/requirements.txt b/icesat-2_kdph_py/requirements.txt new file mode 100644 index 0000000..e8be0fc --- /dev/null +++ b/icesat-2_kdph_py/requirements.txt @@ -0,0 +1,14 @@ +geopandas==0.12.1 +h5py==3.11.0 +hdbscan==0.8.29 +matplotlib==3.8.2 +netCDF4==1.6.2 +numpy==1.26.4 +pandas==2.2.1 +pyproj==2.6.1.post1 +rasterio==1.3.10 +Rtree==0.9.7 +scikit_learn==1.4.1.post1 +scipy==1.12.1 +Shapely==1.8.2 +utm==0.7.0