Biogeochemical (BGC) analysis at CTD sites
This notebook compares observed and modeled DIC, alkalinity, and nutrients at CTD stations (HV stations) in Hvalfjörður.
Observations: Load processed HAFRO BGC casts and convert to volumetric units for Alk and DIC.
Model fields: Regrid Iceland2 BGC and physical fields to fixed depth levels and extract around HV stations.
Normalized metrics: Compute salinity-normalized Alk/DIC in both data and model to separate freshwater vs internal processes.
Spatial structure: Create along-fjord sections, boxplots by cruise, and depth profiles to visualize gradients.
Nutrient comparison: Analyze NO₃, PO₄, SiO₃, and NH₄ distributions (surface and full water column) across time.
Use this notebook to assess whether the model reproduces the observed carbon and nutrient structure and variability.
Data Processing¶
The following cells load observations and model output, align variables in space and time, and prepare derived fields used in the comparison plots.
Core libraries are imported for file handling, numerical processing, and dataset manipulation used throughout the workflow.
import subprocess
import os
import pandas as pd
#import netCDF4
import numpy as np
import glob
import time
import matplotlib.pyplot as plt
import copy
import xarray as xr
from datetime import datetime, timedelta
import dask
from scipy.interpolate import griddata
#from ocean_c_lab_tools import *
#from celluloid import Camera
#import PyCO2SYS as csys
import gsw as sw
from roms_regrid import *Input paths for model grid and observational BGC sources are defined so all downstream loading uses consistent file references.
model_grid_path="/home/x-uheede/S/Iceland2_MARBL_2024_60m/P_INPUT/Iceland2_grid.nc"
obs_bgc_path='/anvil/projects/x-ees250129/x-uheede/C-Star-in-Hvalfjordur/data/staged/seanoe_hvalfjordur/2026-04-27/seanoe_110401/discrete_samples_qc/discrete_samples_qc.xlsx'
bgc_model_paths = [
"/anvil/projects/x-ees250129/x-uheede/Iceland_experiments/Iceland2_MARBL_2024_60m/Iceland2_MARBL_2024_bgc.2024??????????.nc"
]
phys_model_paths = [
"/anvil/projects/x-ees250129/x-uheede/Iceland_experiments/Iceland2_MARBL_2024_60m/Iceland2_MARBL_2024_his.2024??????????.nc"
]
Observational spreadsheet data is opened and consolidated into analysis-ready tables for station and variable comparisons.
xls = pd.ExcelFile(obs_bgc_path)
combo = pd.read_excel(xls, 'Sheet1', decimal='.', header=27, skiprows=[28])
# We use .str.replace with regex=False for a simple string swap
combo['HV'] = combo['Station_name'].str.replace('HV', '', regex=False).astype(int)
# 2. Create the 'time' column
combo['time'] = pd.to_datetime(combo[['Year_UTC', 'Month_UTC', 'Day_UTC']].rename(
columns={'Year_UTC': 'year', 'Month_UTC': 'month', 'Day_UTC': 'day'}
))
# -----------------------------
# CREATE DEPTH BINS (0–9.9, 10–19.9, ...)
# -----------------------------
combo["depth_bin"] = (np.floor(combo["Depth"] / 10) * 10).astype(int)
obs=xr.Dataset.from_dataframe(combo)
obs=obs.set_index(index=['HV','depth_bin','time'])
obs=obs.drop_duplicates('index')
obs=obs.unstack('index')
obs=obs.rename(name_dict={'Latitude':'lat','Longitude':'lon'})comboObserved hydrographic and biogeochemical fields are cleaned and transformed, including derived seawater properties used in analysis.
density=sw.density.sigma0(obs['Salinity_CTD'],obs['Temperature_CTD'])
obs['Alk']=obs['TA'] * (1 / 1000) * (density+1000)
obs['DIC']=obs['DIC'] * (1 / 1000) * (density+1000)# -----------------------------
# QC MASK (all variables must pass)
# -----------------------------
qc_mask = (
(obs["TEMP_flag"] == 2) &
(obs["Salinity_CTD_flag"] == 2) &
(obs["TA_flag"] == 2) &
(obs["DIC_flag"] == 2)
)
# -----------------------------
# APPLY MASK INTO obs (IMPORTANT)
# -----------------------------
obs["Alk_qc"] = obs["Alk"].where(qc_mask)
obs["DIC_qc"] = obs["DIC"].where(qc_mask)
obs["Salinity_qc"] = obs["Salinity_CTD"].where(qc_mask)
# -----------------------------
# Constants
# -----------------------------
TA0 = 282
Sref = 34
DIC0 = 313
# -----------------------------
# NORMALIZATION (QC-CONSISTENT)
# -----------------------------
obs["nTA"] = ((obs["Alk_qc"] - TA0) / obs["Salinity_qc"]) * Sref + TA0
obs["nDIC"] = ((obs["DIC_qc"] - DIC0) / obs["Salinity_qc"]) * Sref + DIC0Model interfaces are initialized and the model grid is loaded to support extraction at observation locations.
from roms_tools import Grid, ROMSOutputgrid = Grid.from_file(model_grid_path)Model output datasets are opened with the selected forcing/run configuration and prepared for regridding.
roms_output_bgc = ROMSOutput(
grid=grid,
path=bgc_model_paths,
use_dask=True,
)
roms_output_phys = ROMSOutput(
grid=grid,
path=phys_model_paths,
use_dask=True,
)# open grid
grid=xr.open_mfdataset(model_grid_path)
# regridding
h=roms_regrid(grid,grid['h'])
mask=roms_regrid(grid,grid['mask_rho'])Target variables and depth levels are selected, and model fields are regridded to analysis depths.
var_names=['ALK','DIC','PO4','NO3','SiO3','NH4','O2','Fe',"spChl","diatChl"]
depth_levels=np.arange(0, 40,10)bgc = roms_output_bgc.regrid(depth_levels=depth_levels,var_names=var_names).load()
phys = roms_output_phys.regrid(depth_levels=depth_levels)Model and observational time coordinates are normalized so both sources can be compared on consistent timestamps.
time_index = bgc.indexes['time']
unique_time = ~time_index.duplicated()
bgc = bgc.isel(time=unique_time)
bgc = bgc.sortby('time')time_index = phys.indexes['time']
unique_time = ~time_index.duplicated()
phys = phys.isel(time=unique_time)
phys = phys.sortby('time')Reference constants and conversion terms are set for derived chemistry calculations.
Sref=34
nTA_model=((bgc['ALK'].load()-TA0)/phys['salt'].thin({'time': 24}).load())*Sref+TA0
nDIC_model=((bgc['DIC'].load()-DIC0)/phys['salt'].thin({'time': 24}).load())*Sref+DIC0
bgc['nTA']=nTA_model
bgc['nDIC']=nDIC_modelStation coordinates are assembled and averaged where needed to define comparison points.
# define location which calculations the average location of each station
def get_location(obs, hv_values):
locations = []
for hv in hv_values:
lat = obs['lat'].sel(HV=hv).isel(depth_bin=0).mean('time').squeeze().values
lon = obs['lon'].sel(HV=hv).isel(depth_bin=0).mean('time').squeeze().values + 360
locations.append([lat, lon])
return locations
# List of HV values
hv_values = np.unique(obs['HV'].values)
# Get the locations
locations = get_location(obs, hv_values)# Assuming locations is a list of lat/lon pairs
bgc_values = []
# Loop over the first 10 locations and store each selection in t_values
for i in range(7):
lat, lon = locations[i]
# Select the 't' values at the nearest lat/lon
bgc_selected = bgc.sel(lat=lat, method='nearest').sel(lon=lon, method='nearest')
# Store the result in the list
bgc_values.append(bgc_selected)
# Combine the selections into an xarray Dataset or DataArray
bgc_values_combined = xr.concat(bgc_values, dim='location')
# Assign a location coordinate for clarity (optional)
bgc_values_combined = bgc_values_combined.assign_coords(location=('location', [1,3,5,7,9,10,12]))
bgc_values_combined['depth']=bgc_values_combined.depth*(-1)
Tabular helpers are created to streamline matching, interpolation, and alignment between datasets.
import pandas as pd
import numpy as np
# 1. Extract unique time list from obs (the actual cruise dates)
time_list = np.unique(obs['time'].values)
# 2. Identify the target dates we want to fetch from the model
# We look for 2025 data specifically for Feb/March
time_pd = pd.DatetimeIndex(time_list)
selection_targets = [
t.replace(year=2025) if t.month in [2, 3] else t
for t in time_pd
]
# 3. Select nearest model output using the shifted dates
bgc_sub = bgc_values_combined.sel(
time=selection_targets,
method='nearest'
)
# 4. OVERWRITE the model's time coordinate back to the original obs times
# This "tricks" the model data into having the same time labels as your obs
bgc_sub = bgc_sub.assign_coords(time=time_list)The processed subset is loaded into memory as the final input for the plotting section.
bgc_sub.load()Plot: Fjord Domain and Station Context¶
This figure maps model bathymetry and land mask in the fjord and overlays the observational station layout used in later comparisons.
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Define depth levels
levels = np.arange(0, 80)
# Create figure and subplots (3 rows: 1 for the map, 2 for boxplots)
fig = plt.figure(figsize=(12, 13)) # Wider figure for side-by-side plots
gs = fig.add_gridspec(3, 2, height_ratios=[2, 1, 1], width_ratios=[1, 1])
# --- Shared Map (Top Row, Spanning Both Columns) ---
ax_map = fig.add_subplot(gs[0, :], projection=ccrs.PlateCarree())
# Plot bathymetry
cf1 = ax_map.contourf(h.lon, h.lat, h, levels, transform=ccrs.PlateCarree(), cmap='cividis')
ax_map.contourf(h.lon, h.lat, mask.where(mask != 1), cmap='Greys', transform=ccrs.PlateCarree())
# Add station labels
for i, e in enumerate(locations):
ax_map.text(e[1], e[0], hv_values[i], color='red', size=10, ha='center', va='center', transform=ccrs.PlateCarree())
# Gridlines
gls = ax_map.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, color='darkgray', alpha=0.5, linestyle='--')
gls.top_labels = False
gls.right_labels = False
# Colorbar
cb1 = plt.colorbar(cf1, shrink=0.5, ax=ax_map, orientation='horizontal', pad=0.05)
cb1.set_label('Bottom Depth (m)', fontsize=10)
ax_map.set_extent([-22, -21.35, 64.25, 64.45], ccrs.PlateCarree())
ax_map.set_title('Station Overview')
# --- Function for Extracting Data ---
def extract_cruise_data(var, depth=None, num_cruises=22):
"""Extracts non-NaN values for a given variable across cruises."""
cruises = []
for i in range(num_cruises):
data = np.hstack(var.isel(time=i) if depth is None else var.isel(time=i, depth_bin=depth))
cruises.append(data[~np.isnan(data)])
return cruises
def extract_model_data(var, depth=None, num_cruises=22):
"""Extracts non-NaN values from the model across time steps."""
cruises = []
for i in range(num_cruises):
data = np.hstack(var.isel(time=i) if depth is None else var.isel(time=i, depth=depth))
cruises.append(data[~np.isnan(data)])
return cruises
# --- Boxplots for Observations (Left) and Model (Right) ---
ax_obs = fig.add_subplot(gs[1, 0]) # Observations
ax_model = fig.add_subplot(gs[1, 1]) # Model
ax_obs.boxplot(extract_cruise_data(obs["nTA"]), patch_artist=True)
ax_obs.set_title('Salinity Normalized Alk Variation (Observed)')
ax_obs.set_ylabel('mmol/m$^3$')
ax_obs.set_xticklabels([])
ax_obs.set_ylim(2280,2370)
ax_model.boxplot(extract_model_data(bgc_sub['nTA']), patch_artist=True)
ax_model.set_title('Salinity Normalized Alk Variation (Model)')
ax_model.set_xticklabels([])
ax_model.set_ylim(2230,2320)
# --- Surface-Only Boxplots for Observations (Left) and Model (Right)---
ax_obs_surface = fig.add_subplot(gs[2, 0]) # Observations (Surface)
ax_model_surface = fig.add_subplot(gs[2, 1]) # Model (Surface)
ax_obs_surface.boxplot(extract_cruise_data(obs["nTA"], depth=0), patch_artist=True)
ax_obs_surface.set_title('Salinity Normalized Alk, Surface Only (Observed)')
ax_obs_surface.set_xlabel('Cruise')
ax_obs_surface.set_ylabel('mmol/m$^3$')
ax_obs_surface.set_xticklabels(pd.to_datetime(obs['time'].isel(time=slice(0,22))).strftime('%d/%m/%Y'), rotation=45)
ax_obs_surface.set_ylim(2310,2390)
ax_model_surface.boxplot(extract_model_data(bgc_sub['nTA'], depth=0), patch_artist=True)
ax_model_surface.set_title('Salinity Normalized Alk, Surface Only (Model)')
ax_model_surface.set_xlabel('Cruise')
ax_model_surface.set_ylabel('mmol/m$^3$')
ax_model_surface.set_xticklabels(pd.to_datetime(bgc_sub['time']).strftime('%d/%m/%Y'), rotation=45)
ax_model_surface.set_ylim(2230,2310)
plt.tight_layout()
plt.show()
Plot: Observation Locations Over Model Grid¶
This plot shows the selected observational points relative to the model grid domain to verify spatial alignment before profile analysis.
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Define depth levels
levels = np.arange(0, 80)
# Create figure and subplots (3 rows: 1 for the map, 2 for boxplots)
fig = plt.figure(figsize=(12, 7)) # Wider figure for side-by-side plots
gs = fig.add_gridspec(2, 2, height_ratios=[1, 1], width_ratios=[1, 1])
# --- Function for Extracting Data ---
def extract_cruise_data(var, depth=None, num_cruises=15):
"""Extracts non-NaN values for a given variable across cruises."""
cruises = []
for i in range(num_cruises):
data = np.hstack(var.isel(time=i) if depth is None else var.isel(time=i, depth_bin=depth))
cruises.append(data[~np.isnan(data)])
return cruises
def extract_model_data(var, depth=None, num_cruises=15):
"""Extracts non-NaN values from the model across time steps."""
cruises = []
for i in range(num_cruises):
data = np.hstack(var.isel(time=i) if depth is None else var.isel(time=i, depth=depth))
cruises.append(data[~np.isnan(data)])
return cruises
# --- Boxplots for Observations (Left) and Model (Right) ---
ax_obs = fig.add_subplot(gs[0, 0]) # Observations
ax_model = fig.add_subplot(gs[0, 1]) # Model
ax_obs.boxplot(extract_cruise_data(obs['nDIC']), patch_artist=True)
ax_obs.set_title('Salinity Normalized DIC Variation (Observed)')
ax_obs.set_ylabel('mmol/m$^3$')
ax_obs.set_xticklabels([])
ax_obs.set_ylim(2070,2180)
ax_model.boxplot(extract_model_data(bgc_sub['nDIC']), patch_artist=True)
ax_model.set_title('Salinity Normalized DIC Variation (Model)')
ax_model.set_xticklabels([])
ax_model.set_ylim(2030,2140)
# --- Surface-Only Boxplots for Observations (Left) and Model (Right) ---
ax_obs_surface = fig.add_subplot(gs[1, 0]) # Observations (Surface)
ax_model_surface = fig.add_subplot(gs[1, 1]) # Model (Surface)
ax_obs_surface.boxplot(extract_cruise_data(obs['nDIC'], depth=0), patch_artist=True)
ax_obs_surface.set_title('Salinity Normalized DIC, Surface Only (Observed)')
ax_obs_surface.set_xlabel('Cruise')
ax_obs_surface.set_ylabel('mmol/m$^3$')
ax_obs_surface.set_xticklabels(pd.to_datetime(obs['time'].isel(time=slice(0,15))).strftime('%d/%m/%Y'), rotation=45)
ax_obs_surface.set_ylim(2070,2180)
ax_model_surface.boxplot(extract_model_data(bgc_sub['nDIC'], depth=0), patch_artist=True)
ax_model_surface.set_title('Salinity Normalized DIC, Surface Only (Model)')
ax_model_surface.set_xlabel('Cruise')
ax_model_surface.set_ylabel('mmol/m$^3$')
ax_model_surface.set_xticklabels(pd.to_datetime(bgc_sub['time'].isel(time=slice(0,15))).strftime('%d/%m/%Y'), rotation=45)
ax_model_surface.set_ylim()
ax_model_surface.set_ylim(2030,2150)
plt.tight_layout()
plt.show()
Plot: Along-Fjord Alkalinity Section¶
This figure compares observed and modeled alkalinity cross-sections along the fjord using depth-bin coordinates.
loc = obs['lon'].sel(HV=[1,5,9,10]).isel(depth_bin=0).mean('time').values
depth=obs['depth_bin'].values
levels=np.arange(2290,2360,2)
bins = np.arange(0, 90, 10)
#depth_bins = xr.DataArray(pd.cut(depth, bins, labels=bins[:-1]), dims="depth", name="binned_depth")
data_alk1=obs['nTA'].sel(HV=[1,5,9,10]).isel(time=slice(0,10)).mean('time')
data_alk2=obs['Alk_qc'].sel(HV=[1,5,9,10]).isel(time=slice(0,10)).mean('time')
fig, axarr = plt.subplots(nrows=1, ncols=2, figsize=(6*2, 3), constrained_layout=True)
ax = axarr.flatten()
#c0=ax[0].contourf(loc, data_alk.depth, data_alk.transpose())
c0=ax[0].contourf(loc,data_alk1.depth_bin,data_alk1.transpose(),levels)
plt.colorbar(c0,ax=ax[0], orientation='vertical', label='mmol/m3',shrink=0.5)
ax[0].invert_yaxis()
ax[0].set_title('salinity normalized alkalinity along fjord (observed)')
ax[0].set_xlabel('longitude')
ax[0].set_ylabel('depth (m)')
c0=ax[1].contourf(loc,data_alk2.depth_bin,data_alk2.transpose(),levels)
plt.colorbar(c0,ax=ax[1], orientation='vertical', label='mmol/m3',shrink=0.5)
ax[1].invert_yaxis()
ax[1].set_title('alkalinity along fjord (observed)')
ax[1].set_xlabel('longitude')
ax[1].set_ylabel('depth (m)')Plot: Along-Fjord Alkalinity Section (Depth Sign Convention)¶
This version of the alkalinity section uses negative depth orientation to present the vertical structure in oceanographic convention.
loc = obs['lon'].sel(HV=[1,5,9,10]).isel(depth_bin=0).mean('time').values
depth=obs['depth_bin'].values
levels=np.arange(2230,2300,2)
bins = np.arange(0, 90, 10)
#depth_bins = xr.DataArray(pd.cut(depth, bins, labels=bins[:-1]), dims="depth", name="binned_depth")
data_alk1=bgc_sub['nTA'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).mean('time')
data_alk2=bgc_sub['ALK'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).mean('time')
fig, axarr = plt.subplots(nrows=1, ncols=2, figsize=(6*2, 3), constrained_layout=True)
ax = axarr.flatten()
#c0=ax[0].contourf(loc, data_alk.depth, data_alk.transpose())
c0=ax[0].contourf(loc,data_alk1.depth*(-1),data_alk1.transpose(),levels)
plt.colorbar(c0,ax=ax[0], orientation='vertical', label='mmol/m3',shrink=0.5)
ax[0].set_ylim(0,40)
ax[0].invert_yaxis()
ax[0].set_title('salinity normalized alkalinity along fjord (modeled)')
ax[0].set_xlabel('longitude')
ax[0].set_ylabel('depth (m)')
c0=ax[1].contourf(loc,data_alk2.depth*(-1),data_alk2.transpose(),levels)
plt.colorbar(c0,ax=ax[1], orientation='vertical', label='mmol/m3',shrink=0.5)
ax[1].set_ylim(0,40)
ax[1].invert_yaxis()
ax[1].set_title('alkalinity along fjord (modeled)')
ax[1].set_xlabel('longitude')
ax[1].set_ylabel('depth (m)')Plot: Time-Series Comparison for Alkalinity Metrics¶
This panel compares observed and modeled alkalinity time-series statistics across selected depth ranges.
import matplotlib.pyplot as plt
import numpy as np
# Compute mean and standard deviation over time for observations
depth_obs = obs['depth_bin'].values
depth_model = bgc_sub['depth'].values
# Observations (Mean and Std Dev)
alk_mean_obs = obs['DIC_qc'].sel(HV=[1,5,9,10]).isel(time=slice(0,15)).mean(dim=['time', 'HV'])
alk_std_obs = obs['DIC_qc'].sel(HV=[1,5,9,10]).isel(time=slice(0,15)).std(dim=['time', 'HV'])
nalk_mean_obs = obs['nDIC'].sel(HV=[1,5,9,10]).isel(time=slice(0,10)).mean(dim=['time', 'HV'])
nalk_std_obs = obs['nDIC'].sel(HV=[1,5,9,10]).isel(time=slice(0,10)).std(dim=['time', 'HV'])
# Model (Mean and Std Dev)
alk_mean_model = bgc_sub['DIC'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).mean(dim=['time', 'location'])
alk_std_model = bgc_sub['DIC'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).std(dim=['time', 'location'])
nalk_mean_model = bgc_sub['nDIC'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).mean(dim=['time', 'location'])
nalk_std_model = bgc_sub['nDIC'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).std(dim=['time', 'location'])
# -----------------------------
# Define month mask (April–September)
# -----------------------------
obs_month_mask = obs['time'].dt.month.isin([5, 6, 7, 8])
mod_month_mask = bgc_sub['time'].dt.month.isin([5, 6, 7, 8])
# -----------------------------
# Observations (Mean and Std Dev)
# -----------------------------
alk_mean_obs = obs['DIC_qc'].sel(HV=[1,5,9,10]).where(obs_month_mask, drop=True).mean(dim=['time', 'HV'])
alk_std_obs = obs['DIC_qc'].sel(HV=[1,5,9,10]).where(obs_month_mask, drop=True).std(dim=['time', 'HV'])
nalk_mean_obs = obs['nDIC'].sel(HV=[1,5,9,10]).where(obs_month_mask, drop=True).mean(dim=['time', 'HV'])
nalk_std_obs = obs['nDIC'].sel(HV=[1,5,9,10]).where(obs_month_mask, drop=True).std(dim=['time', 'HV'])
# -----------------------------
# Model (Mean and Std Dev)
# -----------------------------
alk_mean_model = bgc_sub['DIC'].sel(location=[1,5,9,10]).where(mod_month_mask, drop=True).mean(dim=['time', 'location'])
alk_std_model = bgc_sub['DIC'].sel(location=[1,5,9,10]).where(mod_month_mask, drop=True).std(dim=['time', 'location'])
nalk_mean_model = bgc_sub['nDIC'].sel(location=[1,5,9,10]).where(mod_month_mask, drop=True).mean(dim=['time', 'location'])
nalk_std_model = bgc_sub['nDIC'].sel(location=[1,5,9,10]).where(mod_month_mask, drop=True).std(dim=['time', 'location'])
# Create figure with 2 columns (nTA on left, TA on right)
fig, axarr = plt.subplots(nrows=1, ncols=2, figsize=(12, 6), constrained_layout=True)
ax = axarr.flatten()
# --- Plot Salinity Normalized DIC (nTA) ---
ax[0].plot(
nalk_mean_obs, depth_obs,
marker='o', linestyle='-',
color='orange', label='Observed DIC'
)
ax[0].fill_betweenx(
depth_obs,
nalk_mean_obs - nalk_std_obs,
nalk_mean_obs + nalk_std_obs,
color='orange', alpha=0.3
)
ax[0].plot(
nalk_mean_model, depth_model * (-1),
marker='s', linestyle='--',
color='black', label='Modeled DIC'
)
ax[0].fill_betweenx(
depth_model * (-1),
nalk_mean_model - nalk_std_model,
nalk_mean_model + nalk_std_model,
color='grey', alpha=0.3
)
ax[0].set_ylim(0, 30)
ax[0].invert_yaxis()
ax[0].set_xlabel('Salinity Normalized DIC (mmol/m³)')
ax[0].set_ylabel('Depth (m)')
ax[0].set_title('Salinity Normalized DIC Profile')
ax[0].legend()
ax[0].grid()
# --- Plot Total DIC ---
ax[1].plot(
alk_mean_obs, depth_obs,
marker='o', linestyle='-',
color='orange', label='Observed DIC'
)
ax[1].fill_betweenx(
depth_obs,
alk_mean_obs - alk_std_obs,
alk_mean_obs + alk_std_obs,
color='orange', alpha=0.3
)
ax[1].plot(
alk_mean_model, depth_model * (-1),
marker='s', linestyle='--',
color='black', label='Modeled DIC'
)
ax[1].fill_betweenx(
depth_model * (-1),
alk_mean_model - alk_std_model,
alk_mean_model + alk_std_model,
color='grey', alpha=0.3
)
ax[1].set_ylim(0, 30)
ax[1].invert_yaxis()
ax[1].set_xlabel('Total DIC (mmol/m³)')
ax[1].set_title('Total DIC Profile')
ax[1].legend()
ax[1].grid()
# Label c)
ax[0].text(-0.05, 1.05, 'c)', transform=ax[0].transAxes,
fontsize=16, fontweight='bold', va='top', ha='right')
# Label d)
ax[1].text(-0.05, 1.05, 'd)', transform=ax[1].transAxes,
fontsize=16, fontweight='bold', va='top', ha='right')
plt.show()
Plot: Time-Series Comparison for DIC Metrics¶
This panel compares observed and modeled dissolved inorganic carbon time-series behavior across the same stations/depth ranges.
import matplotlib.pyplot as plt
import numpy as np
# Compute mean and standard deviation over time for observations
depth_obs = obs['depth_bin'].values
depth_model = bgc_sub['depth'].values
# Observations (Mean and Std Dev)
alk_mean_obs = obs['Alk_qc'].sel(HV=[1,5,9,10]).isel(time=slice(0,10)).mean(dim=['time', 'HV'])
alk_std_obs = obs['Alk_qc'].sel(HV=[1,5,9,10]).isel(time=slice(0,10)).std(dim=['time', 'HV'])
nalk_mean_obs = obs['nTA'].sel(HV=[1,5,9,10]).isel(time=slice(0,10)).mean(dim=['time', 'HV'])
nalk_std_obs = obs['nTA'].sel(HV=[1,5,9,10]).isel(time=slice(0,10)).std(dim=['time', 'HV'])
# Model (Mean and Std Dev)
alk_mean_model = bgc_sub['ALK'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).mean(dim=['time', 'location'])
alk_std_model = bgc_sub['ALK'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).std(dim=['time', 'location'])
nalk_mean_model = bgc_sub['nTA'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).mean(dim=['time', 'location'])
nalk_std_model = bgc_sub['nTA'].sel(location=[1,5,9,10]).isel(time=slice(2,7)).std(dim=['time', 'location'])
obs_month_mask = (
(obs['time'].dt.year == 2024) &
(obs['time'].dt.month.isin([4, 5,6,7,8,9]))=
# -----------------------------
# Observations (Mean and Std Dev)
# -----------------------------
alk_mean_obs = obs['Alk_qc'].sel(HV=[1,5,9,10]).where(obs_month_mask, drop=True).mean(dim=['time', 'HV'])
alk_std_obs = obs['Alk_qc'].sel(HV=[1,5,9,10]).where(obs_month_mask, drop=True).std(dim=['time', 'HV'])
nalk_mean_obs = obs['nTA'].sel(HV=[1,5,9,10]).where(obs_month_mask, drop=True).mean(dim=['time', 'HV'])
nalk_std_obs = obs['nTA'].sel(HV=[1,5,9,10]).where(obs_month_mask, drop=True).std(dim=['time', 'HV'])
# -----------------------------
# Model (Mean and Std Dev)
# -----------------------------
alk_mean_model = bgc_sub['ALK'].sel(location=[1,5,9,10]).where(mod_month_mask, drop=True).mean(dim=['time', 'location'])
alk_std_model = bgc_sub['ALK'].sel(location=[1,5,9,10]).where(mod_month_mask, drop=True).std(dim=['time', 'location'])
nalk_mean_model = bgc_sub['nTA'].sel(location=[1,5,9,10]).where(mod_month_mask, drop=True).mean(dim=['time', 'location'])
nalk_std_model = bgc_sub['nTA'].sel(location=[1,5,9,10]).where(mod_month_mask, drop=True).std(dim=['time', 'location'])
# Create figure with 2 columns (nTA on left, TA on right)
fig, axarr = plt.subplots(nrows=1, ncols=2, figsize=(12, 6), constrained_layout=True)
ax = axarr.flatten()
# --- Plot Salinity Normalized DIC (nTA) ---
ax[0].plot(
nalk_mean_obs, depth_obs,
marker='o', linestyle='-',
color='orange', label='Observed nTA'
)
ax[0].fill_betweenx(
depth_obs,
nalk_mean_obs - nalk_std_obs,
nalk_mean_obs + nalk_std_obs,
color='orange', alpha=0.3
)
ax[0].plot(
nalk_mean_model, depth_model * (-1),
marker='s', linestyle='--',
color='black', label='Modeled nTA'
)
ax[0].fill_betweenx(
depth_model * (-1),
nalk_mean_model - nalk_std_model,
nalk_mean_model + nalk_std_model,
color='grey', alpha=0.3
)
ax[0].set_ylim(0, 30)
ax[0].invert_yaxis()
ax[0].set_xlabel('Salinity Normalized Alk (mmol/m³)')
ax[0].set_ylabel('Depth (m)')
ax[0].set_title('Salinity Normalized Alk Profile')
ax[0].legend()
ax[0].grid()
# --- Plot Total DIC ---
ax[1].plot(
alk_mean_obs, depth_obs,
marker='o', linestyle='-',
color='orange', label='Observed Alk'
)
ax[1].fill_betweenx(
depth_obs,
alk_mean_obs - alk_std_obs,
alk_mean_obs + alk_std_obs,
color='orange', alpha=0.3
)
ax[1].plot(
alk_mean_model, depth_model * (-1),
marker='s', linestyle='--',
color='black', label='Modeled Alk'
)
ax[1].fill_betweenx(
depth_model * (-1),
alk_mean_model - alk_std_model,
alk_mean_model + alk_std_model,
color='grey', alpha=0.3
)
ax[1].set_ylim(0, 30)
ax[1].invert_yaxis()
ax[1].set_xlabel('Total ALK (mmol/m³)')
ax[1].set_title('Total ALK Profile')
ax[1].legend()
ax[1].grid()
# Label a)
ax[0].text(-0.05, 1.05, 'a)', transform=ax[0].transAxes,
fontsize=16, fontweight='bold', va='top', ha='right')
# Label b)
ax[1].text(-0.05, 1.05, 'b)', transform=ax[1].transAxes,
fontsize=16, fontweight='bold', va='top', ha='right')
plt.show()
Plot: Side-by-Side Vertical Fields¶
This figure displays paired vertical contour fields for observations and model output to highlight structural differences.
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
# --------------------------------------------------
# Open dataset
# --------------------------------------------------
ds = xr.open_dataset(
"/anvil/projects/x-ees250129/Datasets/UNIFIED/BGCdataset.nc"
)
# --------------------------------------------------
# Select surface layer
# --------------------------------------------------
if "dep" in ds.dims:
Alk_surf = ds["Alk"].isel(dep=0)
DIC_surf = ds["DIC"].isel(dep=0)
elif "z" in ds.dims:
Alk_surf = ds["Alk"].isel(z=0)
DIC_surf = ds["DIC"].isel(z=0)
else:
raise ValueError("No recognized vertical dimension found.")
# --------------------------------------------------
# Coordinates
# --------------------------------------------------
lat = ds["lat"]
lon = ds["lon"]
# --------------------------------------------------
# Plot
# --------------------------------------------------
fig, axs = plt.subplots(
ncols=2,
figsize=(12, 5),
subplot_kw={"projection": ccrs.PlateCarree()},
dpi=300
)
# ---- ALK ----
cf1 = axs[0].contourf(
lon, lat, Alk_surf[0, :, :],
levels=np.arange(2280,2360,5),
cmap="viridis",
transform=ccrs.PlateCarree()
)
axs[0].set_title("Surface Alkalinity")
plt.colorbar(cf1, ax=axs[0], orientation="vertical", label="ALK")
# ---- DIC ----
cf2 = axs[1].contourf(
lon, lat, DIC_surf[6, :, :],
levels=np.arange(1900,2100,10),
cmap="plasma",
transform=ccrs.PlateCarree()
)
axs[1].set_title("Surface DIC")
plt.colorbar(cf2, ax=axs[1], orientation="vertical", label="DIC")
# --------------------------------------------------
# Zoom to Iceland
# --------------------------------------------------
# (SW Iceland–centered, but includes whole island)
lon_min, lon_max = -32, -10
lat_min, lat_max = 60, 68
for ax in axs:
ax.set_extent([lon_min, lon_max, lat_min, lat_max],
crs=ccrs.PlateCarree())
ax.coastlines(resolution="10m")
ax.gridlines(draw_labels=True, linewidth=0.5, alpha=0.5)
plt.tight_layout()
plt.show()
Plot: Along-Fjord Section for Additional Chemistry Field¶
This section plot compares an additional biogeochemical variable between observations and model along the fjord transect.
loc = obs['lon'].sel(HV=[1,5,9,10]).isel(depth_bin=0).mean('time').values
depth=obs['depth_bin'].values
levels=np.arange(2070,2160,2)
bins = np.arange(0, 90, 10)
#depth_bins = xr.DataArray(pd.cut(depth, bins, labels=bins[:-1]), dims="depth", name="binned_depth")
data_alk1=obs['nDIC'].sel(HV=[1,5,9,10]).isel(time=slice(0,4)).mean('time')
data_alk2=obs['nDIC'].sel(HV=[1,5,9,10]).isel(time=slice(5,10)).mean('time')
fig, axarr = plt.subplots(nrows=1, ncols=2, figsize=(6*2, 3), constrained_layout=True)
ax = axarr.flatten()
#c0=ax[0].contourf(loc, data_alk.depth, data_alk.transpose())
c0=ax[0].contourf(loc,data_alk1.depth_bin,data_alk1.transpose(),levels)
plt.colorbar(c0,ax=ax[0], orientation='vertical', label='mmol/m3',shrink=0.5)
ax[0].invert_yaxis()
ax[0].set_title('salinity normalized DIC along fjord April-May (observed)')
ax[0].set_xlabel('longitude')
ax[0].set_ylabel('depth (m)')
c0=ax[1].contourf(loc,data_alk2.depth_bin,data_alk2.transpose(),levels)
plt.colorbar(c0,ax=ax[1], orientation='vertical', label='mmol/m3',shrink=0.5)
ax[1].invert_yaxis()
ax[1].set_title('salinity normalized DIC along fjord June-August (observed)')
ax[1].set_xlabel('longitude')
ax[1].set_ylabel('depth (m)')Plot: Along-Fjord Section (Negative Depth View)¶
This repeat section shows the same comparison with negative-depth plotting for consistency with other profile figures.
loc = obs['lon'].sel(HV=[1,5,9,10]).isel(depth_bin=0).mean('time').values
depth=obs['depth_bin'].values
levels=np.arange(2070,2160,2)
bins = np.arange(0, 90, 10)
#depth_bins = xr.DataArray(pd.cut(depth, bins, labels=bins[:-1]), dims="depth", name="binned_depth")
data_alk1=obs['nDIC'].sel(HV=[1,5,9,10]).isel(time=slice(0,4)).mean('time')
data_alk2=obs['nDIC'].sel(HV=[1,5,9,10]).isel(time=slice(5,10)).mean('time')
data_alk1=bgc_sub['nDIC'].sel(location=[1,5,9,10]).isel(time=slice(0,4)).mean('time')
data_alk2=bgc_sub['nDIC'].sel(location=[1,5,9,10]).isel(time=slice(5,10)).mean('time')
fig, axarr = plt.subplots(nrows=1, ncols=2, figsize=(6*2, 3), constrained_layout=True)
ax = axarr.flatten()
#c0=ax[0].contourf(loc, data_alk.depth, data_alk.transpose())
c0=ax[0].contourf(loc,data_alk1.depth*(-1),data_alk1.transpose(),levels)
plt.colorbar(c0,ax=ax[0], orientation='vertical', label='mmol/m3',shrink=0.5)
ax[0].set_ylim(0,40)
ax[0].invert_yaxis()
ax[0].set_title('salinity normalized DIC along fjord April-May (modeled)')
ax[0].set_xlabel('longitude')
ax[0].set_ylabel('depth (m)')
c0=ax[1].contourf(loc,data_alk2.depth*(-1),data_alk2.transpose(),levels)
plt.colorbar(c0,ax=ax[1], orientation='vertical', label='mmol/m3',shrink=0.5)
ax[1].set_ylim(0,40)
ax[1].invert_yaxis()
ax[1].set_title('salinity normalized DIC along fjord June-August (modeled )')
ax[1].set_xlabel('longitude')
ax[1].set_ylabel('depth (m)')Plot: Surface vs Deep Time-Series by Station¶
These subplots compare upper-layer and deeper-layer observed versus modeled time series for each station.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Create figure and subplots (2x2)
fig, axes = plt.subplots(
nrows=2, ncols=2, figsize=(14, 8), sharex=False
)
# --- Data Extraction Functions ---
def extract_depth_avg(var, depth_range):
"""Extract mean across depth range for each time step (observations)."""
return var.sel(depth_bin=slice(*depth_range)).mean(dim='depth_bin', skipna=True)
def extract_depth_avg_model(var, depth_range):
"""Extract mean across depth range for each time step (model)."""
return var.sel(depth=slice(*depth_range)).mean(dim='depth', skipna=True)
# --- Depth ranges ---
upper_depth_range = (0, 5)
lower_depth_range = (20, 40)
upper_depth_range_m = (-0, -10)
lower_depth_range_m = (-20, -40)
# =======================
# TOP ROW — nDIC
# =======================
# --- Observed nDIC ---
ax = axes[0, 0]
obs_upper = extract_depth_avg(obs['nDIC'].mean('HV'), upper_depth_range)
obs_lower = extract_depth_avg(obs['nDIC'].mean('HV'), lower_depth_range)
ax.plot(obs['time'], obs_upper, label='0–10 m', marker='o')
ax.plot(obs['time'], obs_lower, label='20–40 m', marker='s', linestyle='--')
ax.set_title('Salinity-Normalized DIC (Observed)')
ax.set_ylabel('mmol m$^{-3}$')
ax.legend()
ax.tick_params(axis='x', rotation=45)
# --- Modeled nDIC ---
ax = axes[0, 1]
model_upper = extract_depth_avg_model(
bgc_sub['nDIC'].mean('location'), upper_depth_range_m
)
model_lower = extract_depth_avg_model(
bgc_sub['nDIC'].mean('location'), lower_depth_range_m
)
ax.plot(bgc_sub['time'], model_upper, label='0–10 m', marker='o')
ax.plot(bgc_sub['time'], model_lower, label='20–40 m', marker='s', linestyle='--')
ax.set_title('Salinity-Normalized DIC (Model)')
ax.legend()
ax.tick_params(axis='x', rotation=45)
# =======================
# BOTTOM ROW — ALK
# =======================
# --- Observed ALK ---
ax = axes[1, 0]
obs_upper = extract_depth_avg(obs['nTA'].mean('HV'), upper_depth_range)
obs_lower = extract_depth_avg(obs['nTA'].mean('HV'), lower_depth_range)
ax.plot(obs['time'], obs_upper, label='0–10 m', marker='o')
ax.plot(obs['time'], obs_lower, label='20–40 m', marker='s', linestyle='--')
ax.set_title('Salinity-normalized Alkalinity (Observed)')
ax.set_ylabel('mmol m$^{-3}$')
ax.legend()
ax.tick_params(axis='x', rotation=45)
# --- Modeled ALK ---
ax = axes[1, 1]
model_upper = extract_depth_avg_model(
bgc_sub['nTA'].mean('location'), upper_depth_range_m
)
model_lower = extract_depth_avg_model(
bgc_sub['nTA'].mean('location'), lower_depth_range_m
)
ax.plot(bgc_sub['time'], model_upper, label='0–10 m', marker='o')
ax.plot(bgc_sub['time'], model_lower, label='20–40 m', marker='s', linestyle='--')
ax.set_title('Salinity-normalized Alkalinity (Model)')
ax.legend()
ax.tick_params(axis='x', rotation=45)
plt.tight_layout()
plt.show()Plot: Multi-Panel Station Diagnostics¶
This composite figure summarizes station-by-station diagnostic time series for core biogeochemical variables.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# --- Configuration ---
# Match the 'variables' list EXACTLY to the names in your Data variables list
variables = ['Nitrate_and_Nitrite', 'Phosphate', 'Silicate', 'Ammonium']
# --- Refactored Extraction Functions ---
def extract_cruise_data(ds, var_name, depth_idx=0, num_cruises=22):
"""Filters by flag == 2 and extracts values using exact depth_bin index."""
cruises = []
# Dynamically find the flag column (e.g., 'Phosphate_flag')
flag_col = f"{var_name}_flag"
# Check if the flag column exists to prevent a new KeyError
if flag_col in ds.data_vars:
filtered_data = ds[var_name].where(ds[flag_col] == 2)
else:
# Fallback if no flag is found
filtered_data = ds[var_name]
for i in range(num_cruises):
# Select by the position in the depth dimension
data = np.hstack(filtered_data.isel(time=i, depth_bin=depth_idx))
cruises.append(data[~np.isnan(data)])
return cruises
def extract_model_data(var, depth_idx=0, num_cruises=22):
"""Extracts model data using exact depth index."""
cruises = []
for i in range(num_cruises):
data = np.hstack(var.isel(time=i, depth=depth_idx))
cruises.append(data[~np.isnan(data)])
return cruises
# --- Plotting ---
fig = plt.figure(figsize=(12, 14))
gs = fig.add_gridspec(4, 2, height_ratios=[1, 1, 1, 1], width_ratios=[1, 1])
for row, var in enumerate(variables):
# --- Observations (Left) ---
ax_obs = fig.add_subplot(gs[row, 0])
obs_data = extract_cruise_data(obs, var, depth_idx=0)
ax_obs.boxplot(obs_data, patch_artist=True, boxprops=dict(facecolor='lightblue'))
# Replace underscores with spaces for cleaner titles
display_name = var.replace('_', ' ')
ax_obs.set_title(f'{display_name} Surface (Observed)')
ax_obs.set_ylabel('mmol/m$^3$')
# --- Model (Right) ---
ax_model = fig.add_subplot(gs[row, 1])
# Match model names to observation names
# Mapping Silicate (Obs) -> SiO3 (Model) and Nitrate_and_Nitrite (Obs) -> NO3 (Model)
model_map = {
'Silicate': 'SiO3',
'Nitrate_and_Nitrite': 'NO3',
'Phosphate': 'PO4',
'Ammonium': 'NH4'
}
model_key = model_map.get(var, var)
mod_data = extract_model_data(bgc_sub[model_key], depth_idx=0)
ax_model.boxplot(mod_data, patch_artist=True, boxprops=dict(facecolor='salmon'))
ax_model.set_title(f'{display_name} Surface (Model)')
# --- X-Axis Labels ---
if row == 3:
date_labels = pd.to_datetime(obs['time'].isel(time=slice(0, 22))).strftime('%d/%m/%Y')
ax_obs.set_xticklabels(date_labels, rotation=45)
ax_model.set_xticklabels(date_labels, rotation=45)
else:
ax_obs.set_xticklabels([])
ax_model.set_xticklabels([])
plt.tight_layout()
plt.show()print(obs.data_vars)Plot: Extended Multi-Panel Diagnostics¶
This larger diagnostic panel expands the station comparison across additional variables and/or depth groupings.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Create figure and subplots
fig = plt.figure(figsize=(12, 16))
gs = fig.add_gridspec(4, 2, height_ratios=[1, 1, 1, 1], width_ratios=[1, 1])
# --- Refactored Data Extraction Functions ---
def extract_cruise_data(ds, var_name, depth_bin=0, num_cruises=22):
"""Filters by flag == 2 and extracts values using exact depth_bin index."""
cruises = []
# Identify the flag variable (e.g., 'Nitrate_and_Nitrite_flag')
flag_col = f"{var_name}_flag"
# Filter for Flag == 2 (Good Data)
filtered_var = ds[var_name].where(ds[flag_col] == 2)
for i in range(num_cruises):
# Using .isel(time=i) because your metadata showed 'time' is the dimension name
data = np.hstack(filtered_var.isel(time=i, depth_bin=depth_bin))
cruises.append(data[~np.isnan(data)])
return cruises
def extract_model_data(var, depth=0, num_cruises=22):
"""Extracts non-NaN values from the model across time steps."""
cruises = []
for i in range(num_cruises):
data = np.hstack(var.isel(time=i, depth=depth))
cruises.append(data[~np.isnan(data)])
return cruises
# --- Nitrate and Nitrite (NO3/NO2) Boxplots ---
ax1_obs = fig.add_subplot(gs[0, 0])
ax1_model = fig.add_subplot(gs[0, 1])
ax1_obs.boxplot(extract_cruise_data(obs, 'Nitrate_and_Nitrite'), patch_artist=True)
ax1_obs.set_title('Nitrate and Nitrite Surface Only (Observed)')
ax1_obs.set_ylabel('mmol/m$^3$')
ax1_obs.set_xticklabels([])
ax1_model.boxplot(extract_model_data(bgc_sub['NO3']), patch_artist=True)
ax1_model.set_title('Nitrate and Nitrite Surface Only (Model)')
ax1_model.set_xticklabels([])
# --- Phosphate (PO4) Boxplots ---
ax2_obs = fig.add_subplot(gs[1, 0])
ax2_model = fig.add_subplot(gs[1, 1])
ax2_obs.boxplot(extract_cruise_data(obs, 'Phosphate'), patch_artist=True)
ax2_obs.set_title('Phosphate Surface Only (Observed)')
ax2_obs.set_ylabel('mmol/m$^3$')
ax2_obs.set_xticklabels([])
ax2_model.boxplot(extract_model_data(bgc_sub['PO4']), patch_artist=True)
ax2_model.set_title('Phosphate Surface Only (Model)')
ax2_model.set_xticklabels([])
# --- Silicate (SiO2) Boxplots ---
ax3_obs = fig.add_subplot(gs[2, 0])
ax3_model = fig.add_subplot(gs[2, 1])
ax3_obs.boxplot(extract_cruise_data(obs, 'Silicate'), patch_artist=True)
ax3_obs.set_title('Silicate Surface Only (Observed)')
ax3_obs.set_ylabel('mmol/m$^3$')
ax3_obs.set_xticklabels([])
ax3_model.boxplot(extract_model_data(bgc_sub['SiO3']), patch_artist=True)
ax3_model.set_title('Silicate Surface Only (Model)')
ax3_model.set_xticklabels([])
# --- Ammonium (NH4) Boxplots ---
ax4_obs = fig.add_subplot(gs[3, 0])
ax4_model = fig.add_subplot(gs[3, 1])
ax4_obs.boxplot(extract_cruise_data(obs, 'Ammonium'), patch_artist=True)
ax4_obs.set_title('Ammonium Surface Only (Observed)')
ax4_obs.set_xlabel('Cruise')
ax4_obs.set_ylabel('mmol/m$^3$')
# Use 22 cruises for labels as per your request
labels = pd.to_datetime(obs['time'].isel(time=slice(0, 22))).strftime('%d/%m/%Y')
ax4_obs.set_xticklabels(labels, rotation=45)
ax4_model.boxplot(extract_model_data(bgc_sub['NH4']), patch_artist=True)
ax4_model.set_title('Ammonium Surface Only (Model)')
ax4_model.set_xlabel('Cruise')
ax4_model.set_ylabel('mmol/m$^3$')
ax4_model.set_xticklabels(labels, rotation=45)
plt.tight_layout()
plt.show()Plot: Comparative Diagnostics for Alternate Configuration¶
This figure repeats the multi-panel diagnostics for an alternate model or processing configuration to support side-by-side interpretation.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Create figure and subplots
fig = plt.figure(figsize=(12, 16))
gs = fig.add_gridspec(4, 2, height_ratios=[1, 1, 1, 1], width_ratios=[1, 1])
# --- Refactored Data Extraction Functions ---
def extract_cruise_data_all_depths(ds, var_name, num_cruises=22):
"""Filters by flag == 2 and extracts values for all depths across cruises."""
cruises = []
# Identify the flag variable (e.g., 'Silicate_flag')
flag_col = f"{var_name}_flag"
# Apply Quality Flag 2 Filter
filtered_var = ds[var_name].where(ds[flag_col] == 2)
for i in range(num_cruises):
# By not specifying depth_bin, we get the whole water column for that time step
data = np.hstack(filtered_var.isel(time=i))
cruises.append(data[~np.isnan(data)])
return cruises
def extract_model_data_all_depths(var, num_cruises=22):
"""Extracts all depth levels from the model across time steps."""
cruises = []
for i in range(num_cruises):
data = np.hstack(var.isel(time=i))
cruises.append(data[~np.isnan(data)])
return cruises
# --- Nitrate and Nitrite (Nitrate_and_Nitrite) ---
ax1_obs = fig.add_subplot(gs[0, 0])
ax1_model = fig.add_subplot(gs[0, 1])
ax1_obs.boxplot(extract_cruise_data_all_depths(obs, 'Nitrate_and_Nitrite'), patch_artist=True)
ax1_obs.set_title('Nitrate and Nitrite (All Depths, Observed)')
ax1_obs.set_ylabel('mmol/m$^3$')
ax1_obs.set_xticklabels([])
ax1_model.boxplot(extract_model_data_all_depths(bgc_sub['NO3']), patch_artist=True)
ax1_model.set_title('Nitrate and Nitrite (All Depths, Model)')
ax1_model.set_xticklabels([])
# --- Phosphate (Phosphate) ---
ax2_obs = fig.add_subplot(gs[1, 0])
ax2_model = fig.add_subplot(gs[1, 1])
ax2_obs.boxplot(extract_cruise_data_all_depths(obs, 'Phosphate'), patch_artist=True)
ax2_obs.set_title('Phosphate (All Depths, Observed)')
ax2_obs.set_ylabel('mmol/m$^3$')
ax2_obs.set_xticklabels([])
ax2_model.boxplot(extract_model_data_all_depths(bgc_sub['PO4']), patch_artist=True)
ax2_model.set_title('Phosphate (All Depths, Model)')
ax2_model.set_xticklabels([])
# --- Silicate (Silicate) ---
ax3_obs = fig.add_subplot(gs[2, 0])
ax3_model = fig.add_subplot(gs[2, 1])
ax3_obs.boxplot(extract_cruise_data_all_depths(obs, 'Silicate'), patch_artist=True)
ax3_obs.set_title('Silicate (All Depths, Observed)')
ax3_obs.set_ylabel('mmol/m$^3$')
ax3_obs.set_xticklabels([])
ax3_model.boxplot(extract_model_data_all_depths(bgc_sub['SiO3']), patch_artist=True)
ax3_model.set_title('Silicate (All Depths, Model)')
ax3_model.set_xticklabels([])
# --- Ammonium (Ammonium) ---
ax4_obs = fig.add_subplot(gs[3, 0])
ax4_model = fig.add_subplot(gs[3, 1])
ax4_obs.boxplot(extract_cruise_data_all_depths(obs, 'Ammonium'), patch_artist=True)
ax4_obs.set_title('Ammonium (All Depths, Observed)')
ax4_obs.set_xlabel('Cruise')
ax4_obs.set_ylabel('mmol/m$^3$')
# Use common time labels from the obs dataset
labels = pd.to_datetime(obs['time'].isel(time=slice(0, 22))).strftime('%d/%m/%Y')
ax4_obs.set_xticklabels(labels, rotation=45)
ax4_model.boxplot(extract_model_data_all_depths(bgc_sub['NH4']), patch_artist=True)
ax4_model.set_title('Ammonium (All Depths, Model)')
ax4_model.set_xlabel('Cruise')
ax4_model.set_ylabel('mmol/m$^3$')
ax4_model.set_xticklabels(labels, rotation=45)
plt.tight_layout()
plt.show()Plot: Observed vs Modeled Layered Time Series¶
This set of panels contrasts upper and lower layer time-series trajectories in observations and model output across stations.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Create figure and subplots
fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(12, 15), sharex=False)
# --- Refactored Data Extraction Functions ---
def extract_depth_avg_obs(ds, var_name, depth_range):
"""Filters by flag == 2 and averages across a depth range (observed)."""
flag_col = f"{var_name}_flag"
# Filter for Flag == 2 (Good Data) and average over stations (HV)
filtered_var = ds[var_name].where(ds[flag_col] == 2).mean('HV', skipna=True)
# Select the depth range using the depth_bin dimension
return filtered_var.sel(depth_bin=slice(*depth_range)).mean(dim='depth_bin', skipna=True)
def extract_depth_avg_model(var, depth_range):
"""Averages across model location and depth range."""
# Average across location (12 stations) and depth slice
return var.mean('location', skipna=True).sel(depth=slice(*depth_range)).mean(dim='depth', skipna=True)
# Define depth ranges (adjusting indices for depth_bin if necessary)
upper_depth_range = (0, 10)
lower_depth_range = (20, 40)
# Model depth ranges (assuming negative values for depth axis)
upper_depth_range_m = (-0.5, -10)
lower_depth_range_m = (-20, -40)
# Define mapping for matching Model keys to Observation keys
nutrient_map = [
{'obs': 'Nitrate_and_Nitrite', 'model': 'NO3', 'title': 'Nitrate and Nitrite'},
{'obs': 'Phosphate', 'model': 'PO4', 'title': 'Phosphate'},
{'obs': 'Silicate', 'model': 'SiO3', 'title': 'Silicate'},
{'obs': 'Ammonium', 'model': 'NH4', 'title': 'Ammonium'}
]
# --- Plotting ---
for i, nut in enumerate(nutrient_map):
# --- Observations (Column 0) ---
ax_obs = axes[i, 0]
obs_upper = extract_depth_avg_obs(obs, nut['obs'], upper_depth_range)
obs_lower = extract_depth_avg_obs(obs, nut['obs'], lower_depth_range)
# Filter NaNs for Upper
mask_u = np.isfinite(obs_upper)
ax_obs.plot(obs['time'][mask_u], obs_upper[mask_u],
label='0-10 m', marker='o', linestyle='-')
# Filter NaNs for Lower
mask_l = np.isfinite(obs_lower)
ax_obs.plot(obs['time'][mask_l], obs_lower[mask_l],
label='20-40 m', marker='s', linestyle='--')
ax_obs.set_title(f"{nut['title']} (Observed)")
ax_obs.set_ylabel('mmol/m$^3$')
ax_obs.legend()
ax_obs.tick_params(axis='x', rotation=45)
ax_obs.set_xlim([pd.Timestamp('2024-04-01'),
pd.Timestamp('2025-04-01')])
# --- Determine observed y-limits ---
ymin = np.nanmin([obs_upper.min().item(), obs_lower.min().item()])
ymax = np.nanmax([obs_upper.max().item(), obs_lower.max().item()])
# Add small padding
yrange = ymax - ymin
ymin -= 0.05 * yrange
ymax += 0.05 * yrange
# --- Model (Column 1) ---
ax_model = axes[i, 1]
model_upper = extract_depth_avg_model(bgc_sub[nut['model']], upper_depth_range_m)
model_lower = extract_depth_avg_model(bgc_sub[nut['model']], lower_depth_range_m)
# Filter NaNs for Model Upper
m_mask_u = np.isfinite(model_upper)
ax_model.plot(bgc_sub['time'][m_mask_u], model_upper[m_mask_u],
label='0-10 m', marker='o', linestyle='-')
# Filter NaNs for Model Lower
m_mask_l = np.isfinite(model_lower)
ax_model.plot(bgc_sub['time'][m_mask_l], model_lower[m_mask_l],
label='20-40 m', marker='s', linestyle='--')
ax_model.set_title(f"{nut['title']} (Model)")
ax_model.legend()
ax_model.tick_params(axis='x', rotation=45)
ax_model.set_xlim([pd.Timestamp('2024-04-01'),
pd.Timestamp('2025-04-01')])
ax_model.set_ylim(ymin, ymax)
plt.tight_layout()
plt.show()Plot: Chlorophyll Layered Time Series¶
These panels compare surface and deep chlorophyll temporal evolution in observations and model output for each station.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# =========================
# Figure (NOW 6 ROWS)
# =========================
fig, axes = plt.subplots(nrows=6, ncols=2, figsize=(12, 20), sharex=False)
# =========================
# Helper functions (unchanged)
# =========================
def extract_depth_avg_obs(ds, var_name, depth_range):
flag_col = f"{var_name}_flag"
filtered_var = ds[var_name].where(ds[flag_col] == 2).mean('HV', skipna=True)
return filtered_var.sel(depth_bin=slice(*depth_range)).mean(dim='depth_bin', skipna=True)
def extract_depth_avg_model(var, depth_range):
return var.mean('location', skipna=True).sel(depth=slice(*depth_range)).mean(dim='depth', skipna=True)
# =========================
# Depth ranges
# =========================
upper_depth_range = (0, 5)
lower_depth_range = (10, 40)
upper_depth_range_m = (-0.5, -10)
lower_depth_range_m = (-20, -40)
# ==================================================
# ROW 0 — CHLOROPHYLL
# ==================================================
ax_obs = axes[0, 0]
ax_model = axes[0, 1]
# --- OBS (NO FLAG!) ---
chl_upper = obs['chl_a'].mean('HV').sel(depth_bin=slice(*upper_depth_range)).mean('depth_bin')
chl_lower = obs['chl_a'].mean('HV').sel(depth_bin=slice(*lower_depth_range)).mean('depth_bin')
mask_u = np.isfinite(chl_upper)
mask_l = np.isfinite(chl_lower)
ax_obs.plot(obs['time'][mask_u], chl_upper[mask_u], label='Surface', marker='o')
ax_obs.plot(obs['time'][mask_l], chl_lower[mask_l], label='Deep', linestyle='--', marker='s')
ax_obs.set_title("Chlorophyll (Observed)")
ax_obs.set_ylabel('mg m$^{-3}$')
ax_obs.legend()
# --- MODEL ---
chl_model = bgc_sub['spChl'] + bgc_sub['diatChl']
chl_upper_m = extract_depth_avg_model(chl_model, upper_depth_range_m).where(mod_mask)
chl_lower_m = extract_depth_avg_model(chl_model, lower_depth_range_m).where(mod_mask)
m_mask_u = np.isfinite(chl_upper_m)
m_mask_l = np.isfinite(chl_lower_m)
ax_model.plot(bgc_sub['time'][m_mask_u], chl_upper_m[m_mask_u], label='Surface', marker='o')
ax_model.plot(bgc_sub['time'][m_mask_l], chl_lower_m[m_mask_l], label='Deep', linestyle='--', marker='s')
ax_model.set_title("Chlorophyll (Model)")
ax_model.legend()
# ==================================================
# EXISTING NUTRIENTS → SHIFT DOWN BY 1 ROW
# ==================================================
nutrient_map = [
{'obs': 'Nitrate_and_Nitrite', 'model': 'NO3', 'title': 'Nitrate and Nitrite'},
{'obs': 'Phosphate', 'model': 'PO4', 'title': 'Phosphate'},
{'obs': 'Silicate', 'model': 'SiO3', 'title': 'Silicate'},
{'obs': 'Ammonium', 'model': 'NH4', 'title': 'Ammonium'}
]
for i, nut in enumerate(nutrient_map):
row = i + 1 # shift down
ax_obs = axes[row, 0]
ax_model = axes[row, 1]
# --- OBS ---
obs_upper = extract_depth_avg_obs(obs, nut['obs'], upper_depth_range)
obs_lower = extract_depth_avg_obs(obs, nut['obs'], lower_depth_range)
mask_u = np.isfinite(obs_upper)
mask_l = np.isfinite(obs_lower)
ax_obs.plot(obs['time'][mask_u], obs_upper[mask_u], label='Surface', marker='o')
ax_obs.plot(obs['time'][mask_l], obs_lower[mask_l], label='Deep', linestyle='--', marker='s')
ax_obs.set_title(f"{nut['title']} (Observed)")
ax_obs.set_ylabel('mmol/m$^3$')
ax_obs.legend()
# y-limits
ymin = np.nanmin([obs_upper.min().item(), obs_lower.min().item()])
ymax = np.nanmax([obs_upper.max().item(), obs_lower.max().item()])
yrange = ymax - ymin
ymin -= 0.05 * yrange
ymax += 0.05 * yrange
# --- MODEL ---
model_upper = extract_depth_avg_model(bgc_sub[nut['model']], upper_depth_range_m)
model_lower = extract_depth_avg_model(bgc_sub[nut['model']], lower_depth_range_m)
m_mask_u = np.isfinite(model_upper)
m_mask_l = np.isfinite(model_lower)
ax_model.plot(bgc_sub['time'][m_mask_u], model_upper[m_mask_u], label='Surface', marker='o')
ax_model.plot(bgc_sub['time'][m_mask_l], model_lower[m_mask_l], label='Deep', linestyle='--', marker='s')
ax_model.set_title(f"{nut['title']} (Model)")
ax_model.legend()
ax_model.set_ylim(ymin, ymax)
# ==================================================
# FINAL ROW — Fe (MODEL ONLY)
# ==================================================
ax_obs = axes[5, 0]
ax_model = axes[5, 1]
# Empty obs panel
ax_obs.axis('off')
ax_obs.set_title("Fe (Observed) — N/A")
# Model Fe
fe_upper = extract_depth_avg_model(bgc_sub['Fe'], upper_depth_range_m)
fe_lower = extract_depth_avg_model(bgc_sub['Fe'], lower_depth_range_m)
m_mask_u = np.isfinite(fe_upper)
m_mask_l = np.isfinite(fe_lower)
ax_model.plot(bgc_sub['time'][m_mask_u], fe_upper[m_mask_u], label='Surface', marker='o')
ax_model.plot(bgc_sub['time'][m_mask_l], fe_lower[m_mask_l], label='Deep', linestyle='--', marker='s')
ax_model.set_title("Iron (Model)")
ax_model.set_ylabel('mmol/m$^3$')
ax_model.legend()
# =========================
# Formatting
# =========================
for ax in axes.flat:
if ax.has_data():
ax.tick_params(axis='x', rotation=45)
ax.set_xlim([pd.Timestamp('2024-04-01'),
pd.Timestamp('2025-04-01')])
plt.tight_layout()
plt.show()Plot: Mean-Series and Driver Comparison¶
This figure compares mean observed chlorophyll with modeled chlorophyll and iron-related series to evaluate potential controlling factors.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# =========================
# Figure setup
# =========================
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))
# =========================
# Helper functions
# =========================
def extract_cruise_stats(data_list):
"""Convert list of arrays into mean and std arrays."""
means = np.array([np.nanmean(d) if len(d) > 0 else np.nan for d in data_list])
stds = np.array([np.nanstd(d) if len(d) > 0 else np.nan for d in data_list])
return means, stds
def extract_cruise_data_all_depths(ds, var_name, num_cruises=22):
var = ds[var_name]
cruises = []
for i in range(num_cruises):
data = np.hstack(var.isel(time=i))
cruises.append(data[~np.isnan(data)])
return cruises
def extract_model_data_all_depths(var, num_cruises=22):
cruises = []
for i in range(num_cruises):
data = np.hstack(var.isel(time=i))
cruises.append(data[~np.isnan(data)])
return cruises
# =========================
# Time axis
# =========================
time = pd.to_datetime(obs['time'].isel(time=slice(0, 22)))
# =========================
# --- OBSERVED (chl_a) ---
# =========================
ax_obs = axes[0]
chl_obs = extract_cruise_data_all_depths(obs, 'chl_a')
mean_obs, std_obs = extract_cruise_stats(chl_obs)
ax_obs.plot(time, mean_obs, color='green', label='Mean')
ax_obs.fill_between(
time,
mean_obs - std_obs,
mean_obs + std_obs,
color='green',
alpha=0.3,
label='±1 std'
)
ax_obs.set_title("Chlorophyll-a (Observed)")
ax_obs.set_ylabel("mg m$^{-3}$")
ax_obs.legend()
ax_obs.tick_params(axis='x', rotation=45)
# =========================
# --- MODEL (chl + Fe) ---
# =========================
ax_model = axes[1]
# --- Chlorophyll ---
chl_model = extract_model_data_all_depths(
bgc_sub['spChl'] + bgc_sub['diatChl']
)
mean_chl, std_chl = extract_cruise_stats(chl_model)
ax_model.plot(time, mean_chl, color='blue', label='Chl mean')
ax_model.fill_between(
time,
mean_chl - std_chl,
mean_chl + std_chl,
color='blue',
alpha=0.3,
label='Chl ±1 std'
)
ax_model.set_title("Model Chlorophyll + Iron")
ax_model.set_ylabel("Chl (mg m$^{-3}$)")
ax_model.tick_params(axis='x', rotation=45)
# =========================
# --- SECONDARY AXIS (Fe) ---
# =========================
ax_fe = ax_model.twinx()
fe_model = extract_model_data_all_depths(bgc_sub['Fe'])
mean_fe, std_fe = extract_cruise_stats(fe_model)
ax_fe.plot(time, mean_fe, color='red', label='Fe mean')
ax_fe.fill_between(
time,
mean_fe - std_fe,
mean_fe + std_fe,
color='red',
alpha=0.2,
label='Fe ±1 std'
)
ax_fe.set_ylabel("Fe (mmol m$^{-3}$)", color='red')
ax_fe.tick_params(axis='y', labelcolor='red')
# =========================
# Legends (separate axes)
# =========================
ax_model.legend(loc='upper left')
ax_fe.legend(loc='upper right')
import datetime as dt
# ... (Keep all your previous map setup and plotting code the same) ...
# =================================================================
# Update Axis Limits and Ticks to restrict to 2024
# =================================================================
# Define your 2024 calendar boundaries
xlim_2024 = [dt.date(2024, 1, 1), dt.date(2024, 12, 31)]
# Apply to Panel (a) — Observed Chlorophyll
ax_map.set_xlim(xlim_2024)
# Clean up any leftover out-of-bounds tick labels
ax_map.set_xticks([dt.date(2024, 3, 1), dt.date(2024, 6, 1),
dt.date(2024, 9, 1), dt.date(2024, 12, 1)])
# Apply to Panel (b) — Model Chlorophyll + Iron
ax_bat.set_xlim(xlim_2024)
ax_bat.set_xticks([dt.date(2024, 3, 1), dt.date(2024, 6, 1),
dt.date(2024, 9, 1), dt.date(2024, 12, 1)])
# Adjust rotation for readability if the dates crowd the axis
fig.autofmt_xdate(rotation=45)
plt.show()
# =========================
# Layout
# =========================
plt.tight_layout()
plt.show()Plot: Vertical/Layered Time-Series Summary¶
This final set of panels summarizes upper-versus-lower layer temporal behavior for key biogeochemical fields.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# =========================
# Figure setup (6 rows, 1 column)
# =========================
fig, axes = plt.subplots(nrows=6, ncols=1, figsize=(10, 18), sharex=True)
# =========================
# Depth averaging function
# =========================
def extract_depth_avg_model(var, depth_range):
return var.mean('location', skipna=True) \
.sel(depth=slice(*depth_range)) \
.mean(dim='depth', skipna=True)
# =========================
# Depth ranges
# =========================
upper_depth_range_m = (-0.5, -10)
lower_depth_range_m = (-20, -40)
# =========================
# Time axis
# =========================
time = pd.to_datetime(bgc_sub['time'])
# =========================
# Variables to plot
# =========================
model_vars = [
{'var': 'NO3', 'title': 'Nitrate'},
{'var': 'PO4', 'title': 'Phosphate'},
{'var': 'SiO3', 'title': 'Silicate'},
{'var': 'NH4', 'title': 'Ammonium'},
{'var': 'Fe', 'title': 'Iron'},
{'var': 'TOTAL_CHL', 'title': 'Total Chlorophyll'}
]
# =========================
# Loop through variables
# =========================
for i, entry in enumerate(model_vars):
ax = axes[i]
# --- Handle total chlorophyll separately ---
if entry['var'] == 'TOTAL_CHL':
var = bgc_sub['spChl'] + bgc_sub['diatChl']
else:
var = bgc_sub[entry['var']]
# --- Depth averages ---
upper = extract_depth_avg_model(var, upper_depth_range_m)
lower = extract_depth_avg_model(var, lower_depth_range_m)
# --- Masks ---
mask_u = np.isfinite(upper)
mask_l = np.isfinite(lower)
# --- Plot ---
ax.plot(time[mask_u], upper[mask_u],
label='Surface (0–5 m)', marker='o', linestyle='-')
ax.plot(time[mask_l], lower[mask_l],
label='Deep (10–40 m)', marker='s', linestyle='--')
# --- Labels ---
ax.set_title(f"{entry['title']} (Model)")
ax.set_ylabel('mmol m$^{-3}$')
ax.legend()
ax.grid(alpha=0.3)
# --- Consistent x-limits ---
ax.set_xlim([
pd.Timestamp('2024-04-01'),
pd.Timestamp('2025-04-01')
])
# =========================
# X-axis formatting
# =========================
axes[-1].set_xlabel("Time")
for ax in axes:
ax.tick_params(axis='x', rotation=45)
# =========================
# Layout
# =========================
plt.tight_layout()
plt.show()