{ "cells": [ { "cell_type": "markdown", "id": "e3475f48-f578-4dd3-ba2d-3a867c6017ec", "metadata": {}, "source": [ "# Idealized Synthetic Data\n", "\n", "*Under development*" ] }, { "cell_type": "code", "execution_count": null, "id": "e269871b-d164-4c66-a41a-fb9363519976", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "from IPython.display import display # so can run as script too\n", "\n", "from melodies_monet import driver" ] }, { "cell_type": "code", "execution_count": null, "id": "ccc2046a-96dc-4f69-9eca-489d06decda9", "metadata": {}, "outputs": [], "source": [ "an = driver.analysis()\n", "an.control = \"control_idealized.yaml\"\n", "an.read_control()\n", "an" ] }, { "cell_type": "markdown", "id": "da810d1b-4197-464e-9533-29baf8a2bbce", "metadata": {}, "source": [ "````{admonition} Note: This is the complete file that was loaded.\n", ":class: dropdown\n", "\n", "```{literalinclude} control_idealized.yaml\n", ":caption:\n", ":linenos:\n", "```\n", "````" ] }, { "cell_type": "markdown", "id": "4ddcd0a8-28fd-4438-8a50-3ea8cfec1154", "metadata": {}, "source": [ "## Generate data\n", "\n", "### Model" ] }, { "cell_type": "code", "execution_count": null, "id": "6da3a22f-7d2d-49b5-973c-a6758463aefb", "metadata": { "tags": [ "hide-input" ] }, "outputs": [], "source": [ "rs = np.random.RandomState(42)\n", "\n", "control = an.control_dict\n", "\n", "nlat = 100\n", "nlon = 200\n", "\n", "lon = np.linspace(-161, -60, nlon)\n", "lat = np.linspace(18, 60, nlat)\n", "Lon, Lat = np.meshgrid(lon, lat)\n", "\n", "time = pd.date_range(control['analysis']['start_time'], control['analysis']['end_time'], freq=\"3H\")\n", "ntime = time.size\n", "\n", "# Generate translating and expanding Gaussian\n", "x_ = np.linspace(-1, 1, lon.size)\n", "y_ = np.linspace(-1, 1, lat.size)\n", "x, y = np.meshgrid(x_, y_)\n", "mu = np.linspace(-0.5, 0.5, ntime)\n", "sigma = np.linspace(0.3, 1, ntime)\n", "g = np.exp(\n", " -(\n", " (\n", " (x[np.newaxis, ...] - mu[:, np.newaxis, np.newaxis])**2\n", " + y[np.newaxis, ...]**2\n", " ) / ( \n", " 2 * sigma[:, np.newaxis, np.newaxis]**2\n", " ) \n", " ) \n", ")\n", "\n", "# Coordinates\n", "lat_da = xr.DataArray(lat, dims=\"lat\", attrs={'longname': 'latitude', 'units': 'degN'}, name=\"lat\")\n", "lon_da = xr.DataArray(lon, dims=\"lon\", attrs={'longname': 'longitude', 'units': 'degE'}, name=\"lon\")\n", "time_da = xr.DataArray(time, dims=\"time\", name=\"time\")\n", "\n", "# Generate dataset\n", "field_names = control['model']['test_model']['variables'].keys()\n", "ds_dict = dict()\n", "for field_name in field_names:\n", " units = control['model']['test_model']['variables'][field_name]['units']\n", " # data = rs.rand(ntime, nlat, nlon)\n", " data = g\n", " da = xr.DataArray(\n", " data,\n", " # coords={\"lat\": lat_da, \"lon\": lon_da, \"time\": time_da},\n", " coords=[time_da, lat_da, lon_da],\n", " dims=['time', 'lat', 'lon'],\n", " attrs={'units': units},\n", " )\n", " ds_dict[field_name] = da\n", "ds = xr.Dataset(ds_dict).expand_dims(\"z\", axis=1)\n", "ds[\"z\"] = [1]\n", "\n", "ds_mod = ds\n", "ds_mod" ] }, { "cell_type": "code", "execution_count": null, "id": "6d873918-ced0-4658-a739-67008fc11e4b", "metadata": {}, "outputs": [], "source": [ "ds.squeeze(\"z\").A.plot(col=\"time\", col_wrap=5, size=3);" ] }, { "cell_type": "code", "execution_count": null, "id": "be956982-814e-4e15-bfb9-674926158dc1", "metadata": {}, "outputs": [], "source": [ "ds.to_netcdf(control['model']['test_model']['files'])" ] }, { "cell_type": "markdown", "id": "ebe5aecf-5482-4fc4-a1a1-020670f53548", "metadata": {}, "source": [ "### Obs" ] }, { "cell_type": "code", "execution_count": null, "id": "9b5d5cd4-9701-4252-ac24-9107e6018d0c", "metadata": { "tags": [ "hide-input" ] }, "outputs": [], "source": [ "from string import ascii_lowercase\n", "\n", "# Generate positions\n", "# TODO: only within land boundaries would be cleaner\n", "n = 500\n", "lats = rs.uniform(lat[0], lat[-1], n)\n", "lons = rs.uniform(lon[0], lon[-1], n)\n", "siteid = np.array([f\"{x:0{len(str(n))}}\" for x in range(n)])[np.newaxis, :]\n", "something_vlen = np.array(\n", " [\n", " \"\".join(rs.choice(list(ascii_lowercase), size=x, replace=True))\n", " for x in rs.randint(low=2, high=8, size=n)\n", " ],\n", " dtype=str,\n", ")[np.newaxis, :]\n", "\n", "# Generate dataset\n", "field_names = control['model']['test_model']['variables'].keys()\n", "ds_dict = dict()\n", "for field_name0 in field_names:\n", " field_name = control['model']['test_model']['mapping']['test_obs'][field_name0]\n", " units = control['model']['test_model']['variables'][field_name0]['units']\n", " values = (\n", " ds_mod.A.squeeze().interp(lat=xr.DataArray(lats), lon=xr.DataArray(lons)).values\n", " + rs.normal(scale=0.3, size=(ntime, n))\n", " )[:, np.newaxis]\n", " da = xr.DataArray(\n", " values,\n", " coords={\n", " \"x\": (\"x\", np.arange(n)), # !!!\n", " \"time\": (\"time\", time),\n", " \"latitude\": ((\"y\", \"x\"), lats[np.newaxis, :], lat_da.attrs),\n", " \"longitude\": ((\"y\", \"x\"), lons[np.newaxis, :], lon_da.attrs),\n", " \"siteid\": ((\"y\", \"x\"), siteid),\n", " \"something_vlen\": ((\"y\", \"x\"), something_vlen),\n", " },\n", " dims=(\"time\", \"y\", \"x\"),\n", " attrs={'units': units},\n", " )\n", " ds_dict[field_name] = da\n", "ds = xr.Dataset(ds_dict)\n", "ds" ] }, { "cell_type": "code", "execution_count": null, "id": "5ce817dd-c555-45ee-ae21-89af66abebd3", "metadata": {}, "outputs": [], "source": [ "ds.to_netcdf(control['obs']['test_obs']['filename'])" ] }, { "cell_type": "markdown", "id": "cb8e55c3-063a-401b-8b26-a76b68cae527", "metadata": {}, "source": [ "## Load" ] }, { "cell_type": "code", "execution_count": null, "id": "2c09dc6e-82ab-43fa-82f8-2715f4aa448e", "metadata": {}, "outputs": [], "source": [ "an.open_models()" ] }, { "cell_type": "code", "execution_count": null, "id": "0f63480d-2cee-404e-b1d5-655b00d3832a", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "an.models['test_model'].obj" ] }, { "cell_type": "code", "execution_count": null, "id": "cbd30db3-9d78-4752-9457-fab3e25bf91f", "metadata": {}, "outputs": [], "source": [ "an.open_obs()" ] }, { "cell_type": "code", "execution_count": null, "id": "d773da3c-d8fe-41be-99b0-f2e18588917a", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "an.obs['test_obs'].obj" ] }, { "cell_type": "markdown", "id": "df8efba3-f801-41ff-82cc-2ce7356fe1df", "metadata": {}, "source": [ "## Pair" ] }, { "cell_type": "code", "execution_count": null, "id": "2423b73a-d5d9-4700-bdab-9c3ed1378fb6", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%time\n", "\n", "an.pair_data()" ] }, { "cell_type": "code", "execution_count": null, "id": "58d9ff77-09c3-4761-bfdb-f90aeb030314", "metadata": {}, "outputs": [], "source": [ "an.paired" ] }, { "cell_type": "code", "execution_count": null, "id": "02f11879-2172-4a13-bf70-b73fc474987f", "metadata": {}, "outputs": [], "source": [ "an.paired['test_obs_test_model'].obj" ] }, { "cell_type": "code", "execution_count": null, "id": "969971dc-3023-46cc-b7b6-31a246641fde", "metadata": {}, "outputs": [], "source": [ "an.paired['test_obs_test_model'].obj.dims" ] }, { "cell_type": "markdown", "id": "546f4430-da7f-48ca-b861-00397156631f", "metadata": {}, "source": [ "## Plot" ] }, { "cell_type": "code", "execution_count": null, "id": "e415521b-1d33-4760-80ec-f81db96496df", "metadata": {}, "outputs": [], "source": [ "%%time\n", "\n", "an.plotting()" ] }, { "cell_type": "markdown", "id": "054d43e2-3fd8-4373-a42a-5e96608aabe6", "metadata": {}, "source": [ "## Save/load paired data -- netCDF\n", "\n", "And compare to the original pair object." ] }, { "cell_type": "code", "execution_count": null, "id": "53eb8ff2-d9f2-4405-9c98-25be370ac2f3", "metadata": {}, "outputs": [], "source": [ "from copy import deepcopy\n", "\n", "p0 = deepcopy(an.paired['test_obs_test_model'].obj)\n", "\n", "an.save_analysis()\n", "an.read_analysis()\n", "p1 = deepcopy(an.paired['test_obs_test_model'].obj)\n", "p1.close()\n", "\n", "display(p0)\n", "display(p1)\n", "assert p1 is not p0 and p1.equals(p0)" ] }, { "cell_type": "markdown", "id": "5821ec51-0862-4c7a-ae77-f32b5d240881", "metadata": {}, "source": [ "## Save/load paired data -- Python object" ] }, { "cell_type": "code", "execution_count": null, "id": "4fdc9d1d-1dd5-41e1-a8d3-83026d05d27b", "metadata": { "tags": [] }, "outputs": [], "source": [ "print(an.save)\n", "an.save[\"paired\"][\"method\"] = \"pkl\"\n", "del an.save[\"paired\"][\"prefix\"]\n", "# We could leave `prefix` since unused, but we need to set `output_name`\n", "an.save[\"paired\"][\"output_name\"] = \"asdf.joblib\"\n", "print(\"->\", an.save)\n", "\n", "print()\n", "print(an.read)\n", "an.read[\"paired\"][\"method\"] = \"pkl\"\n", "an.read[\"paired\"][\"filenames\"] = \"asdf.joblib\"\n", "print(\"->\", an.read)\n", "\n", "print()\n", "an.save_analysis()\n", "an.read_analysis()\n", "p2 = deepcopy(an.paired['test_obs_test_model'].obj)\n", "p2.close()\n", "\n", "# display(p0)\n", "display(p2)\n", "assert p2 is not p0 and p2.equals(p0)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" } }, "nbformat": 4, "nbformat_minor": 5 }