Idealized Synthetic Data
Under development
import numpy as np
import pandas as pd
import xarray as xr
from melodies_monet import driver
Please install h5py to open files from the Amazon S3 servers.
Please install h5netcdf to open files from the Amazon S3 servers.
an = driver.analysis()
an.control = "control_idealized.yaml"
an.read_control()
an
analysis(
control='control_idealized.yaml',
control_dict=...,
models={},
obs={},
paired={},
start_time=Timestamp('2019-09-09 00:00:00'),
end_time=Timestamp('2019-09-10 00:00:00'),
download_maps=True,
output_dir='./output/idealized',
debug=True,
)
Note: This is the complete file that was loaded.
control_idealized.yaml
1analysis:
2 start_time: "2019-09-09 00:00"
3 end_time: "2019-09-10 00:00"
4 output_dir: ./output/idealized
5 debug: True
6
7model:
8 test_model:
9 files: test_model.nc
10 mod_type: random
11 variables:
12 A:
13 units: "Units of A"
14 unit_scale: 1
15 unit_scale_method: "*"
16 B:
17 units: "Units of B"
18 unit_scale: 1
19 unit_scale_method: "*"
20 mapping:
21 test_obs:
22 A: "A_obs"
23 B: "B_obs"
24 projection: None # unused
25
26obs:
27 test_obs:
28 # use_airnow: True
29 filename: test_obs.nc
30 obs_type: pt_sfc
31
32plots:
33 plot_grp1:
34 type: 'timeseries'
35 default_plot_kwargs: # required (with at least one key)
36 linewidth: 2.0
37 domain_type: ['all'] # required
38 domain_name: ['CONUS'] # required
39 data: ['test_obs_test_model'] # required
40 data_proc: # These four seem to be required for time series
41 rem_obs_nan: True # True: Remove all points where model or obs variable is NaN. False: Remove only points where model variable is NaN.
42 ts_select_time: 'time' # 'time' for UTC or 'time_local'
43 ts_avg_window: '3H' # Options: None for no averaging or list pandas resample rule (e.g., 'H', 'D')
44 # ^ TODO: null setting seems not working
45 set_axis: False # If True, add vmin_plot and vmax_plot for each variable in obs.
46
47 plot_grp2:
48 type: 'spatial_overlay'
49 fig_kwargs:
50 states: True
51 figsize: [10, 5]
52 domain_type: ['all'] # required
53 domain_name: ['CONUS'] # required
54 data: ['test_obs_test_model'] # required
55 data_proc:
56 rem_obs_nan: True
57 set_axis: True
Generate data
Model
rs = np.random.RandomState(42)
control = an.control_dict
nlat = 100
nlon = 200
lon = np.linspace(-161, -60, nlon)
lat = np.linspace(18, 60, nlat)
Lon, Lat = np.meshgrid(lon, lat)
time = pd.date_range(control['analysis']['start_time'], control['analysis']['end_time'], freq="3H")
ntime = time.size
# Generate translating and expanding Gaussian
x_ = np.linspace(-1, 1, lon.size)
y_ = np.linspace(-1, 1, lat.size)
x, y = np.meshgrid(x_, y_)
mu = np.linspace(-0.5, 0.5, ntime)
sigma = np.linspace(0.3, 1, ntime)
g = np.exp(
-(
(
(x[np.newaxis, ...] - mu[:, np.newaxis, np.newaxis])**2
+ y[np.newaxis, ...]**2
) / (
2 * sigma[:, np.newaxis, np.newaxis]**2
)
)
)
# Coordinates
lat_da = xr.DataArray(lat, dims="lat", attrs={'longname': 'latitude', 'units': 'degN'}, name="lat")
lon_da = xr.DataArray(lon, dims="lon", attrs={'longname': 'longitude', 'units': 'degE'}, name="lon")
time_da = xr.DataArray(time, dims="time", name="time")
# Generate dataset
field_names = control['model']['test_model']['variables'].keys()
ds_dict = dict()
for field_name in field_names:
units = control['model']['test_model']['variables'][field_name]['units']
# data = rs.rand(ntime, nlat, nlon)
data = g
da = xr.DataArray(
data,
# coords={"lat": lat_da, "lon": lon_da, "time": time_da},
coords=[time_da, lat_da, lon_da],
dims=['time', 'lat', 'lon'],
attrs={'units': units},
)
ds_dict[field_name] = da
ds = xr.Dataset(ds_dict).expand_dims("z", axis=1)
ds["z"] = [1]
ds_mod = ds
ds_mod
<xarray.Dataset> Dimensions: (time: 9, lat: 100, lon: 200, z: 1) Coordinates: * time (time) datetime64[ns] 2019-09-09 2019-09-09T03:00:00 ... 2019-09-10 * lat (lat) float64 18.0 18.42 18.85 19.27 ... 58.73 59.15 59.58 60.0 * lon (lon) float64 -161.0 -160.5 -160.0 -159.5 ... -61.02 -60.51 -60.0 * z (z) int32 1 Data variables: A (time, z, lat, lon) float64 0.000964 0.001019 ... 0.5379 0.5353 B (time, z, lat, lon) float64 0.000964 0.001019 ... 0.5379 0.5353
ds.squeeze("z").A.plot(col="time")
<xarray.plot.facetgrid.FacetGrid at 0x14c279814c0>

ds.to_netcdf(control['model']['test_model']['files'])
Obs
# Generate positions
# TODO: only within land boundaries
n = 500
lats = rs.uniform(lat[0], lat[-1], n)#[np.newaxis, :]
lons = rs.uniform(lon[0], lon[-1], n)#[np.newaxis, :]
siteid = np.arange(n)[np.newaxis, :].astype(str)
# Generate dataset
field_names = control['model']['test_model']['variables'].keys()
ds_dict = dict()
for field_name0 in field_names:
field_name = control['model']['test_model']['mapping']['test_obs'][field_name0]
units = control['model']['test_model']['variables'][field_name0]['units']
values = (
ds_mod.A.squeeze().interp(lat=xr.DataArray(lats), lon=xr.DataArray(lons)).values
+ rs.normal(scale=0.3, size=(ntime, n))
)[:, np.newaxis]
da = xr.DataArray(
values,
coords={
"x": ("x", np.arange(n)), # !!!
"time": ("time", time),
"latitude": (("y", "x"), lats[np.newaxis, :], lat_da.attrs),
"longitude": (("y", "x"), lons[np.newaxis, :], lon_da.attrs),
"siteid": (("y", "x"), siteid),
},
dims=("time", "y", "x"),
attrs={'units': units},
)
ds_dict[field_name] = da
ds = xr.Dataset(ds_dict)
ds
<xarray.Dataset> Dimensions: (x: 500, time: 9, y: 1) Coordinates: * x (x) int32 0 1 2 3 4 5 6 7 8 ... 492 493 494 495 496 497 498 499 * time (time) datetime64[ns] 2019-09-09 ... 2019-09-10 latitude (y, x) float64 33.73 57.93 48.74 43.14 ... 21.26 58.92 59.42 longitude (y, x) float64 -90.49 -106.9 -129.7 ... -147.2 -65.03 -116.0 siteid (y, x) <U11 '0' '1' '2' '3' '4' ... '495' '496' '497' '498' '499' Dimensions without coordinates: y Data variables: A_obs (time, y, x) float64 0.06143 -0.3988 0.3937 ... 0.3557 0.5418 B_obs (time, y, x) float64 -0.1842 0.1995 0.5878 ... 0.8395 0.6286
ds.to_netcdf(control['obs']['test_obs']['filename'])
Load data
an.open_models()
test_model
{'files': 'test_model.nc', 'mod_type': 'random', 'variables': {'A': {'units': 'Units of A', 'unit_scale': 1, 'unit_scale_method': '*'}, 'B': {'units': 'Units of B', 'unit_scale': 1, 'unit_scale_method': '*'}}, 'mapping': {'test_obs': {'A': 'A_obs', 'B': 'B_obs'}}, 'projection': 'None'}
test_model.nc
an.models['test_model'].obj
<xarray.Dataset> Dimensions: (time: 9, lat: 100, lon: 200, z: 1) Coordinates: * time (time) datetime64[ns] 2019-09-09 2019-09-09T03:00:00 ... 2019-09-10 * lat (lat) float64 18.0 18.42 18.85 19.27 ... 58.73 59.15 59.58 60.0 * lon (lon) float64 -161.0 -160.5 -160.0 -159.5 ... -61.02 -60.51 -60.0 * z (z) int32 1 Data variables: A (time, z, lat, lon) float64 0.000964 0.001019 ... 0.5379 0.5353 B (time, z, lat, lon) float64 0.000964 0.001019 ... 0.5379 0.5353
an.open_obs()
an.obs['test_obs'].obj
<xarray.Dataset> Dimensions: (x: 500, time: 9, y: 1) Coordinates: * x (x) int32 0 1 2 3 4 5 6 7 8 ... 492 493 494 495 496 497 498 499 * time (time) datetime64[ns] 2019-09-09 ... 2019-09-10 latitude (y, x) float64 33.73 57.93 48.74 43.14 ... 21.26 58.92 59.42 longitude (y, x) float64 -90.49 -106.9 -129.7 ... -147.2 -65.03 -116.0 siteid (y, x) object '0' '1' '2' '3' '4' ... '496' '497' '498' '499' Dimensions without coordinates: y Data variables: A_obs (time, y, x) float64 0.06143 -0.3988 0.3937 ... 0.3557 0.5418 B_obs (time, y, x) float64 -0.1842 0.1995 0.5878 ... 0.8395 0.6286
%%time
an.pair_data()
[########################################] | 100% Completed | 0.1s
[########################################] | 100% Completed | 0.1s
[########################################] | 100% Completed | 0.1s
[########################################] | 100% Completed | 0.1s
Wall time: 201 ms
an.paired
{'test_obs_test_model': pair(
type='pt_sfc',
radius_of_influence=1000000.0,
obs='test_obs',
model='test_model',
model_vars=['A', 'B'],
obs_vars=['A_obs', 'B_obs'],
filename='test_obs_test_model.nc',
)}
an.paired['test_obs_test_model'].obj
<xarray.Dataset> Dimensions: (x: 500, time: 9) Coordinates: * x (x) int32 0 1 2 3 4 5 6 7 8 ... 492 493 494 495 496 497 498 499 * time (time) datetime64[ns] 2019-09-09 ... 2019-09-10 Data variables: A_obs (time, x) float64 0.06143 -0.3988 0.1292 ... 0.6373 0.1377 0.5593 B_obs (time, x) float64 -0.1842 0.1995 0.4724 ... 0.8742 0.2222 0.3161 A (time, x) float64 0.008033 0.001784 0.0003207 ... 0.3443 0.2698 B (time, x) float64 0.008033 0.001784 0.0003207 ... 0.3443 0.2698 latitude (x) float64 33.73 57.93 18.86 19.32 ... 39.95 35.96 19.07 22.53 longitude (x) float64 -90.49 -106.9 -99.05 -143.9 ... -143.6 -141.6 -156.9 siteid (x) object '0' '1' '10' '100' '101' ... '95' '96' '97' '98' '99'
an.paired['test_obs_test_model'].obj.dims
Frozen({'x': 500, 'time': 9})
Plot
%%time
an.plotting()
Warning: variables dict for A_obs not provided, so defaults used
Warning: variables dict for B_obs not provided, so defaults used
Wall time: 4.19 s



