xarray compatibility
scmdata allows data to be exported to xarray. This makes it easy to use xarray’s many helpful features, most of which are not natively provided in scmdata.
import numpy as np
from scmdata import ScmRun
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/database/_database.py:9: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
import tqdm.autonotebook as tqdman
def get_data(years, n_ensemble_members, end_val, rand_pct, rng=None):
    """
    Get sample data

    Builds a linear ramp along the time axis and multiplies it by an
    independent random factor for every (timestep, ensemble member) pair.

    Parameters
    ----------
    years : np.ndarray
        Time axis. Only its length (``years.shape[0]``) is used.
    n_ensemble_members : int
        Number of columns (ensemble members) to generate.
    end_val : float
        Scale of the ramp. With ``n`` timesteps the noise-free ramp runs
        from 0 at the first timestep to ``end_val * (n - 1) / n`` at the last.
    rand_pct : float
        Size of the random perturbation. Each value is multiplied by
        ``1 + rand_pct * u`` where ``u`` is drawn uniformly from ``[0, 1)``.
    rng : np.random.Generator, optional
        Random source exposing ``random(size)``. Pass a seeded generator for
        reproducible data. If ``None`` (the default), NumPy's global random
        state is used via ``np.random.random``.

    Returns
    -------
    np.ndarray
        Array of shape ``(years.shape[0], n_ensemble_members)``.
    """
    n_timesteps = years.shape[0]
    # Fall back to the global random state so existing callers are unaffected.
    draw = np.random.random if rng is None else rng.random

    ramp = np.arange(n_timesteps) / n_timesteps * end_val
    noise = rand_pct * draw((n_timesteps, n_ensemble_members)) + 1
    # The new axis lets the 1-D ramp broadcast across the ensemble columns.
    return ramp[:, np.newaxis] * noise
# Annual time axis, 1750 to 2500 inclusive.
years = np.arange(1750, 2500 + 1)
variables = ["gsat", "gmst"]
n_variables = len(variables)
n_ensemble_members = 100

# Sample timeseries for each variable, generated in the same order as
# ``variables`` so the stacked columns line up with the labels below.
gsat_values = get_data(years, n_ensemble_members, 5.5, 0.1)
gmst_values = get_data(years, n_ensemble_members, 6.0, 0.05)

# Per-column metadata: every ensemble member of the first variable, then
# every ensemble member of the second.
variable_labels = [name for name in variables for _ in range(n_ensemble_members)]
member_labels = list(range(n_ensemble_members)) * n_variables

start = ScmRun(
    np.hstack([gsat_values, gmst_values]),
    index=years,
    columns={
        "model": "a_model",
        "scenario": "a_scenario",
        "variable": variable_labels,
        "region": "World",
        "unit": "K",
        "ensemble_member": member_labels,
    },
)
start
<ScmRun (timeseries: 200, timepoints: 751)>
Time:
Start: 1750-01-01T00:00:00
End: 2500-01-01T00:00:00
Meta:
ensemble_member model region scenario unit variable
0 0 a_model World a_scenario K gsat
1 1 a_model World a_scenario K gsat
2 2 a_model World a_scenario K gsat
3 3 a_model World a_scenario K gsat
4 4 a_model World a_scenario K gsat
.. ... ... ... ... ... ...
195 95 a_model World a_scenario K gmst
196 96 a_model World a_scenario K gmst
197 97 a_model World a_scenario K gmst
198 98 a_model World a_scenario K gmst
199 99 a_model World a_scenario K gmst
[200 rows x 6 columns]
The usual scmdata methods are of course available.
# Plume plot: quantiles are taken over "ensemble_member" and the hue
# distinguishes the values of "variable" (legend title "Variable").
start.plumeplot(
quantile_over="ensemble_member", hue_var="variable", hue_label="Variable"
)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
df.reset_index(inplace=True)
(<Axes: ylabel='K'>,
[<matplotlib.patches.Patch at 0x7f63ee780100>,
<matplotlib.collections.PolyCollection at 0x7f63ee5a07f0>,
<matplotlib.lines.Line2D at 0x7f63ee59fdc0>,
<matplotlib.patches.Patch at 0x7f63ee660b50>,
<matplotlib.lines.Line2D at 0x7f63ee5afd90>,
<matplotlib.lines.Line2D at 0x7f63ee7dd250>,
<matplotlib.patches.Patch at 0x7f63ee580940>,
<matplotlib.lines.Line2D at 0x7f63ee6627f0>,
<matplotlib.lines.Line2D at 0x7f63ee6dbd90>])
However, we can cast to an xarray Dataset and then all the xarray methods become available too.
# Export with "ensemble_member" as a dimension alongside time. In the output
# below each variable becomes a data variable and the remaining metadata
# (scenario, model, region) is stored as attributes.
xr_ds = start.to_xarray(dimensions=("ensemble_member",))
xr_ds
<xarray.Dataset>
Dimensions:          (time: 751, ensemble_member: 100)
Coordinates:
  * time             (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
  * ensemble_member  (ensemble_member) int64 0 1 2 3 4 5 6 ... 94 95 96 97 98 99
Data variables:
    gsat             (ensemble_member, time) float64 0.0 0.007528 ... 5.85 5.777
    gmst             (ensemble_member, time) float64 0.0 0.00835 ... 5.999 6.152
Attributes:
    scmdata_metadata_scenario:  a_scenario
    scmdata_metadata_model:     a_model
    scmdata_metadata_region:    World
For example, calculating statistics.
# Reduce over the ensemble_member dimension; the result keeps only time.
xr_ds.median(dim="ensemble_member")
<xarray.Dataset>
Dimensions:  (time: 751)
Coordinates:
  * time     (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
Data variables:
    gsat     (time) float64 0.0 0.007715 0.01539 0.02325 ... 5.736 5.792 5.779
    gmst     (time) float64 0.0 0.008185 0.01644 0.02451 ... 6.138 6.121 6.126
Plotting timeseries.
# One line per ensemble member; the legend is switched off (add_legend=False).
xr_ds["gsat"].plot.line(hue="ensemble_member", add_legend=False)
[<matplotlib.lines.Line2D at 0x7f63ebc66430>,
<matplotlib.lines.Line2D at 0x7f63ebc3b670>,
<matplotlib.lines.Line2D at 0x7f63ebc3b790>,
<matplotlib.lines.Line2D at 0x7f63ebc3b970>,
<matplotlib.lines.Line2D at 0x7f63ebc3ba60>,
<matplotlib.lines.Line2D at 0x7f63ebc3bb50>,
<matplotlib.lines.Line2D at 0x7f63ebc3bc40>,
<matplotlib.lines.Line2D at 0x7f63ebc3bd30>,
<matplotlib.lines.Line2D at 0x7f63ebc3be20>,
<matplotlib.lines.Line2D at 0x7f63ebc3bf10>,
<matplotlib.lines.Line2D at 0x7f63ebc3dfd0>,
<matplotlib.lines.Line2D at 0x7f63ebc3d070>,
<matplotlib.lines.Line2D at 0x7f63ebc3d160>,
<matplotlib.lines.Line2D at 0x7f63ebc3d250>,
<matplotlib.lines.Line2D at 0x7f63ebc3d340>,
<matplotlib.lines.Line2D at 0x7f63ebc3d430>,
<matplotlib.lines.Line2D at 0x7f63ebc3d520>,
<matplotlib.lines.Line2D at 0x7f63ebc3d610>,
<matplotlib.lines.Line2D at 0x7f63ebc3d700>,
<matplotlib.lines.Line2D at 0x7f63ebc3d7f0>,
<matplotlib.lines.Line2D at 0x7f63ebc3d8e0>,
<matplotlib.lines.Line2D at 0x7f63ebc3d9d0>,
<matplotlib.lines.Line2D at 0x7f63ebc3dd00>,
<matplotlib.lines.Line2D at 0x7f63ebc3dbb0>,
<matplotlib.lines.Line2D at 0x7f63ebc37e50>,
<matplotlib.lines.Line2D at 0x7f63ebc37f40>,
<matplotlib.lines.Line2D at 0x7f63ebc37df0>,
<matplotlib.lines.Line2D at 0x7f63ebc37ca0>,
<matplotlib.lines.Line2D at 0x7f63ebc37b50>,
<matplotlib.lines.Line2D at 0x7f63ebc37a30>,
<matplotlib.lines.Line2D at 0x7f63ebc37940>,
<matplotlib.lines.Line2D at 0x7f63ebc37850>,
<matplotlib.lines.Line2D at 0x7f63ebc37760>,
<matplotlib.lines.Line2D at 0x7f63ebc37670>,
<matplotlib.lines.Line2D at 0x7f63ebc37580>,
<matplotlib.lines.Line2D at 0x7f63ebc37490>,
<matplotlib.lines.Line2D at 0x7f63ebc373a0>,
<matplotlib.lines.Line2D at 0x7f63ebc372b0>,
<matplotlib.lines.Line2D at 0x7f63ebc371c0>,
<matplotlib.lines.Line2D at 0x7f63ebc370d0>,
<matplotlib.lines.Line2D at 0x7f63ebc29430>,
<matplotlib.lines.Line2D at 0x7f63ebc29520>,
<matplotlib.lines.Line2D at 0x7f63ebc29610>,
<matplotlib.lines.Line2D at 0x7f63ebc29700>,
<matplotlib.lines.Line2D at 0x7f63ebc297f0>,
<matplotlib.lines.Line2D at 0x7f63ebc298e0>,
<matplotlib.lines.Line2D at 0x7f63ebc29a00>,
<matplotlib.lines.Line2D at 0x7f63ebc29af0>,
<matplotlib.lines.Line2D at 0x7f63ebc29be0>,
<matplotlib.lines.Line2D at 0x7f63ebc29cd0>,
<matplotlib.lines.Line2D at 0x7f63ebc29dc0>,
<matplotlib.lines.Line2D at 0x7f63ebc29eb0>,
<matplotlib.lines.Line2D at 0x7f63ebc29fa0>,
<matplotlib.lines.Line2D at 0x7f63ebc29340>,
<matplotlib.lines.Line2D at 0x7f63ebc29250>,
<matplotlib.lines.Line2D at 0x7f63ebc29160>,
<matplotlib.lines.Line2D at 0x7f63ebc29070>,
<matplotlib.lines.Line2D at 0x7f63ebc8f670>,
<matplotlib.lines.Line2D at 0x7f63ebc8f940>,
<matplotlib.lines.Line2D at 0x7f63ebc8fbb0>,
<matplotlib.lines.Line2D at 0x7f63ebc8ffa0>,
<matplotlib.lines.Line2D at 0x7f63ebc8fe80>,
<matplotlib.lines.Line2D at 0x7f63ebc8fd90>,
<matplotlib.lines.Line2D at 0x7f63ebc8fca0>,
<matplotlib.lines.Line2D at 0x7f63ebc8fac0>,
<matplotlib.lines.Line2D at 0x7f63ebc8f9d0>,
<matplotlib.lines.Line2D at 0x7f63ebc8f700>,
<matplotlib.lines.Line2D at 0x7f63ebc3ffa0>,
<matplotlib.lines.Line2D at 0x7f63ebc3feb0>,
<matplotlib.lines.Line2D at 0x7f63ebc3fdc0>,
<matplotlib.lines.Line2D at 0x7f63ebc3fcd0>,
<matplotlib.lines.Line2D at 0x7f63ebc3fbe0>,
<matplotlib.lines.Line2D at 0x7f63ebc3faf0>,
<matplotlib.lines.Line2D at 0x7f63ebc3fa00>,
<matplotlib.lines.Line2D at 0x7f63ebc3f910>,
<matplotlib.lines.Line2D at 0x7f63ebc3f820>,
<matplotlib.lines.Line2D at 0x7f63ebc3f730>,
<matplotlib.lines.Line2D at 0x7f63ebc3f640>,
<matplotlib.lines.Line2D at 0x7f63ebc3f550>,
<matplotlib.lines.Line2D at 0x7f63ebc3f460>,
<matplotlib.lines.Line2D at 0x7f63ebc3f370>,
<matplotlib.lines.Line2D at 0x7f63ebc3f280>,
<matplotlib.lines.Line2D at 0x7f63ebc3f190>,
<matplotlib.lines.Line2D at 0x7f63ebc3f0a0>,
<matplotlib.lines.Line2D at 0x7f63ebc30f70>,
<matplotlib.lines.Line2D at 0x7f63ebc30e80>,
<matplotlib.lines.Line2D at 0x7f63ebc30d90>,
<matplotlib.lines.Line2D at 0x7f63ebc30ca0>,
<matplotlib.lines.Line2D at 0x7f63ebc30bb0>,
<matplotlib.lines.Line2D at 0x7f63ebc30ac0>,
<matplotlib.lines.Line2D at 0x7f63ebc309d0>,
<matplotlib.lines.Line2D at 0x7f63ebc308e0>,
<matplotlib.lines.Line2D at 0x7f63ebc307f0>,
<matplotlib.lines.Line2D at 0x7f63ebc30700>,
<matplotlib.lines.Line2D at 0x7f63ebc30610>,
<matplotlib.lines.Line2D at 0x7f63ebc30520>,
<matplotlib.lines.Line2D at 0x7f63ebc30430>,
<matplotlib.lines.Line2D at 0x7f63ebc30340>,
<matplotlib.lines.Line2D at 0x7f63ebc30250>,
<matplotlib.lines.Line2D at 0x7f63ebc30160>]
Selecting and plotting timeseries.
# Keep only ensemble members 0-9 (selected by coordinate value), then draw
# one line per remaining member.
xr_ds["gsat"].sel(ensemble_member=range(10)).plot.line(
hue="ensemble_member", add_legend=False
)
[<matplotlib.lines.Line2D at 0x7f63ebb5aa60>,
<matplotlib.lines.Line2D at 0x7f63ebb00b20>,
<matplotlib.lines.Line2D at 0x7f63ebb00c70>,
<matplotlib.lines.Line2D at 0x7f63ebb00df0>,
<matplotlib.lines.Line2D at 0x7f63ebb00ee0>,
<matplotlib.lines.Line2D at 0x7f63ebb00fd0>,
<matplotlib.lines.Line2D at 0x7f63ebb0f100>,
<matplotlib.lines.Line2D at 0x7f63ebb0f1f0>,
<matplotlib.lines.Line2D at 0x7f63ebb0f2e0>,
<matplotlib.lines.Line2D at 0x7f63ebb0f3d0>]
Scatter plots.
# gsat on the x-axis against gmst on the y-axis, coloured by ensemble member;
# alpha=0.3 makes the markers translucent.
xr_ds.plot.scatter(x="gsat", y="gmst", hue="ensemble_member", alpha=0.3)
<matplotlib.collections.PathCollection at 0x7f63eba27fd0>
Or combinations of calculations and plots.
# Take the median over ensemble members first, then scatter the resulting
# gsat values against the gmst values.
xr_ds.median(dim="ensemble_member").plot.scatter(x="gsat", y="gmst")
<matplotlib.collections.PathCollection at 0x7f63eb9781f0>