xarray compatibility

scmdata allows data to be exported to xarray. This makes it easy to use xarray’s many helpful features, most of which are not natively provided in scmdata.

import numpy as np

from scmdata import ScmRun
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/database/_database.py:9: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  import tqdm.autonotebook as tqdman
def get_data(years, n_ensemble_members, end_val, rand_pct):
    """
    Build a synthetic ensemble of noisy, linearly increasing timeseries

    Every column is one ensemble member: a ramp that starts at zero and
    rises towards ``end_val``, with each point multiplied by its own random
    factor.

    Parameters
    ----------
    years
        Array of time points. Only its length (``years.shape[0]``) is used.

    n_ensemble_members
        Number of columns (ensemble members) to generate.

    end_val
        Value the noise-free ramp rises towards. The last point of the ramp
        is ``end_val * (n - 1) / n`` where ``n`` is the number of time points.

    rand_pct
        Scale of the multiplicative noise. Each point is multiplied by
        ``1 + rand_pct * u`` where ``u`` is drawn from ``np.random.random``.

    Returns
    -------
    Array of shape ``(years.shape[0], n_ensemble_members)``
    """
    n_years = years.shape[0]

    # Noise-free ramp shared by all ensemble members
    ramp = np.arange(n_years) / n_years * end_val

    # Independent multiplicative perturbation for every (time, member) pair
    scaling = rand_pct * np.random.random((n_years, n_ensemble_members)) + 1

    # Turn the ramp into a column vector so it broadcasts across members
    return ramp[:, np.newaxis] * scaling
# Annual time axis covering 1750 to 2500 inclusive
years = np.arange(1750, 2500 + 1)
variables = ["gsat", "gmst"]
n_variables = len(variables)
n_ensemble_members = 100

# One block of sample data per variable, generated in the same order as
# ``variables`` so the columns line up with the metadata labels below
gsat_values = get_data(years, n_ensemble_members, 5.5, 0.1)
gmst_values = get_data(years, n_ensemble_members, 6.0, 0.05)

# Per-column metadata: each variable name repeated once per ensemble member,
# and the ensemble member ids repeated once per variable
variable_labels = [name for name in variables for _ in range(n_ensemble_members)]
ensemble_member_labels = list(range(n_ensemble_members)) * n_variables

start = ScmRun(
    np.hstack([gsat_values, gmst_values]),
    index=years,
    columns={
        "model": "a_model",
        "scenario": "a_scenario",
        "variable": variable_labels,
        "region": "World",
        "unit": "K",
        "ensemble_member": ensemble_member_labels,
    },
)
start
<ScmRun (timeseries: 200, timepoints: 751)>
Time:
	Start: 1750-01-01T00:00:00
	End: 2500-01-01T00:00:00
Meta:
	     ensemble_member    model region    scenario unit variable
	0                  0  a_model  World  a_scenario    K     gsat
	1                  1  a_model  World  a_scenario    K     gsat
	2                  2  a_model  World  a_scenario    K     gsat
	3                  3  a_model  World  a_scenario    K     gsat
	4                  4  a_model  World  a_scenario    K     gsat
	..               ...      ...    ...         ...  ...      ...
	195               95  a_model  World  a_scenario    K     gmst
	196               96  a_model  World  a_scenario    K     gmst
	197               97  a_model  World  a_scenario    K     gmst
	198               98  a_model  World  a_scenario    K     gmst
	199               99  a_model  World  a_scenario    K     gmst
	
	[200 rows x 6 columns]

The usual scmdata methods are of course available.

start.plumeplot(
    quantile_over="ensemble_member", hue_var="variable", hue_label="Variable"
)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.1/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
(<Axes: ylabel='K'>,
 [<matplotlib.patches.Patch at 0x7f63ee780100>,
  <matplotlib.collections.PolyCollection at 0x7f63ee5a07f0>,
  <matplotlib.lines.Line2D at 0x7f63ee59fdc0>,
  <matplotlib.patches.Patch at 0x7f63ee660b50>,
  <matplotlib.lines.Line2D at 0x7f63ee5afd90>,
  <matplotlib.lines.Line2D at 0x7f63ee7dd250>,
  <matplotlib.patches.Patch at 0x7f63ee580940>,
  <matplotlib.lines.Line2D at 0x7f63ee6627f0>,
  <matplotlib.lines.Line2D at 0x7f63ee6dbd90>])
../_images/cd4fc91e1b37ac451bec825c6a7222ebf4633674f9b90da6c662565ea4357122.png

However, we can cast to an xarray Dataset and then all the xarray methods become available too.

xr_ds = start.to_xarray(dimensions=("ensemble_member",))
xr_ds
<xarray.Dataset>
Dimensions:          (time: 751, ensemble_member: 100)
Coordinates:
  * time             (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
  * ensemble_member  (ensemble_member) int64 0 1 2 3 4 5 6 ... 94 95 96 97 98 99
Data variables:
    gsat             (ensemble_member, time) float64 0.0 0.007528 ... 5.85 5.777
    gmst             (ensemble_member, time) float64 0.0 0.00835 ... 5.999 6.152
Attributes:
    scmdata_metadata_scenario:  a_scenario
    scmdata_metadata_model:     a_model
    scmdata_metadata_region:    World

For example, calculating statistics.

xr_ds.median(dim="ensemble_member")
<xarray.Dataset>
Dimensions:  (time: 751)
Coordinates:
  * time     (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
Data variables:
    gsat     (time) float64 0.0 0.007715 0.01539 0.02325 ... 5.736 5.792 5.779
    gmst     (time) float64 0.0 0.008185 0.01644 0.02451 ... 6.138 6.121 6.126

Plotting timeseries.

xr_ds["gsat"].plot.line(hue="ensemble_member", add_legend=False)
[<matplotlib.lines.Line2D at 0x7f63ebc66430>,
 <matplotlib.lines.Line2D at 0x7f63ebc3b670>,
 <matplotlib.lines.Line2D at 0x7f63ebc3b790>,
 <matplotlib.lines.Line2D at 0x7f63ebc3b970>,
 <matplotlib.lines.Line2D at 0x7f63ebc3ba60>,
 <matplotlib.lines.Line2D at 0x7f63ebc3bb50>,
 <matplotlib.lines.Line2D at 0x7f63ebc3bc40>,
 <matplotlib.lines.Line2D at 0x7f63ebc3bd30>,
 <matplotlib.lines.Line2D at 0x7f63ebc3be20>,
 <matplotlib.lines.Line2D at 0x7f63ebc3bf10>,
 <matplotlib.lines.Line2D at 0x7f63ebc3dfd0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d070>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d160>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d250>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d340>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d430>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d520>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d610>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d700>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d7f0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d8e0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3d9d0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3dd00>,
 <matplotlib.lines.Line2D at 0x7f63ebc3dbb0>,
 <matplotlib.lines.Line2D at 0x7f63ebc37e50>,
 <matplotlib.lines.Line2D at 0x7f63ebc37f40>,
 <matplotlib.lines.Line2D at 0x7f63ebc37df0>,
 <matplotlib.lines.Line2D at 0x7f63ebc37ca0>,
 <matplotlib.lines.Line2D at 0x7f63ebc37b50>,
 <matplotlib.lines.Line2D at 0x7f63ebc37a30>,
 <matplotlib.lines.Line2D at 0x7f63ebc37940>,
 <matplotlib.lines.Line2D at 0x7f63ebc37850>,
 <matplotlib.lines.Line2D at 0x7f63ebc37760>,
 <matplotlib.lines.Line2D at 0x7f63ebc37670>,
 <matplotlib.lines.Line2D at 0x7f63ebc37580>,
 <matplotlib.lines.Line2D at 0x7f63ebc37490>,
 <matplotlib.lines.Line2D at 0x7f63ebc373a0>,
 <matplotlib.lines.Line2D at 0x7f63ebc372b0>,
 <matplotlib.lines.Line2D at 0x7f63ebc371c0>,
 <matplotlib.lines.Line2D at 0x7f63ebc370d0>,
 <matplotlib.lines.Line2D at 0x7f63ebc29430>,
 <matplotlib.lines.Line2D at 0x7f63ebc29520>,
 <matplotlib.lines.Line2D at 0x7f63ebc29610>,
 <matplotlib.lines.Line2D at 0x7f63ebc29700>,
 <matplotlib.lines.Line2D at 0x7f63ebc297f0>,
 <matplotlib.lines.Line2D at 0x7f63ebc298e0>,
 <matplotlib.lines.Line2D at 0x7f63ebc29a00>,
 <matplotlib.lines.Line2D at 0x7f63ebc29af0>,
 <matplotlib.lines.Line2D at 0x7f63ebc29be0>,
 <matplotlib.lines.Line2D at 0x7f63ebc29cd0>,
 <matplotlib.lines.Line2D at 0x7f63ebc29dc0>,
 <matplotlib.lines.Line2D at 0x7f63ebc29eb0>,
 <matplotlib.lines.Line2D at 0x7f63ebc29fa0>,
 <matplotlib.lines.Line2D at 0x7f63ebc29340>,
 <matplotlib.lines.Line2D at 0x7f63ebc29250>,
 <matplotlib.lines.Line2D at 0x7f63ebc29160>,
 <matplotlib.lines.Line2D at 0x7f63ebc29070>,
 <matplotlib.lines.Line2D at 0x7f63ebc8f670>,
 <matplotlib.lines.Line2D at 0x7f63ebc8f940>,
 <matplotlib.lines.Line2D at 0x7f63ebc8fbb0>,
 <matplotlib.lines.Line2D at 0x7f63ebc8ffa0>,
 <matplotlib.lines.Line2D at 0x7f63ebc8fe80>,
 <matplotlib.lines.Line2D at 0x7f63ebc8fd90>,
 <matplotlib.lines.Line2D at 0x7f63ebc8fca0>,
 <matplotlib.lines.Line2D at 0x7f63ebc8fac0>,
 <matplotlib.lines.Line2D at 0x7f63ebc8f9d0>,
 <matplotlib.lines.Line2D at 0x7f63ebc8f700>,
 <matplotlib.lines.Line2D at 0x7f63ebc3ffa0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3feb0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3fdc0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3fcd0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3fbe0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3faf0>,
 <matplotlib.lines.Line2D at 0x7f63ebc3fa00>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f910>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f820>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f730>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f640>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f550>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f460>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f370>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f280>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f190>,
 <matplotlib.lines.Line2D at 0x7f63ebc3f0a0>,
 <matplotlib.lines.Line2D at 0x7f63ebc30f70>,
 <matplotlib.lines.Line2D at 0x7f63ebc30e80>,
 <matplotlib.lines.Line2D at 0x7f63ebc30d90>,
 <matplotlib.lines.Line2D at 0x7f63ebc30ca0>,
 <matplotlib.lines.Line2D at 0x7f63ebc30bb0>,
 <matplotlib.lines.Line2D at 0x7f63ebc30ac0>,
 <matplotlib.lines.Line2D at 0x7f63ebc309d0>,
 <matplotlib.lines.Line2D at 0x7f63ebc308e0>,
 <matplotlib.lines.Line2D at 0x7f63ebc307f0>,
 <matplotlib.lines.Line2D at 0x7f63ebc30700>,
 <matplotlib.lines.Line2D at 0x7f63ebc30610>,
 <matplotlib.lines.Line2D at 0x7f63ebc30520>,
 <matplotlib.lines.Line2D at 0x7f63ebc30430>,
 <matplotlib.lines.Line2D at 0x7f63ebc30340>,
 <matplotlib.lines.Line2D at 0x7f63ebc30250>,
 <matplotlib.lines.Line2D at 0x7f63ebc30160>]
../_images/bdb296932dd7dc078a5d6a8c8356b7110770a2fe90f1a8b846311073ef215241.png

Selecting and plotting timeseries.

xr_ds["gsat"].sel(ensemble_member=range(10)).plot.line(
    hue="ensemble_member", add_legend=False
)
[<matplotlib.lines.Line2D at 0x7f63ebb5aa60>,
 <matplotlib.lines.Line2D at 0x7f63ebb00b20>,
 <matplotlib.lines.Line2D at 0x7f63ebb00c70>,
 <matplotlib.lines.Line2D at 0x7f63ebb00df0>,
 <matplotlib.lines.Line2D at 0x7f63ebb00ee0>,
 <matplotlib.lines.Line2D at 0x7f63ebb00fd0>,
 <matplotlib.lines.Line2D at 0x7f63ebb0f100>,
 <matplotlib.lines.Line2D at 0x7f63ebb0f1f0>,
 <matplotlib.lines.Line2D at 0x7f63ebb0f2e0>,
 <matplotlib.lines.Line2D at 0x7f63ebb0f3d0>]
../_images/db0763ac4a559f3b650cd9d38ba8ee2efed9fec2d591eb70ff113b6704f38b4a.png

Scatter plots.

xr_ds.plot.scatter(x="gsat", y="gmst", hue="ensemble_member", alpha=0.3)
<matplotlib.collections.PathCollection at 0x7f63eba27fd0>
../_images/7700a3489c286da0e857a1757c5f420fdd3e24a9f0c5d144b24ee3729b25ed1f.png

Or combinations of calculations and plots.

xr_ds.median(dim="ensemble_member").plot.scatter(x="gsat", y="gmst")
<matplotlib.collections.PathCollection at 0x7f63eb9781f0>
../_images/75dc987f8beb093489b29b276784c118c8947fc882741bdeed9152394f0693a8.png