xarray compatibility

scmdata allows data to be exported to xarray. This makes it easy to use xarray’s many helpful features, most of which are not natively provided in scmdata.

import numpy as np

from scmdata import ScmRun
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/latest/src/scmdata/database/_database.py:9: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  import tqdm.autonotebook as tqdman
def get_data(years, n_ensemble_members, end_val, rand_pct):
    """
    Build a sample ensemble of noisy, linearly increasing timeseries

    The result has one row per entry in ``years`` and one column per
    ensemble member. Every column follows the same ramp, which starts at 0
    and grows in equal steps of ``end_val / years.shape[0]``, and each value
    is then multiplied by ``1 + rand_pct * u`` where ``u`` is drawn
    independently from ``np.random.random`` (uniform on [0, 1)).
    """
    n_years = years.shape[0]

    # Deterministic part: 0, end_val / n_years, 2 * end_val / n_years, ...
    ramp = np.arange(n_years) / n_years * end_val

    # Random multiplicative scaling, one draw per (year, ensemble member).
    scaling = rand_pct * np.random.random((n_years, n_ensemble_members)) + 1

    # Broadcast the ramp down the rows so every column shares the same trend.
    return ramp[:, np.newaxis] * scaling
# Integer years 1750 through 2500 inclusive.
years = np.arange(1750, 2500 + 1)
variables = ["gsat", "gmst"]
n_variables = len(variables)
n_ensemble_members = 100

# Sample values: the columns for the first variable come first, followed by
# the columns for the second variable (np.hstack joins them side by side).
sample_values = np.hstack(
    [
        get_data(years, n_ensemble_members, 5.5, 0.1),
        get_data(years, n_ensemble_members, 6.0, 0.05),
    ]
)

# Per-column metadata, in the same order as the columns of sample_values:
# each variable name repeated once per ensemble member ...
variable_labels = [
    name for name in variables for _ in range(n_ensemble_members)
]
# ... and the ensemble member ids 0..n_ensemble_members-1 repeated per variable.
member_ids = [
    member for _ in variables for member in range(n_ensemble_members)
]

start = ScmRun(
    sample_values,
    index=years,
    columns={
        "model": "a_model",
        "scenario": "a_scenario",
        "variable": variable_labels,
        "region": "World",
        "unit": "K",
        "ensemble_member": member_ids,
    },
)
start
<ScmRun (timeseries: 200, timepoints: 751)>
Time:
	Start: 1750-01-01T00:00:00
	End: 2500-01-01T00:00:00
Meta:
	     ensemble_member    model region    scenario unit variable
	0                  0  a_model  World  a_scenario    K     gsat
	1                  1  a_model  World  a_scenario    K     gsat
	2                  2  a_model  World  a_scenario    K     gsat
	3                  3  a_model  World  a_scenario    K     gsat
	4                  4  a_model  World  a_scenario    K     gsat
	..               ...      ...    ...         ...  ...      ...
	195               95  a_model  World  a_scenario    K     gmst
	196               96  a_model  World  a_scenario    K     gmst
	197               97  a_model  World  a_scenario    K     gmst
	198               98  a_model  World  a_scenario    K     gmst
	199               99  a_model  World  a_scenario    K     gmst
	
	[200 rows x 6 columns]

The usual scmdata methods are of course available.

start.plumeplot(
    quantile_over="ensemble_member", hue_var="variable", hue_label="Variable"
)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/latest/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/latest/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/latest/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/latest/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/latest/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/latest/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
(<Axes: ylabel='K'>,
 [<matplotlib.patches.Patch at 0x7fb116000760>,
  <matplotlib.collections.PolyCollection at 0x7fb11600c850>,
  <matplotlib.lines.Line2D at 0x7fb115e0de20>,
  <matplotlib.patches.Patch at 0x7fb115e22250>,
  <matplotlib.lines.Line2D at 0x7fb115fac100>,
  <matplotlib.lines.Line2D at 0x7fb115fac160>,
  <matplotlib.patches.Patch at 0x7fb115e22310>,
  <matplotlib.lines.Line2D at 0x7fb115e22280>,
  <matplotlib.lines.Line2D at 0x7fb115e228e0>])
../_images/136d310c9a3a60a2c0f09da9ad2144e05f4d34089ff72189b13876df759bc496.png

However, we can cast to an xarray Dataset and then all the xarray methods become available too.

xr_ds = start.to_xarray(dimensions=("ensemble_member",))
xr_ds
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/latest/src/scmdata/_xarray.py:236: FutureWarning: The previous implementation of stack is deprecated and will be removed in a future version of pandas. See the What's New notes for pandas 2.1.0 for details. Specify future_stack=True to adopt the new implementation and silence this warning.
  else timeseries.T.stack(dimensions)
<xarray.Dataset>
Dimensions:          (time: 751, ensemble_member: 100)
Coordinates:
  * time             (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
  * ensemble_member  (ensemble_member) int64 0 1 2 3 4 5 6 ... 94 95 96 97 98 99
Data variables:
    gsat             (ensemble_member, time) float64 0.0 0.007471 ... 5.916
    gmst             (ensemble_member, time) float64 0.0 0.008257 ... 6.046
Attributes:
    scmdata_metadata_scenario:  a_scenario
    scmdata_metadata_region:    World
    scmdata_metadata_model:     a_model

For example, calculating statistics.

xr_ds.median(dim="ensemble_member")
<xarray.Dataset>
Dimensions:  (time: 751)
Coordinates:
  * time     (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
Data variables:
    gsat     (time) float64 0.0 0.007687 0.01542 0.02304 ... 5.776 5.8 5.774
    gmst     (time) float64 0.0 0.008214 0.0164 0.02455 ... 6.117 6.132 6.154

Plotting timeseries.

xr_ds["gsat"].plot.line(hue="ensemble_member", add_legend=False)
[<matplotlib.lines.Line2D at 0x7fb1134b5730>,
 <matplotlib.lines.Line2D at 0x7fb1134536d0>,
 <matplotlib.lines.Line2D at 0x7fb1134537f0>,
 <matplotlib.lines.Line2D at 0x7fb1134539a0>,
 <matplotlib.lines.Line2D at 0x7fb113453a90>,
 <matplotlib.lines.Line2D at 0x7fb113453b80>,
 <matplotlib.lines.Line2D at 0x7fb113453c70>,
 <matplotlib.lines.Line2D at 0x7fb113453d60>,
 <matplotlib.lines.Line2D at 0x7fb113453e50>,
 <matplotlib.lines.Line2D at 0x7fb113453f40>,
 <matplotlib.lines.Line2D at 0x7fb113465070>,
 <matplotlib.lines.Line2D at 0x7fb113465160>,
 <matplotlib.lines.Line2D at 0x7fb113465280>,
 <matplotlib.lines.Line2D at 0x7fb113465370>,
 <matplotlib.lines.Line2D at 0x7fb113465460>,
 <matplotlib.lines.Line2D at 0x7fb113465550>,
 <matplotlib.lines.Line2D at 0x7fb113465640>,
 <matplotlib.lines.Line2D at 0x7fb113465730>,
 <matplotlib.lines.Line2D at 0x7fb113465820>,
 <matplotlib.lines.Line2D at 0x7fb113465910>,
 <matplotlib.lines.Line2D at 0x7fb113465a00>,
 <matplotlib.lines.Line2D at 0x7fb113465af0>,
 <matplotlib.lines.Line2D at 0x7fb113465be0>,
 <matplotlib.lines.Line2D at 0x7fb113465cd0>,
 <matplotlib.lines.Line2D at 0x7fb113465dc0>,
 <matplotlib.lines.Line2D at 0x7fb113465eb0>,
 <matplotlib.lines.Line2D at 0x7fb113465fa0>,
 <matplotlib.lines.Line2D at 0x7fb1134806a0>,
 <matplotlib.lines.Line2D at 0x7fb113480a00>,
 <matplotlib.lines.Line2D at 0x7fb113480af0>,
 <matplotlib.lines.Line2D at 0x7fb113480be0>,
 <matplotlib.lines.Line2D at 0x7fb113480cd0>,
 <matplotlib.lines.Line2D at 0x7fb113480dc0>,
 <matplotlib.lines.Line2D at 0x7fb113480eb0>,
 <matplotlib.lines.Line2D at 0x7fb113480fa0>,
 <matplotlib.lines.Line2D at 0x7fb1134805b0>,
 <matplotlib.lines.Line2D at 0x7fb113480460>,
 <matplotlib.lines.Line2D at 0x7fb113480340>,
 <matplotlib.lines.Line2D at 0x7fb113480250>,
 <matplotlib.lines.Line2D at 0x7fb1134800a0>,
 <matplotlib.lines.Line2D at 0x7fb1134c3220>,
 <matplotlib.lines.Line2D at 0x7fb1134c3490>,
 <matplotlib.lines.Line2D at 0x7fb1134c3880>,
 <matplotlib.lines.Line2D at 0x7fb1134c3970>,
 <matplotlib.lines.Line2D at 0x7fb1134c3fa0>,
 <matplotlib.lines.Line2D at 0x7fb1134c3eb0>,
 <matplotlib.lines.Line2D at 0x7fb1134c3dc0>,
 <matplotlib.lines.Line2D at 0x7fb1134c3cd0>,
 <matplotlib.lines.Line2D at 0x7fb1134c3be0>,
 <matplotlib.lines.Line2D at 0x7fb1134c3af0>,
 <matplotlib.lines.Line2D at 0x7fb1134c37c0>,
 <matplotlib.lines.Line2D at 0x7fb1134c36d0>,
 <matplotlib.lines.Line2D at 0x7fb1134c35e0>,
 <matplotlib.lines.Line2D at 0x7fb1134c34f0>,
 <matplotlib.lines.Line2D at 0x7fb1134c3310>,
 <matplotlib.lines.Line2D at 0x7fb1134c3190>,
 <matplotlib.lines.Line2D at 0x7fb1134b2df0>,
 <matplotlib.lines.Line2D at 0x7fb1134b2fd0>,
 <matplotlib.lines.Line2D at 0x7fb11346efa0>,
 <matplotlib.lines.Line2D at 0x7fb11346eeb0>,
 <matplotlib.lines.Line2D at 0x7fb113497040>,
 <matplotlib.lines.Line2D at 0x7fb113491fa0>,
 <matplotlib.lines.Line2D at 0x7fb11348c130>,
 <matplotlib.lines.Line2D at 0x7fb11348c0a0>,
 <matplotlib.lines.Line2D at 0x7fb11346ec40>,
 <matplotlib.lines.Line2D at 0x7fb11346eb50>,
 <matplotlib.lines.Line2D at 0x7fb11346ea60>,
 <matplotlib.lines.Line2D at 0x7fb11346e970>,
 <matplotlib.lines.Line2D at 0x7fb11346e880>,
 <matplotlib.lines.Line2D at 0x7fb11346e790>,
 <matplotlib.lines.Line2D at 0x7fb11346e6a0>,
 <matplotlib.lines.Line2D at 0x7fb11346e5b0>,
 <matplotlib.lines.Line2D at 0x7fb11346e4c0>,
 <matplotlib.lines.Line2D at 0x7fb11346e3d0>,
 <matplotlib.lines.Line2D at 0x7fb11346e2e0>,
 <matplotlib.lines.Line2D at 0x7fb11346e1f0>,
 <matplotlib.lines.Line2D at 0x7fb11346e100>,
 <matplotlib.lines.Line2D at 0x7fb11345ffd0>,
 <matplotlib.lines.Line2D at 0x7fb11345fee0>,
 <matplotlib.lines.Line2D at 0x7fb11345fdf0>,
 <matplotlib.lines.Line2D at 0x7fb11345fd00>,
 <matplotlib.lines.Line2D at 0x7fb11345fc10>,
 <matplotlib.lines.Line2D at 0x7fb11345fb20>,
 <matplotlib.lines.Line2D at 0x7fb11345fa30>,
 <matplotlib.lines.Line2D at 0x7fb11345f940>,
 <matplotlib.lines.Line2D at 0x7fb11345f850>,
 <matplotlib.lines.Line2D at 0x7fb11345f760>,
 <matplotlib.lines.Line2D at 0x7fb11345f670>,
 <matplotlib.lines.Line2D at 0x7fb11345f580>,
 <matplotlib.lines.Line2D at 0x7fb11345f490>,
 <matplotlib.lines.Line2D at 0x7fb11345f3a0>,
 <matplotlib.lines.Line2D at 0x7fb11345f2b0>,
 <matplotlib.lines.Line2D at 0x7fb11345f1c0>,
 <matplotlib.lines.Line2D at 0x7fb11345f0d0>,
 <matplotlib.lines.Line2D at 0x7fb113458fa0>,
 <matplotlib.lines.Line2D at 0x7fb113458eb0>,
 <matplotlib.lines.Line2D at 0x7fb113458dc0>,
 <matplotlib.lines.Line2D at 0x7fb113458cd0>,
 <matplotlib.lines.Line2D at 0x7fb113458be0>,
 <matplotlib.lines.Line2D at 0x7fb113458af0>]
../_images/49ca8eaaef52987bfbda48f4eeab4ed0ff80736214470438c10247769352b4b3.png

Selecting and plotting timeseries.

xr_ds["gsat"].sel(ensemble_member=range(10)).plot.line(
    hue="ensemble_member", add_legend=False
)
[<matplotlib.lines.Line2D at 0x7fb1133a0f70>,
 <matplotlib.lines.Line2D at 0x7fb1133354f0>,
 <matplotlib.lines.Line2D at 0x7fb113335640>,
 <matplotlib.lines.Line2D at 0x7fb1133357c0>,
 <matplotlib.lines.Line2D at 0x7fb1133358b0>,
 <matplotlib.lines.Line2D at 0x7fb1133359a0>,
 <matplotlib.lines.Line2D at 0x7fb113335a90>,
 <matplotlib.lines.Line2D at 0x7fb113335b80>,
 <matplotlib.lines.Line2D at 0x7fb113335c70>,
 <matplotlib.lines.Line2D at 0x7fb113335d60>]
../_images/42420160e436021b7c295ea4f6ac0c094a80c9cbde6083ff630231c8fd0bbfcb.png

Scatter plots.

xr_ds.plot.scatter(x="gsat", y="gmst", hue="ensemble_member", alpha=0.3)
<matplotlib.collections.PathCollection at 0x7fb113286cd0>
../_images/6f0a03b7ca02348e33a1708268b7f935be2085c0884a5f6221811997177ae9ed.png

Or combinations of calculations and plots.

xr_ds.median(dim="ensemble_member").plot.scatter(x="gsat", y="gmst")
<matplotlib.collections.PathCollection at 0x7fb1131b0c40>
../_images/0cbedb3fa647ea48717c47620fa6bf428630d27abe10c991fbbaa26b5f0cccf2.png