xarray compatibility

scmdata allows data to be exported to xarray. This makes it easy to use xarray’s many helpful features, most of which are not natively provided in scmdata.

import numpy as np

from scmdata import ScmRun
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/stable/src/scmdata/database/_database.py:9: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  import tqdm.autonotebook as tqdman
def get_data(years, n_ensemble_members, end_val, rand_pct):
    """
    Generate a noisy linear ramp for each ensemble member

    Parameters
    ----------
    years
        Array of time points. Only its length (``years.shape[0]``) is used.
    n_ensemble_members
        Number of columns (one per ensemble member) to generate.
    end_val
        Scale of the noise-free ramp, which runs from 0 up to
        ``end_val * (n - 1) / n`` where ``n`` is the number of time points.
    rand_pct
        Size of the multiplicative noise: every value is scaled by
        ``1 + rand_pct * u`` with ``u`` drawn from ``np.random.random``.

    Returns
    -------
    Array of shape ``(years.shape[0], n_ensemble_members)``
    """
    n_timesteps = years.shape[0]

    # Noise-free ramp shared by every ensemble member
    ramp = np.arange(n_timesteps) / n_timesteps * end_val

    # Independent multiplicative perturbation for each (time, member) pair
    noise_factor = rand_pct * np.random.random((n_timesteps, n_ensemble_members)) + 1

    # Broadcast the ramp down the time axis against the per-member noise
    return ramp[:, np.newaxis] * noise_factor
# Integer years 1750 through 2500 inclusive
years = np.arange(1750, 2501)
variables = ["gsat", "gmst"]
n_variables = len(variables)
n_ensemble_members = 100

# One block of ensemble columns per variable, placed side by side in the
# same order as ``variables``
ensemble_values = np.concatenate(
    [
        get_data(years, n_ensemble_members, 5.5, 0.1),
        get_data(years, n_ensemble_members, 6.0, 0.05),
    ],
    axis=1,
)

# Per-column metadata: each variable name is repeated once per ensemble
# member, while the member ids 0..n-1 restart for every variable
variable_labels = [name for name in variables for _ in range(n_ensemble_members)]
member_ids = list(range(n_ensemble_members)) * n_variables

start = ScmRun(
    ensemble_values,
    index=years,
    columns={
        "model": "a_model",
        "scenario": "a_scenario",
        "variable": variable_labels,
        "region": "World",
        "unit": "K",
        "ensemble_member": member_ids,
    },
)
start
<ScmRun (timeseries: 200, timepoints: 751)>
Time:
	Start: 1750-01-01T00:00:00
	End: 2500-01-01T00:00:00
Meta:
	     ensemble_member    model region    scenario unit variable
	0                  0  a_model  World  a_scenario    K     gsat
	1                  1  a_model  World  a_scenario    K     gsat
	2                  2  a_model  World  a_scenario    K     gsat
	3                  3  a_model  World  a_scenario    K     gsat
	4                  4  a_model  World  a_scenario    K     gsat
	..               ...      ...    ...         ...  ...      ...
	195               95  a_model  World  a_scenario    K     gmst
	196               96  a_model  World  a_scenario    K     gmst
	197               97  a_model  World  a_scenario    K     gmst
	198               98  a_model  World  a_scenario    K     gmst
	199               99  a_model  World  a_scenario    K     gmst
	
	[200 rows x 6 columns]

The usual scmdata methods are of course available.

# Summarise the spread across ``ensemble_member`` (quantile_over) with one
# colour per ``variable`` (hue_var); the quantiles drawn are whatever
# plumeplot uses by default, since none are passed here.
start.plumeplot(
    quantile_over="ensemble_member", hue_var="variable", hue_label="Variable"
)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/stable/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/stable/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/stable/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/stable/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/stable/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/stable/src/scmdata/run.py:197: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
(<Axes: ylabel='K'>,
 [<matplotlib.patches.Patch at 0x7f9278342d00>,
  <matplotlib.collections.PolyCollection at 0x7f927833ab80>,
  <matplotlib.lines.Line2D at 0x7f9278158eb0>,
  <matplotlib.patches.Patch at 0x7f92ac050760>,
  <matplotlib.lines.Line2D at 0x7f92846e1580>,
  <matplotlib.lines.Line2D at 0x7f92836cedf0>,
  <matplotlib.patches.Patch at 0x7f92ac050190>,
  <matplotlib.lines.Line2D at 0x7f92836da100>,
  <matplotlib.lines.Line2D at 0x7f92836da0a0>])
../_images/63c99854d466659bc7f9a81b165966a7f05631173fe094b76753198880112e98.png

However, we can cast to an xarray Dataset and then all the xarray methods become available too.

# Use ``ensemble_member`` as a dimension alongside ``time``. As the repr
# below shows, each variable becomes a data variable and the remaining
# metadata ends up in the Dataset attributes.
xr_ds = start.to_xarray(dimensions=("ensemble_member",))
xr_ds
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/stable/src/scmdata/_xarray.py:236: FutureWarning: The previous implementation of stack is deprecated and will be removed in a future version of pandas. See the What's New notes for pandas 2.1.0 for details. Specify future_stack=True to adopt the new implementation and silence this warning.
  else timeseries.T.stack(dimensions)
<xarray.Dataset>
Dimensions:          (time: 751, ensemble_member: 100)
Coordinates:
  * time             (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
  * ensemble_member  (ensemble_member) int64 0 1 2 3 4 5 6 ... 94 95 96 97 98 99
Data variables:
    gsat             (ensemble_member, time) float64 0.0 0.007884 ... 5.881
    gmst             (ensemble_member, time) float64 0.0 0.008282 ... 6.227
Attributes:
    scmdata_metadata_region:    World
    scmdata_metadata_model:     a_model
    scmdata_metadata_scenario:  a_scenario

For example, calculating statistics.

# Median across the ensemble; the result keeps only the ``time`` dimension
xr_ds.median(dim="ensemble_member")
<xarray.Dataset>
Dimensions:  (time: 751)
Coordinates:
  * time     (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
Data variables:
    gsat     (time) float64 0.0 0.007716 0.01544 0.02297 ... 5.748 5.789 5.775
    gmst     (time) float64 0.0 0.008173 0.01646 0.02462 ... 6.135 6.113 6.151

Plotting timeseries.

# One line per ensemble member; the legend is switched off (add_legend=False)
xr_ds["gsat"].plot.line(hue="ensemble_member", add_legend=False)
[<matplotlib.lines.Line2D at 0x7f927582c640>,
 <matplotlib.lines.Line2D at 0x7f927577f6d0>,
 <matplotlib.lines.Line2D at 0x7f927577f7f0>,
 <matplotlib.lines.Line2D at 0x7f927577f9a0>,
 <matplotlib.lines.Line2D at 0x7f927577fa90>,
 <matplotlib.lines.Line2D at 0x7f927577fb80>,
 <matplotlib.lines.Line2D at 0x7f927577fc70>,
 <matplotlib.lines.Line2D at 0x7f927577fd60>,
 <matplotlib.lines.Line2D at 0x7f927577fe50>,
 <matplotlib.lines.Line2D at 0x7f927577ff40>,
 <matplotlib.lines.Line2D at 0x7f9275791070>,
 <matplotlib.lines.Line2D at 0x7f9275791190>,
 <matplotlib.lines.Line2D at 0x7f9275791280>,
 <matplotlib.lines.Line2D at 0x7f9275791370>,
 <matplotlib.lines.Line2D at 0x7f9275791460>,
 <matplotlib.lines.Line2D at 0x7f9275791550>,
 <matplotlib.lines.Line2D at 0x7f9275791640>,
 <matplotlib.lines.Line2D at 0x7f9275791730>,
 <matplotlib.lines.Line2D at 0x7f9275791820>,
 <matplotlib.lines.Line2D at 0x7f9275791910>,
 <matplotlib.lines.Line2D at 0x7f9275791a00>,
 <matplotlib.lines.Line2D at 0x7f9275791af0>,
 <matplotlib.lines.Line2D at 0x7f9275791be0>,
 <matplotlib.lines.Line2D at 0x7f9275791cd0>,
 <matplotlib.lines.Line2D at 0x7f9275791dc0>,
 <matplotlib.lines.Line2D at 0x7f9275791eb0>,
 <matplotlib.lines.Line2D at 0x7f9275791fa0>,
 <matplotlib.lines.Line2D at 0x7f92757ac5b0>,
 <matplotlib.lines.Line2D at 0x7f92757ac910>,
 <matplotlib.lines.Line2D at 0x7f92757aca00>,
 <matplotlib.lines.Line2D at 0x7f92757acaf0>,
 <matplotlib.lines.Line2D at 0x7f92757acbe0>,
 <matplotlib.lines.Line2D at 0x7f92757accd0>,
 <matplotlib.lines.Line2D at 0x7f92757acdc0>,
 <matplotlib.lines.Line2D at 0x7f92757aceb0>,
 <matplotlib.lines.Line2D at 0x7f92757acfa0>,
 <matplotlib.lines.Line2D at 0x7f92757ac4c0>,
 <matplotlib.lines.Line2D at 0x7f92757ac370>,
 <matplotlib.lines.Line2D at 0x7f92757ac250>,
 <matplotlib.lines.Line2D at 0x7f92757ac160>,
 <matplotlib.lines.Line2D at 0x7f9275794070>,
 <matplotlib.lines.Line2D at 0x7f9275794160>,
 <matplotlib.lines.Line2D at 0x7f9275794250>,
 <matplotlib.lines.Line2D at 0x7f9275794eb0>,
 <matplotlib.lines.Line2D at 0x7f9275794dc0>,
 <matplotlib.lines.Line2D at 0x7f9275794cd0>,
 <matplotlib.lines.Line2D at 0x7f9275794be0>,
 <matplotlib.lines.Line2D at 0x7f9275794af0>,
 <matplotlib.lines.Line2D at 0x7f9275794a00>,
 <matplotlib.lines.Line2D at 0x7f9275794910>,
 <matplotlib.lines.Line2D at 0x7f9275794820>,
 <matplotlib.lines.Line2D at 0x7f9275794730>,
 <matplotlib.lines.Line2D at 0x7f9275794640>,
 <matplotlib.lines.Line2D at 0x7f9275794550>,
 <matplotlib.lines.Line2D at 0x7f9275794460>,
 <matplotlib.lines.Line2D at 0x7f9275794370>,
 <matplotlib.lines.Line2D at 0x7f9275794280>,
 <matplotlib.lines.Line2D at 0x7f92757dddf0>,
 <matplotlib.lines.Line2D at 0x7f92757ddf10>,
 <matplotlib.lines.Line2D at 0x7f927579bfa0>,
 <matplotlib.lines.Line2D at 0x7f92757daeb0>,
 <matplotlib.lines.Line2D at 0x7f92757daee0>,
 <matplotlib.lines.Line2D at 0x7f92757c2040>,
 <matplotlib.lines.Line2D at 0x7f927582cf70>,
 <matplotlib.lines.Line2D at 0x7f927579bd30>,
 <matplotlib.lines.Line2D at 0x7f927579bc40>,
 <matplotlib.lines.Line2D at 0x7f927579bb50>,
 <matplotlib.lines.Line2D at 0x7f927579ba60>,
 <matplotlib.lines.Line2D at 0x7f927579b970>,
 <matplotlib.lines.Line2D at 0x7f927579b880>,
 <matplotlib.lines.Line2D at 0x7f927579b790>,
 <matplotlib.lines.Line2D at 0x7f927579b6a0>,
 <matplotlib.lines.Line2D at 0x7f927579b5b0>,
 <matplotlib.lines.Line2D at 0x7f927579b4c0>,
 <matplotlib.lines.Line2D at 0x7f927579b3d0>,
 <matplotlib.lines.Line2D at 0x7f927579b2e0>,
 <matplotlib.lines.Line2D at 0x7f927579b1f0>,
 <matplotlib.lines.Line2D at 0x7f927579b100>,
 <matplotlib.lines.Line2D at 0x7f927578cfd0>,
 <matplotlib.lines.Line2D at 0x7f927578cee0>,
 <matplotlib.lines.Line2D at 0x7f927578cdf0>,
 <matplotlib.lines.Line2D at 0x7f927578cd00>,
 <matplotlib.lines.Line2D at 0x7f927578cc10>,
 <matplotlib.lines.Line2D at 0x7f927578cb20>,
 <matplotlib.lines.Line2D at 0x7f927578ca30>,
 <matplotlib.lines.Line2D at 0x7f927578c940>,
 <matplotlib.lines.Line2D at 0x7f927578c850>,
 <matplotlib.lines.Line2D at 0x7f927578c760>,
 <matplotlib.lines.Line2D at 0x7f927578c670>,
 <matplotlib.lines.Line2D at 0x7f927578c580>,
 <matplotlib.lines.Line2D at 0x7f927578c490>,
 <matplotlib.lines.Line2D at 0x7f927578c3a0>,
 <matplotlib.lines.Line2D at 0x7f927578c2b0>,
 <matplotlib.lines.Line2D at 0x7f927578c1c0>,
 <matplotlib.lines.Line2D at 0x7f927578c0d0>,
 <matplotlib.lines.Line2D at 0x7f9275784fa0>,
 <matplotlib.lines.Line2D at 0x7f9275784eb0>,
 <matplotlib.lines.Line2D at 0x7f9275784dc0>,
 <matplotlib.lines.Line2D at 0x7f9275784cd0>,
 <matplotlib.lines.Line2D at 0x7f9275784be0>]
../_images/39ace3d68a9feea80615eaeb0f431a096f0b6e8af62a8f78757df1a305651403.png

Selecting and plotting timeseries.

# ``sel`` matches on coordinate values, and the ``ensemble_member``
# coordinate here holds the integers 0-99, so ``range(10)`` picks members
# 0 through 9 before plotting one line for each.
xr_ds["gsat"].sel(ensemble_member=range(10)).plot.line(
    hue="ensemble_member", add_legend=False
)
[<matplotlib.lines.Line2D at 0x7f92756aae80>,
 <matplotlib.lines.Line2D at 0x7f9275662400>,
 <matplotlib.lines.Line2D at 0x7f9275662550>,
 <matplotlib.lines.Line2D at 0x7f92756626d0>,
 <matplotlib.lines.Line2D at 0x7f92756627c0>,
 <matplotlib.lines.Line2D at 0x7f92756628b0>,
 <matplotlib.lines.Line2D at 0x7f92756629a0>,
 <matplotlib.lines.Line2D at 0x7f9275662a90>,
 <matplotlib.lines.Line2D at 0x7f9275662b80>,
 <matplotlib.lines.Line2D at 0x7f9275662c70>]
../_images/d4251548fccbe981ae9788d431f3752fd44b11cb1a0a6b40d595d639b356c17a.png

Scatter plots.

# Scatter gsat against gmst, coloured by ensemble member, with partial
# transparency (alpha=0.3)
xr_ds.plot.scatter(x="gsat", y="gmst", hue="ensemble_member", alpha=0.3)
<matplotlib.collections.PathCollection at 0x7f92755b3be0>
../_images/1aeba797080a4ce8123dc25b502a1136ce3c072ebbc5810bbb30317a1e08c24e.png

Or combinations of calculations and plots.

# Reduce to the ensemble median first, then scatter median gsat against
# median gmst
xr_ds.median(dim="ensemble_member").plot.scatter(x="gsat", y="gmst")
<matplotlib.collections.PathCollection at 0x7f92754dbb50>
../_images/988b9bfe98954e7b066c07e6e5be5acaa4d6263a3767f8c2673fd764958edda9.png