xarray compatibility

scmdata allows data to be exported to xarray. This makes it easy to use xarray’s many helpful features, most of which are not natively provided in scmdata.

import numpy as np

from scmdata import ScmRun
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.0/src/scmdata/database/_database.py:9: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  import tqdm.autonotebook as tqdman
def get_data(years, n_ensemble_members, end_val, rand_pct):
    """
    Build noisy, linearly increasing sample timeseries

    Each column is a linear ramp that starts at zero and rises towards
    (but does not reach) ``end_val``, multiplied element-wise by
    ``1 + rand_pct * u`` where ``u`` is drawn from ``np.random.random``
    (uniform on ``[0, 1)``). The global numpy random state is used, so
    results differ between calls unless the seed is fixed beforehand.

    Parameters
    ----------
    years
        Array whose first dimension sets the number of timepoints. Only
        ``years.shape[0]`` is read; the values themselves are not used.
    n_ensemble_members
        Number of columns (ensemble members) to generate.
    end_val
        Value the underlying ramp approaches at the end of the series.
    rand_pct
        Scale of the random multiplicative perturbation.

    Returns
    -------
    np.ndarray
        Array of shape ``(years.shape[0], n_ensemble_members)``.
    """
    n_timepoints = years.shape[0]

    # Deterministic part: 0, 1/n, 2/n, ... scaled by end_val
    ramp = np.arange(n_timepoints) / n_timepoints * end_val

    # Stochastic part: an independent factor for every (time, member) pair
    perturbation = (
        rand_pct * np.random.random((n_timepoints, n_ensemble_members)) + 1
    )

    # Add a trailing axis so the ramp broadcasts across all members
    return ramp[:, np.newaxis] * perturbation
# Annual time axis running from 1750 to 2500 inclusive
years = np.arange(1750, 2501)
variables = ["gsat", "gmst"]
n_variables = len(variables)
n_ensemble_members = 100


# The data columns are stacked variable by variable (all gsat members,
# then all gmst members), so the per-column metadata lists below are
# built in the same order.
start = ScmRun(
    np.hstack(
        [
            get_data(years, n_ensemble_members, 5.5, 0.1),
            get_data(years, n_ensemble_members, 6.0, 0.05),
        ]
    ),
    index=years,
    columns={
        "model": "a_model",
        "scenario": "a_scenario",
        # Each variable name repeated once per ensemble member
        "variable": [
            name for name in variables for _ in range(n_ensemble_members)
        ],
        "region": "World",
        "unit": "K",
        # Member ids 0..n-1, restarted for every variable
        "ensemble_member": list(range(n_ensemble_members)) * n_variables,
    },
)
start
<ScmRun (timeseries: 200, timepoints: 751)>
Time:
	Start: 1750-01-01T00:00:00
	End: 2500-01-01T00:00:00
Meta:
	     ensemble_member    model region    scenario unit variable
	0                  0  a_model  World  a_scenario    K     gsat
	1                  1  a_model  World  a_scenario    K     gsat
	2                  2  a_model  World  a_scenario    K     gsat
	3                  3  a_model  World  a_scenario    K     gsat
	4                  4  a_model  World  a_scenario    K     gsat
	..               ...      ...    ...         ...  ...      ...
	195               95  a_model  World  a_scenario    K     gmst
	196               96  a_model  World  a_scenario    K     gmst
	197               97  a_model  World  a_scenario    K     gmst
	198               98  a_model  World  a_scenario    K     gmst
	199               99  a_model  World  a_scenario    K     gmst
	
	[200 rows x 6 columns]

The usual scmdata methods are of course available.

# Plume plot with quantiles taken over the ensemble members; one hue per
# variable, with the legend section labelled "Variable"
start.plumeplot(
    quantile_over="ensemble_member", hue_var="variable", hue_label="Variable"
)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.0/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.0/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.0/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.0/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.0/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.16.0/src/scmdata/run.py:196: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
(<Axes: ylabel='K'>,
 [<matplotlib.patches.Patch at 0x7fcdf95da130>,
  <matplotlib.collections.PolyCollection at 0x7fcdf93a9040>,
  <matplotlib.lines.Line2D at 0x7fcdf94ccb20>,
  <matplotlib.patches.Patch at 0x7fcdf936cca0>,
  <matplotlib.lines.Line2D at 0x7fcdf95c1f10>,
  <matplotlib.lines.Line2D at 0x7fcdf9393d30>,
  <matplotlib.patches.Patch at 0x7fcdf9578d90>,
  <matplotlib.lines.Line2D at 0x7fcdf9578100>,
  <matplotlib.lines.Line2D at 0x7fcdf9578d60>])
../_images/bc90d74ab0dbd72194110f4e8a8ee3bd39a1a44538eab68000fa421475eb72fa.png

However, we can cast to an xarray Dataset and then all the xarray methods become available too.

# Convert to an xarray Dataset with ensemble_member as a dimension alongside
# time; each variable (gsat, gmst) becomes its own data variable
xr_ds = start.to_xarray(dimensions=("ensemble_member",))
xr_ds
<xarray.Dataset>
Dimensions:          (time: 751, ensemble_member: 100)
Coordinates:
  * time             (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
  * ensemble_member  (ensemble_member) int64 0 1 2 3 4 5 6 ... 94 95 96 97 98 99
Data variables:
    gsat             (ensemble_member, time) float64 0.0 0.007551 ... 5.573
    gmst             (ensemble_member, time) float64 0.0 0.00812 ... 6.068 6.19
Attributes:
    scmdata_metadata_region:    World
    scmdata_metadata_scenario:  a_scenario
    scmdata_metadata_model:     a_model

For example, calculating statistics.

# Median across ensemble members, leaving one value per timepoint and variable
xr_ds.median(dim="ensemble_member")
<xarray.Dataset>
Dimensions:  (time: 751)
Coordinates:
  * time     (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
Data variables:
    gsat     (time) float64 0.0 0.007642 0.01537 0.0229 ... 5.718 5.815 5.774
    gmst     (time) float64 0.0 0.008174 0.01638 0.02445 ... 6.125 6.122 6.151

Plotting timeseries.

# One line per ensemble member for gsat; the legend is suppressed
xr_ds["gsat"].plot.line(hue="ensemble_member", add_legend=False)
[<matplotlib.lines.Line2D at 0x7fcdf6a5edf0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2c5e0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2c700>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2c8e0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2c9d0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2cac0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2cbb0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2cca0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2cd90>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2ce80>,
 <matplotlib.lines.Line2D at 0x7fcdf6a2cf70>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47cd0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47f40>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47e80>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47a60>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47910>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47820>,
 <matplotlib.lines.Line2D at 0x7fcdf6a476a0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47550>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47460>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47370>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47280>,
 <matplotlib.lines.Line2D at 0x7fcdf6a47190>,
 <matplotlib.lines.Line2D at 0x7fcdf6a470a0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a280>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a370>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a460>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a550>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a640>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a730>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a850>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a940>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1aa30>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1ab20>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1ac10>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1ad00>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1adf0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1aee0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1afd0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a130>,
 <matplotlib.lines.Line2D at 0x7fcdf6a1a040>,
 <matplotlib.lines.Line2D at 0x7fcdf6a847c0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a84a30>,
 <matplotlib.lines.Line2D at 0x7fcdf6a84e20>,
 <matplotlib.lines.Line2D at 0x7fcdf6a84f10>,
 <matplotlib.lines.Line2D at 0x7fcdf6a84d30>,
 <matplotlib.lines.Line2D at 0x7fcdf6a84c40>,
 <matplotlib.lines.Line2D at 0x7fcdf6a84b50>,
 <matplotlib.lines.Line2D at 0x7fcdf6a84a60>,
 <matplotlib.lines.Line2D at 0x7fcdf6a84880>,
 <matplotlib.lines.Line2D at 0x7fcdf6a84700>,
 <matplotlib.lines.Line2D at 0x7fcdf6a845b0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36fd0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36ee0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36df0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36d00>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36c10>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36b20>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36a30>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36940>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36850>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36760>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36670>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36580>,
 <matplotlib.lines.Line2D at 0x7fcdf6a36490>,
 <matplotlib.lines.Line2D at 0x7fcdf6a363a0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a362b0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a361c0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a360d0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28fa0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28eb0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28dc0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28cd0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28be0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28af0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28a00>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28910>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28820>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28730>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28640>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28550>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28460>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28370>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28280>,
 <matplotlib.lines.Line2D at 0x7fcdf6a28190>,
 <matplotlib.lines.Line2D at 0x7fcdf6a280a0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23f70>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23e80>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23d90>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23ca0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23bb0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23ac0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a239d0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a238e0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a237f0>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23700>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23610>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23520>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23430>,
 <matplotlib.lines.Line2D at 0x7fcdf6a23340>]
../_images/5421e06bcbe0106c475d11658e5643ba46b1c87c12d8c2cce5885fcfdb77c036.png

Selecting and plotting timeseries.

# Select ensemble members 0-9 by coordinate value, then plot one line each
xr_ds["gsat"].sel(ensemble_member=range(10)).plot.line(
    hue="ensemble_member", add_legend=False
)
[<matplotlib.lines.Line2D at 0x7fcdf694d880>,
 <matplotlib.lines.Line2D at 0x7fcdf68f5940>,
 <matplotlib.lines.Line2D at 0x7fcdf68f5a90>,
 <matplotlib.lines.Line2D at 0x7fcdf68f5c10>,
 <matplotlib.lines.Line2D at 0x7fcdf68f5d00>,
 <matplotlib.lines.Line2D at 0x7fcdf68f5df0>,
 <matplotlib.lines.Line2D at 0x7fcdf68f5ee0>,
 <matplotlib.lines.Line2D at 0x7fcdf68f5fd0>,
 <matplotlib.lines.Line2D at 0x7fcdf6902100>,
 <matplotlib.lines.Line2D at 0x7fcdf69021f0>]
../_images/1eba21b93ae97a31853bdffc63d02685ba89e93470559ea14e8353b9f664a9a1.png

Scatter plots.

# Scatter gsat against gmst, coloured by ensemble member and semi-transparent
xr_ds.plot.scatter(x="gsat", y="gmst", hue="ensemble_member", alpha=0.3)
<matplotlib.collections.PathCollection at 0x7fcdf681be50>
../_images/76db31f9011d7cad8b7198f7bb52f5a16ef07bd58644dbf8bc04838b882138df.png

Or combinations of calculations and plots.

# Reduce to the ensemble median first, then scatter median gsat against median gmst
xr_ds.median(dim="ensemble_member").plot.scatter(x="gsat", y="gmst")
<matplotlib.collections.PathCollection at 0x7fcdf675cfa0>
../_images/3d3db78481326e16f2d05cc2d193e90718f786cc0077001b01ed8bacb588227a.png