xarray compatibility

scmdata allows data to be exported to xarray. This makes it easy to use xarray’s many helpful features, most of which are not natively provided in scmdata.

import numpy as np

from scmdata import ScmRun
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/envs/v0.15.2/lib/python3.9/site-packages/scmdata/database/_database.py:9: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  import tqdm.autonotebook as tqdman
def get_data(years, n_ensemble_members, end_val, rand_pct):
    """Generate a noisy, linearly increasing ensemble of timeseries.

    Parameters
    ----------
    years
        Array whose first-axis length sets the number of timesteps. Only
        ``years.shape[0]`` is used; the values themselves are not read.
    n_ensemble_members
        Number of columns (ensemble members) to generate.
    end_val
        Scale of the linear ramp. Step ``k`` of ``n`` has the base value
        ``k / n * end_val``, so the ramp starts at 0 and stops one step
        short of ``end_val``.
    rand_pct
        Size of the multiplicative noise. Each value is scaled by a factor
        drawn uniformly from ``[1, 1 + rand_pct)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(years.shape[0], n_ensemble_members)``.
    """
    n_steps = years.shape[0]

    # Deterministic part: a ramp shared by every ensemble member.
    ramp = np.arange(n_steps) / n_steps * end_val

    # Random part: one independent scaling factor per (timestep, member).
    scaling = rand_pct * np.random.random((n_steps, n_ensemble_members)) + 1

    # Broadcast the ramp down the rows of the scaling matrix.
    return ramp[:, np.newaxis] * scaling
# Annual time axis from 1750 to 2500 inclusive (751 points).
years = np.arange(1750, 2500 + 1)
variables = ["gsat", "gmst"]
n_variables = len(variables)
n_ensemble_members = 100


# Build one ScmRun holding every ensemble member of every variable.
# The data columns are laid out variable by variable: all "gsat" members
# first, then all "gmst" members. The "variable" and "ensemble_member"
# metadata lists below follow that same layout.
start = ScmRun(
    np.hstack(
        [
            get_data(years, n_ensemble_members, end_val, rand_pct)
            # (end_val, rand_pct) per variable, in the order of `variables`
            for end_val, rand_pct in ((5.5, 0.1), (6.0, 0.05))
        ]
    ),
    index=years,
    columns={
        "model": "a_model",
        "scenario": "a_scenario",
        # each variable name repeated once per ensemble member
        "variable": [
            name for name in variables for _ in range(n_ensemble_members)
        ],
        "region": "World",
        "unit": "K",
        # member ids 0..n_ensemble_members-1, restarted for every variable
        "ensemble_member": list(range(n_ensemble_members)) * n_variables,
    },
)
start
<ScmRun (timeseries: 200, timepoints: 751)>
Time:
	Start: 1750-01-01T00:00:00
	End: 2500-01-01T00:00:00
Meta:
	     ensemble_member    model region    scenario unit variable
	0                  0  a_model  World  a_scenario    K     gsat
	1                  1  a_model  World  a_scenario    K     gsat
	2                  2  a_model  World  a_scenario    K     gsat
	3                  3  a_model  World  a_scenario    K     gsat
	4                  4  a_model  World  a_scenario    K     gsat
	..               ...      ...    ...         ...  ...      ...
	195               95  a_model  World  a_scenario    K     gmst
	196               96  a_model  World  a_scenario    K     gmst
	197               97  a_model  World  a_scenario    K     gmst
	198               98  a_model  World  a_scenario    K     gmst
	199               99  a_model  World  a_scenario    K     gmst
	
	[200 rows x 6 columns]

The usual scmdata methods are of course available.

# Plume plot of the run: quantiles are taken over the "ensemble_member"
# metadata column, and hue distinguishes the "variable" column (labelled
# "Variable"). The call returns the axes plus a list of matplotlib artists,
# which the notebook echoes below.
start.plumeplot(
    quantile_over="ensemble_member", hue_var="variable", hue_label="Variable"
)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/envs/v0.15.2/lib/python3.9/site-packages/scmdata/run.py:195: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/envs/v0.15.2/lib/python3.9/site-packages/scmdata/run.py:195: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/envs/v0.15.2/lib/python3.9/site-packages/scmdata/run.py:195: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/envs/v0.15.2/lib/python3.9/site-packages/scmdata/run.py:195: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/envs/v0.15.2/lib/python3.9/site-packages/scmdata/run.py:195: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/envs/v0.15.2/lib/python3.9/site-packages/scmdata/run.py:195: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
(<Axes: ylabel='K'>,
 [<matplotlib.patches.Patch at 0x7f4c944f65b0>,
  <matplotlib.collections.PolyCollection at 0x7f4c94432040>,
  <matplotlib.lines.Line2D at 0x7f4c9443c490>,
  <matplotlib.patches.Patch at 0x7f4c9457b550>,
  <matplotlib.lines.Line2D at 0x7f4c9457b3d0>,
  <matplotlib.lines.Line2D at 0x7f4c9457b190>,
  <matplotlib.patches.Patch at 0x7f4c9457bc10>,
  <matplotlib.lines.Line2D at 0x7f4c9457be50>,
  <matplotlib.lines.Line2D at 0x7f4c94432c10>])
../_images/1a8dfebac067b8e8f45daa70c6f0ef5145e7ec585285cfe0a6ceff9f06e97609.png

However, we can cast to an xarray Dataset and then all the xarray methods become available too.

# Convert to an xarray Dataset with "ensemble_member" as a dimension next to
# "time". As the repr below shows, each variable (gsat, gmst) becomes a data
# variable and the scenario, model and region metadata are stored as
# "scmdata_metadata_*" attributes.
xr_ds = start.to_xarray(dimensions=("ensemble_member",))
xr_ds
<xarray.Dataset>
Dimensions:          (time: 751, ensemble_member: 100)
Coordinates:
  * time             (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
  * ensemble_member  (ensemble_member) int64 0 1 2 3 4 5 6 ... 94 95 96 97 98 99
Data variables:
    gmst             (ensemble_member, time) float64 0.0 0.00829 ... 6.277 6.091
    gsat             (ensemble_member, time) float64 0.0 0.007466 ... 5.95 5.634
Attributes:
    scmdata_metadata_scenario:  a_scenario
    scmdata_metadata_model:     a_model
    scmdata_metadata_region:    World

For example, calculating statistics.

# Median across ensemble members; the result keeps only the "time" dimension.
xr_ds.median(dim="ensemble_member")
<xarray.Dataset>
Dimensions:  (time: 751)
Coordinates:
  * time     (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
Data variables:
    gmst     (time) float64 0.0 0.008208 0.01635 0.02456 ... 6.136 6.134 6.132
    gsat     (time) float64 0.0 0.007609 0.01535 0.02309 ... 5.764 5.799 5.788

Plotting timeseries.

# One line per ensemble member (100 lines); the legend is suppressed.
xr_ds["gsat"].plot.line(hue="ensemble_member", add_legend=False)
[<matplotlib.lines.Line2D at 0x7f4c94545e20>,
 <matplotlib.lines.Line2D at 0x7f4c91b0ed60>,
 <matplotlib.lines.Line2D at 0x7f4c91acaa00>,
 <matplotlib.lines.Line2D at 0x7f4c91acaac0>,
 <matplotlib.lines.Line2D at 0x7f4c91acab20>,
 <matplotlib.lines.Line2D at 0x7f4c91acac10>,
 <matplotlib.lines.Line2D at 0x7f4c91acad00>,
 <matplotlib.lines.Line2D at 0x7f4c91acadf0>,
 <matplotlib.lines.Line2D at 0x7f4c91acaee0>,
 <matplotlib.lines.Line2D at 0x7f4c91acafd0>,
 <matplotlib.lines.Line2D at 0x7f4c91b0eb20>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8100>,
 <matplotlib.lines.Line2D at 0x7f4c91ad81f0>,
 <matplotlib.lines.Line2D at 0x7f4c91ad83a0>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8490>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8580>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8670>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8760>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8850>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8940>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8a30>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8b20>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8c10>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8d00>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8df0>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8ee0>,
 <matplotlib.lines.Line2D at 0x7f4c91ad8fd0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0100>,
 <matplotlib.lines.Line2D at 0x7f4c91ae01f0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae02e0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae03d0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae04c0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae05b0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae06a0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0790>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0880>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0970>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0a60>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0b50>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0c40>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0d30>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0e20>,
 <matplotlib.lines.Line2D at 0x7f4c91ae0f10>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8040>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8130>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8220>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8310>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8400>,
 <matplotlib.lines.Line2D at 0x7f4c91ae84f0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae85e0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae86d0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae87c0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae88b0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae89a0>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8a90>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8b80>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8c70>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8d60>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8e50>,
 <matplotlib.lines.Line2D at 0x7f4c91ae8f40>,
 <matplotlib.lines.Line2D at 0x7f4c91a70070>,
 <matplotlib.lines.Line2D at 0x7f4c91a70160>,
 <matplotlib.lines.Line2D at 0x7f4c91a70250>,
 <matplotlib.lines.Line2D at 0x7f4c91a70340>,
 <matplotlib.lines.Line2D at 0x7f4c91a70430>,
 <matplotlib.lines.Line2D at 0x7f4c91a70520>,
 <matplotlib.lines.Line2D at 0x7f4c91a70610>,
 <matplotlib.lines.Line2D at 0x7f4c91a70700>,
 <matplotlib.lines.Line2D at 0x7f4c91a707f0>,
 <matplotlib.lines.Line2D at 0x7f4c91a708e0>,
 <matplotlib.lines.Line2D at 0x7f4c91a709d0>,
 <matplotlib.lines.Line2D at 0x7f4c91a70ac0>,
 <matplotlib.lines.Line2D at 0x7f4c91a70bb0>,
 <matplotlib.lines.Line2D at 0x7f4c91a70ca0>,
 <matplotlib.lines.Line2D at 0x7f4c91a70d90>,
 <matplotlib.lines.Line2D at 0x7f4c91a70e80>,
 <matplotlib.lines.Line2D at 0x7f4c91a70f70>,
 <matplotlib.lines.Line2D at 0x7f4c91a780a0>,
 <matplotlib.lines.Line2D at 0x7f4c91a78190>,
 <matplotlib.lines.Line2D at 0x7f4c91a78280>,
 <matplotlib.lines.Line2D at 0x7f4c91a78370>,
 <matplotlib.lines.Line2D at 0x7f4c91a78460>,
 <matplotlib.lines.Line2D at 0x7f4c91a78550>,
 <matplotlib.lines.Line2D at 0x7f4c91a78640>,
 <matplotlib.lines.Line2D at 0x7f4c91a78730>,
 <matplotlib.lines.Line2D at 0x7f4c91a78820>,
 <matplotlib.lines.Line2D at 0x7f4c91a78910>,
 <matplotlib.lines.Line2D at 0x7f4c91a78a00>,
 <matplotlib.lines.Line2D at 0x7f4c91a78af0>,
 <matplotlib.lines.Line2D at 0x7f4c91a78be0>,
 <matplotlib.lines.Line2D at 0x7f4c91a78cd0>,
 <matplotlib.lines.Line2D at 0x7f4c91a78dc0>,
 <matplotlib.lines.Line2D at 0x7f4c91a78eb0>,
 <matplotlib.lines.Line2D at 0x7f4c91a78fa0>,
 <matplotlib.lines.Line2D at 0x7f4c91a7f0d0>,
 <matplotlib.lines.Line2D at 0x7f4c91a7f1c0>,
 <matplotlib.lines.Line2D at 0x7f4c91a7f2b0>,
 <matplotlib.lines.Line2D at 0x7f4c91a7f3a0>,
 <matplotlib.lines.Line2D at 0x7f4c91a7f490>,
 <matplotlib.lines.Line2D at 0x7f4c91a7f580>]
../_images/808307e727607d0f36a995ac3c1d125bf4d82d75d5c156a92d34e5044404418a.png

Selecting and plotting timeseries.

# Select ensemble members 0-9 by coordinate label, then plot one line for
# each selected member (10 lines), again without a legend.
xr_ds["gsat"].sel(ensemble_member=range(10)).plot.line(
    hue="ensemble_member", add_legend=False
)
[<matplotlib.lines.Line2D at 0x7f4c9199e4c0>,
 <matplotlib.lines.Line2D at 0x7f4c91946760>,
 <matplotlib.lines.Line2D at 0x7f4c91946940>,
 <matplotlib.lines.Line2D at 0x7f4c91946a00>,
 <matplotlib.lines.Line2D at 0x7f4c91946a60>,
 <matplotlib.lines.Line2D at 0x7f4c91946b50>,
 <matplotlib.lines.Line2D at 0x7f4c91946c40>,
 <matplotlib.lines.Line2D at 0x7f4c91946d30>,
 <matplotlib.lines.Line2D at 0x7f4c91946e20>,
 <matplotlib.lines.Line2D at 0x7f4c91946f10>]
../_images/91a0187b9231d19e4d238c41ac9ef9afda2afa749c13566d3d29a0f36eadab10.png

Scatter plots.

# Scatter gsat (x) against gmst (y), coloured by ensemble member; alpha=0.3
# makes the markers semi-transparent.
xr_ds.plot.scatter(x="gsat", y="gmst", hue="ensemble_member", alpha=0.3)
<matplotlib.collections.PathCollection at 0x7f4c918e1fd0>
../_images/d6c09f5b002aeacb4708cbc37fd208bcb153798d2479b641881385f5bc8fb6bf.png

Or combinations of calculations and plots.

# Reduce to the ensemble median first, then scatter median gsat against
# median gmst.
xr_ds.median(dim="ensemble_member").plot.scatter(x="gsat", y="gmst")
<matplotlib.collections.PathCollection at 0x7f4c917c3820>
../_images/31b84663796505fda97b4e9a87cd6da2eeb40a254c81fb682302950487701bb7.png