xarray compatibility

scmdata allows data to be exported to xarray. This makes it easy to use xarray’s many helpful features, most of which are not natively provided in scmdata.

import numpy as np

from scmdata import ScmRun
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.15.3/src/scmdata/database/_database.py:9: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  import tqdm.autonotebook as tqdman
def get_data(years, n_ensemble_members, end_val, rand_pct):
    """
    Build a sample ensemble of ramping, randomly scaled timeseries

    Every column is the same linear ramp, starting at zero and rising
    towards ``end_val`` (the last row is ``(n - 1) / n * end_val`` with
    ``n = years.shape[0]``), multiplied element-wise by
    ``1 + rand_pct * u`` where ``u`` comes from ``np.random.random``.

    Parameters
    ----------
    years
        Array defining the time axis; only its length along the first
        axis is used.

    n_ensemble_members
        Number of columns (ensemble members) to generate.

    end_val
        Value the underlying ramp approaches at the end of the time axis.

    rand_pct
        Scale of the multiplicative random perturbation. ``0`` gives
        identical, noise-free columns.

    Returns
    -------
        Array of shape ``(years.shape[0], n_ensemble_members)``
    """
    n_years = years.shape[0]

    # Deterministic part: a ramp shared by all ensemble members
    ramp = np.arange(n_years) / n_years * end_val

    # Random part: one multiplicative factor per (year, ensemble member)
    scaling = rand_pct * np.random.random((n_years, n_ensemble_members)) + 1

    # Turn the ramp into a column so it broadcasts across ensemble members
    return ramp[:, np.newaxis] * scaling
# Time axis: one point per integer from 1750 to 2500 inclusive
years = np.arange(1750, 2501)
variables = ["gsat", "gmst"]
n_variables = len(variables)
n_ensemble_members = 100


# Columns are laid out variable by variable: all ensemble members of the
# first variable, then all ensemble members of the second. The "variable"
# and "ensemble_member" metadata lists below follow the same layout.
start = ScmRun(
    np.hstack(
        [
            get_data(years, n_ensemble_members, 5.5, 0.1),
            get_data(years, n_ensemble_members, 6.0, 0.05),
        ]
    ),
    index=years,
    columns={
        "model": "a_model",
        "scenario": "a_scenario",
        "variable": [
            name for name in variables for _ in range(n_ensemble_members)
        ],
        "region": "World",
        "unit": "K",
        "ensemble_member": list(range(n_ensemble_members)) * n_variables,
    },
)
start
<ScmRun (timeseries: 200, timepoints: 751)>
Time:
	Start: 1750-01-01T00:00:00
	End: 2500-01-01T00:00:00
Meta:
	     ensemble_member    model region    scenario unit variable
	0                  0  a_model  World  a_scenario    K     gsat
	1                  1  a_model  World  a_scenario    K     gsat
	2                  2  a_model  World  a_scenario    K     gsat
	3                  3  a_model  World  a_scenario    K     gsat
	4                  4  a_model  World  a_scenario    K     gsat
	..               ...      ...    ...         ...  ...      ...
	195               95  a_model  World  a_scenario    K     gmst
	196               96  a_model  World  a_scenario    K     gmst
	197               97  a_model  World  a_scenario    K     gmst
	198               98  a_model  World  a_scenario    K     gmst
	199               99  a_model  World  a_scenario    K     gmst
	
	[200 rows x 6 columns]

The usual scmdata methods are of course available.

start.plumeplot(
    quantile_over="ensemble_member", hue_var="variable", hue_label="Variable"
)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.15.3/src/scmdata/run.py:191: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.15.3/src/scmdata/run.py:191: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.15.3/src/scmdata/run.py:191: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.15.3/src/scmdata/run.py:191: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.15.3/src/scmdata/run.py:191: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
/home/docs/checkouts/readthedocs.org/user_builds/scmdata/checkouts/v0.15.3/src/scmdata/run.py:191: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  df.reset_index(inplace=True)
(<Axes: ylabel='K'>,
 [<matplotlib.patches.Patch at 0x7fe270a93d30>,
  <matplotlib.collections.PolyCollection at 0x7fe2a410e730>,
  <matplotlib.lines.Line2D at 0x7fe270942850>,
  <matplotlib.patches.Patch at 0x7fe270ab9340>,
  <matplotlib.lines.Line2D at 0x7fe270aa0ac0>,
  <matplotlib.lines.Line2D at 0x7fe270aa0700>,
  <matplotlib.patches.Patch at 0x7fe2708aa4c0>,
  <matplotlib.lines.Line2D at 0x7fe2708aa460>,
  <matplotlib.lines.Line2D at 0x7fe2708a0070>])
../_images/d3cf9e8d09dd2009168d4638f9f9abc513aab6cc0d4aae6bed9092ac11088b89.png

However, we can cast to an xarray Dataset and then all the xarray methods become available too.

xr_ds = start.to_xarray(dimensions=("ensemble_member",))
xr_ds
<xarray.Dataset>
Dimensions:          (time: 751, ensemble_member: 100)
Coordinates:
  * time             (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
  * ensemble_member  (ensemble_member) int64 0 1 2 3 4 5 6 ... 94 95 96 97 98 99
Data variables:
    gmst             (ensemble_member, time) float64 0.0 0.008363 ... 6.117
    gsat             (ensemble_member, time) float64 0.0 0.007729 ... 5.973
Attributes:
    scmdata_metadata_region:    World
    scmdata_metadata_scenario:  a_scenario
    scmdata_metadata_model:     a_model

For example, calculating statistics.

xr_ds.median(dim="ensemble_member")
<xarray.Dataset>
Dimensions:  (time: 751)
Coordinates:
  * time     (time) object 1750-01-01 00:00:00 ... 2500-01-01 00:00:00
Data variables:
    gmst     (time) float64 0.0 0.008176 0.0163 0.02463 ... 6.12 6.164 6.109
    gsat     (time) float64 0.0 0.007712 0.01543 0.02303 ... 5.806 5.764 5.742

Plotting timeseries.

xr_ds["gsat"].plot.line(hue="ensemble_member", add_legend=False)
[<matplotlib.lines.Line2D at 0x7fe26e7a57f0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fb5b0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fb790>,
 <matplotlib.lines.Line2D at 0x7fe26e6fb850>,
 <matplotlib.lines.Line2D at 0x7fe26e6fb8e0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fb9d0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fbac0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fbbb0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fbca0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fbd90>,
 <matplotlib.lines.Line2D at 0x7fe26e6fbe80>,
 <matplotlib.lines.Line2D at 0x7fe26e6fb160>,
 <matplotlib.lines.Line2D at 0x7fe26e6fbf70>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd070>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd160>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd250>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd340>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd430>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd520>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd610>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd700>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd7f0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd8e0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fd9d0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fdc10>,
 <matplotlib.lines.Line2D at 0x7fe26e6fdaf0>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8df0>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8ee0>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8fd0>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8cd0>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8b50>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8a00>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8910>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8820>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8730>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8640>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8550>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8460>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8370>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8280>,
 <matplotlib.lines.Line2D at 0x7fe26e6f8190>,
 <matplotlib.lines.Line2D at 0x7fe26e6f80a0>,
 <matplotlib.lines.Line2D at 0x7fe26e7693d0>,
 <matplotlib.lines.Line2D at 0x7fe26e7694c0>,
 <matplotlib.lines.Line2D at 0x7fe26e7695b0>,
 <matplotlib.lines.Line2D at 0x7fe26e7696a0>,
 <matplotlib.lines.Line2D at 0x7fe26e769790>,
 <matplotlib.lines.Line2D at 0x7fe26e769880>,
 <matplotlib.lines.Line2D at 0x7fe26e7699a0>,
 <matplotlib.lines.Line2D at 0x7fe26e769a90>,
 <matplotlib.lines.Line2D at 0x7fe26e769b80>,
 <matplotlib.lines.Line2D at 0x7fe26e769c70>,
 <matplotlib.lines.Line2D at 0x7fe26e769d60>,
 <matplotlib.lines.Line2D at 0x7fe26e769e50>,
 <matplotlib.lines.Line2D at 0x7fe26e769f40>,
 <matplotlib.lines.Line2D at 0x7fe26e769310>,
 <matplotlib.lines.Line2D at 0x7fe26e769220>,
 <matplotlib.lines.Line2D at 0x7fe26e769130>,
 <matplotlib.lines.Line2D at 0x7fe26e769040>,
 <matplotlib.lines.Line2D at 0x7fe26e74f8e0>,
 <matplotlib.lines.Line2D at 0x7fe26e74fb80>,
 <matplotlib.lines.Line2D at 0x7fe26e74ff70>,
 <matplotlib.lines.Line2D at 0x7fe26e74fe20>,
 <matplotlib.lines.Line2D at 0x7fe26e74fd30>,
 <matplotlib.lines.Line2D at 0x7fe26e74fc40>,
 <matplotlib.lines.Line2D at 0x7fe26e74fa60>,
 <matplotlib.lines.Line2D at 0x7fe26e74f970>,
 <matplotlib.lines.Line2D at 0x7fe26e74f7f0>,
 <matplotlib.lines.Line2D at 0x7fe2708a0130>,
 <matplotlib.lines.Line2D at 0x7fe26e704f40>,
 <matplotlib.lines.Line2D at 0x7fe26e704e50>,
 <matplotlib.lines.Line2D at 0x7fe26e704d60>,
 <matplotlib.lines.Line2D at 0x7fe26e704c70>,
 <matplotlib.lines.Line2D at 0x7fe26e704b80>,
 <matplotlib.lines.Line2D at 0x7fe26e704a90>,
 <matplotlib.lines.Line2D at 0x7fe26e7049a0>,
 <matplotlib.lines.Line2D at 0x7fe26e7048b0>,
 <matplotlib.lines.Line2D at 0x7fe26e7047c0>,
 <matplotlib.lines.Line2D at 0x7fe26e7046d0>,
 <matplotlib.lines.Line2D at 0x7fe26e7045e0>,
 <matplotlib.lines.Line2D at 0x7fe26e7044f0>,
 <matplotlib.lines.Line2D at 0x7fe26e704400>,
 <matplotlib.lines.Line2D at 0x7fe26e704310>,
 <matplotlib.lines.Line2D at 0x7fe26e704220>,
 <matplotlib.lines.Line2D at 0x7fe26e704130>,
 <matplotlib.lines.Line2D at 0x7fe26e704040>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe100>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe1f0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe2e0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe3d0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe4c0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe5b0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe6a0>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe790>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe880>,
 <matplotlib.lines.Line2D at 0x7fe26e6fe970>,
 <matplotlib.lines.Line2D at 0x7fe26e6fea60>,
 <matplotlib.lines.Line2D at 0x7fe26e6feb50>,
 <matplotlib.lines.Line2D at 0x7fe26e6fec40>,
 <matplotlib.lines.Line2D at 0x7fe26e6fed30>]
../_images/a08d3071df65297407def3877a5190235c0b94480918118289fdc1eea82393b8.png

Selecting and plotting timeseries.

xr_ds["gsat"].sel(ensemble_member=range(10)).plot.line(
    hue="ensemble_member", add_legend=False
)
[<matplotlib.lines.Line2D at 0x7fe26e5d0eb0>,
 <matplotlib.lines.Line2D at 0x7fe26e62abb0>,
 <matplotlib.lines.Line2D at 0x7fe26e62adf0>,
 <matplotlib.lines.Line2D at 0x7fe26e5de100>,
 <matplotlib.lines.Line2D at 0x7fe26e5de1f0>,
 <matplotlib.lines.Line2D at 0x7fe26e5de2e0>,
 <matplotlib.lines.Line2D at 0x7fe26e5de3d0>,
 <matplotlib.lines.Line2D at 0x7fe26e5de4c0>,
 <matplotlib.lines.Line2D at 0x7fe26e5de5b0>,
 <matplotlib.lines.Line2D at 0x7fe26e5de6a0>]
../_images/83746e77128a94572a7d390e9db0568d5dc21aee820208bfa7bb963e97034efb.png

Scatter plots.

xr_ds.plot.scatter(x="gsat", y="gmst", hue="ensemble_member", alpha=0.3)
<matplotlib.collections.PathCollection at 0x7fe26e4f2c10>
../_images/8b7d4c6524a8214cd6c5dc5807a1769734803f3842c5a0ff7186efa1b111f74f.png

Or combinations of calculations and plots.

xr_ds.median(dim="ensemble_member").plot.scatter(x="gsat", y="gmst")
<matplotlib.collections.PathCollection at 0x7fe26e45b460>
../_images/fb3b9bfb44cbc8794a3d56159245b6afd636048038066cc13733d519ed3952bd.png