CAMELS Australia
This example demonstrates how to use the ``aqua_fetch`` package to download and explore the CAMELS Australia dataset using the :py:class:`aqua_fetch.RainfallRunoff` class. Although we show it for CAMELS Australia, the same can be done for all other rainfall-runoff datasets.
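Note: This file runs online on readthedocs every time the documentation is built. The server hosting the CAMELS_AUS data is sometimes down and returns an HTTPError (for example, HTTP Error 500: Internal Server Error or HTTP Error 503: Service Unavailable). When that happens, the ``dataset`` object is never created and every subsequent cell fails with a NameError.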
[1]:
import os
import site
if __name__ == '__main__':
wd_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath('__file__')))))
#wd_dir = os.path.dirname(os.path.realpath('__file__'))
#wd_dir = os.path.dirname(os.path.dirname(os.path.realpath('__file__')))
print(wd_dir)
site.addsitedir(wd_dir)
from tabulight import EDA
import matplotlib.pyplot as plt
from easy_mpl import scatter, hist
from easy_mpl.utils import process_cbar
from aqua_fetch import RainfallRunoff
from aqua_fetch.utils import print_info
/home/docs/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest
[2]:
print_info()
numpy 1.26.4
pandas 2.1.4
water_quality 0.1.0
python 3.12.7 (main, Nov 5 2024, 17:00:24) [GCC 9.4.0]
os posix
matplotlib 3.8.4
xarray 2024.7.0
netCDF4 1.7.2
scipy 1.15.1
Script Executed on: 19 January 2025 12:13:12
tot_cpus 2
avail_cpus 2
mem_gib 7.612831115722656
[3]:
dataset = RainfallRunoff('CAMELS_AUS', version=1,
#overwrite=True,
#path='/mnt/datawaha/hyex/atr/gscad_database/raw/CAMELS_AUS_V1'
)
01_id_name_metadata.zip already exists at /home/docs/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest/aqua_fetch/data/CAMELS/CAMELS_AUS
Downloading 02_location_boundary_area.zip from https://download.pangaea.de/dataset/921850/files/02_location_boundary_area.zip at /home/docs/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest/aqua_fetch/data/CAMELS/CAMELS_AUS/02_location_boundary_area.zip
HTTP Error for https://download.pangaea.de/dataset/921850/files/02_location_boundary_area.zip to download 02_location_boundary_area.zip
/home/docs/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest/aqua_fetch/rr/_camels.py:276: SyntaxWarning: invalid escape sequence '\s'
sep="\s+|;|:",
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
Cell In[3], line 1
----> 1 dataset = RainfallRunoff('CAMELS_AUS', version=1,
2 #overwrite=True,
3 #path='/mnt/datawaha/hyex/atr/gscad_database/raw/CAMELS_AUS_V1'
4 )
File ~/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest/aqua_fetch/rr/__init__.py:296, in RainfallRunoff.__init__(self, dataset, path, overwrite, to_netcdf, processes, remove_zip, verbosity, **kwargs)
293 if dataset not in DATASETS:
294 raise ValueError(f"Dataset {dataset} not available")
--> 296 self.dataset = DATASETS[dataset](
297 path=path,
298 overwrite=overwrite,
299 to_netcdf=to_netcdf,
300 processes=processes,
301 remove_zip=remove_zip,
302 verbosity=verbosity,
303 **kwargs
304 )
File ~/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest/aqua_fetch/rr/_camels.py:835, in CAMELS_AUS.__init__(self, path, version, to_netcdf, overwrite, verbosity, **kwargs)
833 if verbosity > 0:
834 print(f"Downloading {_file} from {url + _file} at {fpath}")
--> 835 download(url + _file, outdir=self.path, fname=_file)
836 elif verbosity > 0:
837 print(f"{_file} already exists at {self.path}")
File ~/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest/aqua_fetch/utils.py:101, in download(url, outdir, fname, verbosity)
99 except ulib.HTTPError as e:
100 print(f"HTTP Error for {url} to download {fname}")
--> 101 raise e
103 filename = filename_from_url(url)
105 if fname:
File ~/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest/aqua_fetch/utils.py:98, in download(url, outdir, fname, verbosity)
95 binurl = urlparse.urlunsplit(binurl)
97 try:
---> 98 (tmpfile, headers) = ulib.urlretrieve(binurl, tmpfile, callback)
99 except ulib.HTTPError as e:
100 print(f"HTTP Error for {url} to download {fname}")
File ~/.asdf/installs/python/3.12.7/lib/python3.12/urllib/request.py:240, in urlretrieve(url, filename, reporthook, data)
223 """
224 Retrieve a URL into a temporary location on disk.
225
(...)
236 data file as well as the resulting HTTPMessage object.
237 """
238 url_type, path = _splittype(url)
--> 240 with contextlib.closing(urlopen(url, data)) as fp:
241 headers = fp.info()
243 # Just return the local path and the "headers" for file://
244 # URLs. No sense in performing a copy unless requested.
File ~/.asdf/installs/python/3.12.7/lib/python3.12/urllib/request.py:215, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
213 else:
214 opener = _opener
--> 215 return opener.open(url, data, timeout)
File ~/.asdf/installs/python/3.12.7/lib/python3.12/urllib/request.py:521, in OpenerDirector.open(self, fullurl, data, timeout)
519 for processor in self.process_response.get(protocol, []):
520 meth = getattr(processor, meth_name)
--> 521 response = meth(req, response)
523 return response
File ~/.asdf/installs/python/3.12.7/lib/python3.12/urllib/request.py:630, in HTTPErrorProcessor.http_response(self, request, response)
627 # According to RFC 2616, "2xx" code indicates that the client's
628 # request was successfully received, understood, and accepted.
629 if not (200 <= code < 300):
--> 630 response = self.parent.error(
631 'http', request, response, code, msg, hdrs)
633 return response
File ~/.asdf/installs/python/3.12.7/lib/python3.12/urllib/request.py:559, in OpenerDirector.error(self, proto, *args)
557 if http_err:
558 args = (dict, 'default', 'http_error_default') + orig_args
--> 559 return self._call_chain(*args)
File ~/.asdf/installs/python/3.12.7/lib/python3.12/urllib/request.py:492, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
490 for handler in handlers:
491 func = getattr(handler, meth_name)
--> 492 result = func(*args)
493 if result is not None:
494 return result
File ~/.asdf/installs/python/3.12.7/lib/python3.12/urllib/request.py:639, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
638 def http_error_default(self, req, fp, code, msg, hdrs):
--> 639 raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: HTTP Error 503: Service Unavailable
[4]:
dataset.start
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[4], line 1
----> 1 dataset.start
NameError: name 'dataset' is not defined
[5]:
dataset.end
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[5], line 1
----> 1 dataset.end
NameError: name 'dataset' is not defined
[6]:
stations = dataset.stations()
len(stations)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[6], line 1
----> 1 stations = dataset.stations()
2 len(stations)
NameError: name 'dataset' is not defined
[7]:
stations[0:10]
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[7], line 1
----> 1 stations[0:10]
NameError: name 'stations' is not defined
Static Features
[8]:
dataset.static_features
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[8], line 1
----> 1 dataset.static_features
NameError: name 'dataset' is not defined
[9]:
len(dataset.static_features)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[9], line 1
----> 1 len(dataset.static_features)
NameError: name 'dataset' is not defined
[10]:
mrvbf = 'proportion of catchment occupied by classes of MultiResolution Valley Bottom Flatness'
lc01 = 'land cover codes'
nvis = 'vegetation sub-groups'
anngro = 'Average annual growth index value for some plants'
gromega = 'Seasonality of growth index value'
npp = 'net primary productivity'
[11]:
static = dataset.fetch_static_features(stations=stations)
static.shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[11], line 1
----> 1 static = dataset.fetch_static_features(stations=stations)
2 static.shape
NameError: name 'dataset' is not defined
[12]:
# EDA(data=static, save=False).heatmap()
[13]:
physical_features = []
soil_features = []
geological_features = []
flow_characteristics = []
static = static.dropna(axis=1)
static.shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[13], line 6
3 geological_features = []
4 flow_characteristics = []
----> 6 static = static.dropna(axis=1)
7 static.shape
NameError: name 'static' is not defined
[14]:
coords = dataset.stn_coords()
coords
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[14], line 1
----> 1 coords = dataset.stn_coords()
2 coords
NameError: name 'dataset' is not defined
[15]:
dataset.plot_stations()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[15], line 1
----> 1 dataset.plot_stations()
NameError: name 'dataset' is not defined
[16]:
lat = coords['lat'].astype(float).values.reshape(-1,)
long = coords['long'].astype(float).values.reshape(-1,)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[16], line 1
----> 1 lat = coords['lat'].astype(float).values.reshape(-1,)
2 long = coords['long'].astype(float).values.reshape(-1,)
NameError: name 'coords' is not defined
[17]:
idx = 0
ax_num = 0
fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()
while ax_num < 25:
val = static.iloc[:, idx]
idx += 1
try:
c = val.astype(float).values.reshape(-1,)
en = 222
ax = axes[ax_num]
ax, sc = scatter(long[0:en], lat[0:en], c=c[0:en], cmap="hot", show=False, ax=ax)
process_cbar(ax, sc, border=False, title=val.name, #title_kws ={"fontsize": 14}
)
ax_num += 1
except ValueError:
continue
plt.tight_layout()
plt.show()
print(idx)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[17], line 9
5 axes = axes.flatten()
7 while ax_num < 25:
----> 9 val = static.iloc[:, idx]
10 idx += 1
12 try:
NameError: name 'static' is not defined
[18]:
idx = 32
ax_num = 0
fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()
while ax_num < 25:
val = static.iloc[:, idx]
idx += 1
try:
c = val.astype(float).values.reshape(-1,)
en = 222
ax = axes[ax_num]
ax, sc = scatter(long[0:en], lat[0:en], c=c[0:en], cmap="hot", show=False, ax=ax)
process_cbar(ax, sc, border=False, title=val.name, #title_kws ={"fontsize": 14}
)
ax_num += 1
except ValueError:
continue
plt.tight_layout()
plt.show()
print(idx)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[18], line 9
5 axes = axes.flatten()
7 while ax_num < 25:
----> 9 val = static.iloc[:, idx]
10 idx += 1
12 try:
NameError: name 'static' is not defined
[19]:
idx = 59
ax_num = 0
fig, axes = plt.subplots(5, 5, figsize=(15, 12))
axes = axes.flatten()
while ax_num < 25:
val = static.iloc[:, idx]
idx += 1
try:
c = val.astype(float).values.reshape(-1,)
en = 222
ax = axes[ax_num]
ax, sc = scatter(long[0:en], lat[0:en], c=c[0:en], cmap="hot", show=False, ax=ax)
process_cbar(ax, sc, border=False, title=val.name, #title_kws ={"fontsize": 14}
)
ax_num += 1
except ValueError:
continue
plt.tight_layout()
plt.show()
print(idx)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[19], line 9
5 axes = axes.flatten()
7 while ax_num < 25:
----> 9 val = static.iloc[:, idx]
10 idx += 1
12 try:
NameError: name 'static' is not defined
Dynamic Features
[20]:
dataset.dynamic_features
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[20], line 1
----> 1 dataset.dynamic_features
NameError: name 'dataset' is not defined
Streamflow
[21]:
streamflow = dataset.q_mmd()
streamflow.shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[21], line 1
----> 1 streamflow = dataset.q_mmd()
3 streamflow.shape
NameError: name 'dataset' is not defined
[22]:
streamflow
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[22], line 1
----> 1 streamflow
NameError: name 'streamflow' is not defined
[23]:
# EDA(data=streamflow, save=False).heatmap()
[24]:
fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
for idx, ax in enumerate(axes.flat):
hist(streamflow.iloc[:, idx].values.reshape(-1,),
bins=20,
ax=ax,
show=False
)
plt.show()
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[24], line 5
1 fig, axes = plt.subplots(7, 7, figsize=(10, 10), sharey="all")
3 for idx, ax in enumerate(axes.flat):
----> 5 hist(streamflow.iloc[:, idx].values.reshape(-1,),
6 bins=20,
7 ax=ax,
8 show=False
9 )
11 plt.show()
NameError: name 'streamflow' is not defined
[25]:
_ = hist(streamflow.skew().values.reshape(-1,), bins=50)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[25], line 1
----> 1 _ = hist(streamflow.skew().values.reshape(-1,), bins=50)
NameError: name 'streamflow' is not defined
[26]:
df = dataset.fetch(stations=1, as_dataframe=True)
df = df.unstack() # the returned dataframe is a multi-indexed dataframe so we have to unstack it
df.columns = df.columns.get_level_values('dynamic_features')
df.shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[26], line 1
----> 1 df = dataset.fetch(stations=1, as_dataframe=True)
2 df = df.unstack() # the returned dataframe is a multi-indexed dataframe so we have to unstack it
3 df.columns = df.columns.get_level_values('dynamic_features')
NameError: name 'dataset' is not defined
[27]:
df
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[27], line 1
----> 1 df
NameError: name 'df' is not defined
[28]:
# get name of all stations as list
stns = dataset.stations()
len(stns)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[28], line 2
1 # get name of all stations as list
----> 2 stns = dataset.stations()
3 len(stns)
NameError: name 'dataset' is not defined
Get data of 10% of stations as a dataframe
[29]:
df = dataset.fetch(0.1, as_dataframe=True)
df.shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[29], line 1
----> 1 df = dataset.fetch(0.1, as_dataframe=True)
2 df.shape
NameError: name 'dataset' is not defined
[30]:
df
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[30], line 1
----> 1 df
NameError: name 'df' is not defined
The returned dataframe is a multi-indexed dataframe
[31]:
df.index.names == ['time', 'dynamic_features']
df
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[31], line 1
----> 1 df.index.names == ['time', 'dynamic_features']
3 df
NameError: name 'df' is not defined
get data by station id
[32]:
df = dataset.fetch(stations='224214A', as_dataframe=True).unstack()
df.shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[32], line 1
----> 1 df = dataset.fetch(stations='224214A', as_dataframe=True).unstack()
2 df.shape
NameError: name 'dataset' is not defined
[33]:
df
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[33], line 1
----> 1 df
NameError: name 'df' is not defined
get names of available dynamic features
[34]:
dataset.dynamic_features
# get only selected dynamic features
data = dataset.fetch(1, as_dataframe=True,
dynamic_features=['airtemp_C_awap_max', 'pcp_mm_awap', 'aet_mm_silo_morton', 'q_cms_obs']).unstack()
data.shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[34], line 1
----> 1 dataset.dynamic_features
2 # get only selected dynamic features
3 data = dataset.fetch(1, as_dataframe=True,
4 dynamic_features=['airtemp_C_awap_max', 'pcp_mm_awap', 'aet_mm_silo_morton', 'q_cms_obs']).unstack()
NameError: name 'dataset' is not defined
[35]:
data
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[35], line 1
----> 1 data
NameError: name 'data' is not defined
[36]:
# get names of available static features
dataset.static_features
# get data of 10 random stations
df = dataset.fetch(10, as_dataframe=True)
df.shape # remember this is a multiindexed dataframe
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[36], line 2
1 # get names of available static features
----> 2 dataset.static_features
3 # get data of 10 random stations
4 df = dataset.fetch(10, as_dataframe=True)
NameError: name 'dataset' is not defined
[37]:
# when we get both static and dynamic data, the returned data is a dictionary
# with ``static`` and ``dynamic`` keys.
data = dataset.fetch(stations='224214A', static_features="all", as_dataframe=True)
data['static'].shape, data['dynamic'].shape
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[37], line 3
1 # when we get both static and dynamic data, the returned data is a dictionary
2 # with ``static`` and ``dyanic`` keys.
----> 3 data = dataset.fetch(stations='224214A', static_features="all", as_dataframe=True)
4 data['static'].shape, data['dynamic'].shape
NameError: name 'dataset' is not defined
[38]:
data['static']
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[38], line 1
----> 1 data['static']
NameError: name 'data' is not defined
[39]:
data['dynamic']
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[39], line 1
----> 1 data['dynamic']
NameError: name 'data' is not defined
Get data of all stations as an xarray dataset
[40]:
data = dataset.fetch()
data
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[40], line 1
----> 1 data = dataset.fetch()
2 data
NameError: name 'dataset' is not defined