Summary of wastewater treatment datasets
This notebook summarizes the wastewater-treatment datasets available in the aqua_fetch package. The datasets are divided into the following categories:
Adsorption
Photocatalysis
Membrane processes
Sonolysis
[1]:
import os
import site
def ancestor_dir(path, levels):
    """Return the directory ``levels`` steps above ``path``.

    Parameters
    ----------
    path : str
        Starting filesystem path.
    levels : int
        Number of trailing path components to strip.

    Returns
    -------
    str
        The ancestor path. Once the filesystem root is reached, further
        steps leave it unchanged (this is how ``os.path.dirname`` behaves).
    """
    for _step in range(levels):
        path = os.path.dirname(path)
    return path


if __name__ == '__main__':
    # Make the source checkout importable when the notebook is run in place.
    # A notebook kernel defines no ``__file__``, so the location is derived
    # explicitly from the (symlink-resolved) working directory; the directory
    # three levels above it is registered as a site directory. Adjust the
    # level count if the notebook is moved within the repository.
    wd_dir = ancestor_dir(os.path.realpath(os.getcwd()), 3)
    print(wd_dir)
    site.addsitedir(wd_dir)
from aqua_fetch import (
ec_removal_biochar,
cr_removal,
po4_removal_biochar,
heavy_metal_removal,
industrial_dye_removal,
heavy_metal_removal_Shen,
P_recovery,
N_recovery,
As_recovery,
mg_degradation,
dye_removal,
dichlorophenoxyacetic_acid_removal,
pms_removal,
micropollutant_removal_osmosis,
ion_transport_via_reverse_osmosis,
cyanobacteria_disinfection
)
/home/docs/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest
Adsorption
[2]:
# Fetch the dataset returned by ec_removal_biochar(); the second element of the
# returned pair is not needed in this cell and is bound to `_`.
data, _ = ec_removal_biochar()
# Show the dataset's shape tuple.
print(data.shape)
(3757, 29)
[3]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['pyrolysis_temperature', 'pyrolysis_time', 'C', 'H', 'O', 'N',
'(O+N)/C', 'ash', 'H/C', 'O/C', 'N/C', 'surface_area', 'pore_volume',
'average_pore_size', 'adsorption_time', 'initial_concentration',
'solution_ph', 'rpm', 'volume', 'adsorbent_dosage',
'adsorption_temperature', 'ion_concentration', 'humic_acid',
'adsorbent', 'pollutant', 'wastewater_type', 'adsorption_type',
'capacity', 'final_concentration'],
dtype='object')
[4]:
# Fetch the dataset returned by cr_removal(); the second element of the
# returned pair is not needed in this cell and is bound to `_`.
data, _ = cr_removal()
# Show the dataset's shape tuple.
print(data.shape)
(219, 20)
[5]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['adsorbent', 'NaOH_conc_M', 'surface_area', 'pore_volume', 'C_%',
'Al_%', 'Nb_%', 'O_%', 'Na_%', 'pore_size', 'adsorption_time',
'initial_conc', 'loading_g/L', 'volume_l', 'loading_g', 'solution_ph',
'cycle_number', 'final_conc', 'adsorption_capacity',
'removal_efficiency'],
dtype='object')
[6]:
# Fetch the dataset returned by po4_removal_biochar(); the second element of the
# returned pair is not needed in this cell and is bound to `_`.
data, _ = po4_removal_biochar()
# Show the dataset's shape tuple.
print(data.shape)
(5014, 33)
[7]:
# List the column names of the `data` object bound by the preceding load cell.
# NOTE(review): the recorded output contains 'final_conf' and '(O+N/C)', which
# look like typos for 'final_conc' and '(O+N)/C' in the upstream loader —
# TODO confirm in aqua_fetch.
print(data.columns)
Index(['adsorbent', 'feedstock', 'activation', 'pyrolysis_temp',
'heating_rate', 'pyrolysis_time', 'C_%', 'H_%', 'O_%', 'N_%', 'S_%',
'Ca_%', 'ash', 'H/C', 'O/C', 'N/C', '(O+N/C)', 'surface_area',
'pore_volume', 'avg_pore_size', 'adsorption_time_min', 'Ci_ppm',
'solution_pH', 'rpm', 'volume_l', 'loading_g', 'loading_g/L',
'adsorption_temp', 'ion_concentration_mM', 'ion_type', 'final_conf',
'qe', 'efficiency'],
dtype='object')
[8]:
# Fetch the dataset returned by heavy_metal_removal(); the second element of the
# returned pair is not needed in this cell and is bound to `_`.
# NOTE(review): in the recorded run this has the same row count (219) and the
# same first 18 column names as cr_removal() above — possibly the same
# underlying data; TODO confirm against the aqua_fetch documentation.
data, _ = heavy_metal_removal()
# Show the dataset's shape tuple.
print(data.shape)
(219, 18)
[9]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['adsorbent', 'NaOH_conc_M', 'surface_area', 'pore_volume', 'C_%',
'Al_%', 'Nb_%', 'O_%', 'Na_%', 'pore_size', 'adsorption_time',
'initial_conc', 'loading_g/L', 'volume_l', 'loading_g', 'solution_ph',
'cycle_number', 'final_conc'],
dtype='object')
[10]:
# Fetch the dataset returned by industrial_dye_removal(); the second element of
# the returned pair is not needed in this cell and is bound to `_`.
data, _ = industrial_dye_removal()
# Show the dataset's shape tuple.
print(data.shape)
(680, 29)
[11]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['adsorbent', 'calcination_temperature', 'calcination_time_min', 'C_%',
'H_%', 'O_%', 'N_%', 'ash', 'H/C', 'O/C', 'N/C', 'surface_area',
'pore_volume', 'average_pore_size', 'dye', 'adsorption_time_min',
'initial_concentration', 'solution_ph', 'rpm', 'volume_l',
'loading_g/l', 'adsorption_temperature', 'ion_concentration_M',
'humic_acid', 'wastewater_type', 'adsorption_type',
'final_concentration', 'qe', 'adsorbent_loading'],
dtype='object')
[12]:
# Fetch the dataset returned by heavy_metal_removal_Shen(); the second element
# of the returned pair is not needed in this cell and is bound to `_`.
data, _ = heavy_metal_removal_Shen()
# Show the dataset's shape tuple.
print(data.shape)
(353, 18)
[13]:
# List the column names of the `data` object bound by the preceding load cell.
# NOTE(review): the recorded output contains 'ph_bichar', presumably a typo for
# 'ph_biochar' in the upstream loader — TODO confirm in aqua_fetch.
print(data.columns)
Index(['heavy_metal', 'hm_label', 'ph_bichar', 'C_%', '(O+N)/C', 'O/C', 'H/C',
'ash', 'PS', 'SA', 'CEC', 'temperature', 'solution_ph', 'C0', 'χ', 'r',
'Ncharge', 'n'],
dtype='object')
[14]:
# Fetch the dataset returned by P_recovery(); the second element of the
# returned pair is not needed in this cell and is bound to `_`.
data, _ = P_recovery()
# Show the dataset's shape tuple.
print(data.shape)
(504, 8)
[15]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['stir_rpm', 'time_min', 'temperature_C', 'pH', 'N:P', 'Mg:P',
'P_initial_mgl', 'P_recovery_%'],
dtype='object')
[16]:
# Fetch the dataset returned by N_recovery(); the second element of the
# returned pair is not needed in this cell and is bound to `_`.
data, _ = N_recovery()
# Show the dataset's shape tuple.
print(data.shape)
(210, 8)
[17]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['stir_rpm', 'time_min', 'temperature_C', 'pH', 'N:P', 'Mg:N',
'P_initial_mgl', 'N_recovery_%'],
dtype='object')
[18]:
# As_recovery() raised KeyError in the recorded run: the column selection inside
# the loader did not match the columns of the loaded file. Catch that specific
# error so the failure is reported without aborting "Run All", and rebind `data`
# so the following cell cannot silently display the previous dataset's columns.
try:
    data, _ = As_recovery()
except KeyError as err:
    import pandas as pd  # local import: only needed for the empty placeholder

    data = pd.DataFrame(columns=[])
    print(f"As_recovery() could not be loaded: {err}")
else:
    # Show the dataset's shape tuple only when the load succeeded.
    print(data.shape)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[18], line 1
----> 1 data, _ = As_recovery()
2 print(data.shape)
File ~/checkouts/readthedocs.org/user_builds/water-datasets/checkouts/latest/aqua_fetch/wwt/adsorption.py:950, in As_recovery(parameters, encoding)
946 data.rename(columns=columns, inplace=True)
948 parameters = check_attributes(parameters, list(columns.values()), 'parameters')
--> 950 data = data[parameters]
952 data, encoders = encode_cols(data, ['material', 'biochar_modification', 'biochar_type', 'As_type'], encoding)
954 return data, encoders
File ~/checkouts/readthedocs.org/user_builds/water-datasets/envs/latest/lib/python3.12/site-packages/pandas/core/frame.py:3899, in DataFrame.__getitem__(self, key)
3897 if is_iterator(key):
3898 key = list(key)
-> 3899 indexer = self.columns._get_indexer_strict(key, "columns")[1]
3901 # take() does not accept boolean indexers
3902 if getattr(indexer, "dtype", None) == bool:
File ~/checkouts/readthedocs.org/user_builds/water-datasets/envs/latest/lib/python3.12/site-packages/pandas/core/indexes/base.py:6115, in Index._get_indexer_strict(self, key, axis_name)
6112 else:
6113 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 6115 self._raise_if_missing(keyarr, indexer, axis_name)
6117 keyarr = self.take(indexer)
6118 if isinstance(key, Index):
6119 # GH 42790 - Preserve name from an Index
File ~/checkouts/readthedocs.org/user_builds/water-datasets/envs/latest/lib/python3.12/site-packages/pandas/core/indexes/base.py:6176, in Index._raise_if_missing(self, key, indexer, axis_name)
6174 if use_interval_msg:
6175 key = list(key)
-> 6176 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
6178 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
6179 raise KeyError(f"{not_found} not in index")
KeyError: "None of [Index(['material', 'biochar_modification', 'biochar_type', 'BET_surface_area',\n 'pore_volume', 'solution_pH', 'reactor_temperature',\n 'initial_As_concentration_mg_L', 'adsorbent_dosage',\n 'equilibrium_reaction_time_h', 'pyrolysis_temperature', 'As_mg_g',\n 'As_type'],\n dtype='object')] are in the [columns]"
[19]:
# List the column names of whatever `data` currently holds.
# NOTE(review): in the recorded run the As_recovery() load above failed, so
# `data` was still the N_recovery frame and the output shown here is the
# N_recovery column list, not As_recovery's.
print(data.columns)
Index(['stir_rpm', 'time_min', 'temperature_C', 'pH', 'N:P', 'Mg:N',
'P_initial_mgl', 'N_recovery_%'],
dtype='object')
Photocatalysis
[20]:
# Fetch the dataset returned by mg_degradation(); the second element of the
# returned pair is not needed in this cell and is bound to `_`.
data, _ = mg_degradation()
# Show the dataset's shape tuple.
print(data.shape)
(1200, 14)
[21]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['surface_area', 'pore_volume', 'catalyst_loading_g/l',
'Light_intensity (W)', 'time_min', 'solution_pH', 'HA (mg/L)',
'ini_conc_mg/l', 'final_conc_mg/l', 'catalyst_type', 'anions',
'Efficiency (%)', 'k_first', 'k_2nd'],
dtype='object')
[22]:
# Fetch the dataset returned by dye_removal(); the second element of the
# returned pair is not needed in this cell and is bound to `_`.
data, _ = dye_removal()
# Show the dataset's shape tuple.
print(data.shape)
(1527, 36)
[23]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['catalyst', 'hydrothermal_synthesis_time_min', 'energy_band_gap_eV',
'C_%', 'O_%', 'Fe_%', 'Al_%', 'Ni_%', 'Mo_%', 'S_%', 'Bi', 'Ag', 'Pd',
'Pt', 'surface_area_m2/g', 'pore_volume_cm3/g', 'pore_size_nm',
'volume_l', 'loading_g', 'light_intensity_watt', 'light_source_dist_cm',
'time_m', 'dye', 'log_kw', 'hydrogen_bonding_accep_count',
'hydrogen_bonding_donor_count', 'solubility_g/l', 'molecular_wt_g/M',
'pka1', 'pka2', 'dye_conc_mg/l', 'solution_ph', 'ha_mg/l', 'anions',
'k_1st', 'k_2nd'],
dtype='object')
[24]:
# Fetch the dataset returned by dichlorophenoxyacetic_acid_removal(); the second
# element of the returned pair is not needed in this cell and is bound to `_`.
data, _ = dichlorophenoxyacetic_acid_removal()
# Show the dataset's shape tuple.
print(data.shape)
(1044, 16)
[25]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['catalyst', 'surface_area', 'pore_volume', 'energy_band_gap_eV', 'Au_%',
'Bi_%', 'Fe_%', 'O_%', 'catalyst_loading_g/l', 'light_intensity_watt',
'time_min', 'solution_ph', 'anions', 'ini_conc_mg/l', 'final_conc_mg/l',
'efficiency_%'],
dtype='object')
[26]:
# Fetch the dataset returned by pms_removal(); the second element of the
# returned pair is not needed in this cell and is bound to `_`.
data, _ = pms_removal()
# Show the dataset's shape tuple.
print(data.shape)
(2078, 25)
[27]:
# List the column names of the `data` object bound by the preceding load cell.
print(data.columns)
Index(['time_min', 'catalyst_type', 'magnetization_Ms_emu/g',
'energy_band_gap_eV', 'calcination_temp_C', 'min_calcination_time',
'surface_area', 'pore_size', 'pollutant', 'poll_mol_formula',
'pms_concentration_g/l', 'light_intensity_watt', 'light_type',
'catalyst_dosage_g/l', 'ini_conc_ppm', 'solution_ph', 'H2O2_Conc_ppm',
'volume_ml', 'stirring_speed_rpm', 'radical_scavenger',
'inorganic anions', 'water_type', 'cycle_num', 'final_conc_ppm',
'removal_efficiency_%'],
dtype='object')
Membrane processes
[28]:
# NOTE(review): the two membrane-process loaders below are disabled and the
# reason is not recorded here — TODO confirm whether they can be re-enabled
# or should be removed together with their imports.
# data, _ = micropollutant_removal_osmosis()
# print(data.shape)
# # %%
# data, _ = ion_transport_via_reverse_osmosis()
# print(data.shape)
Sonolysis
[29]:
# Fetch the dataset returned by cyanobacteria_disinfection(); the second element
# of the returned pair is not needed in this cell and is bound to `_`.
data, _ = cyanobacteria_disinfection()
# Show the dataset's shape tuple.
print(data.shape)
(314, 146)
[30]:
# List the column names of the `data` object bound by the preceding load cell.
# The recorded output is abbreviated with "..." (146 names in total), so only
# the first and last names are visible; print list(data.columns) to see all.
print(data.columns)
Index(['Time (min)', 'Cyanobacterial cell count',
'wastewater concentration (Ci)', 'Sonicator power density',
'Concentration of H2O2', 'Volume (mL)', 'Solution pH',
'final count/0.5 mL', 'final count/mL', 'Particles/mL',
...
'Transparency Max', 'Volume (ABD) Mean', 'Volume (ABD) Min',
'Volume (ABD) Max', 'Volume (ESD) Mean', 'Volume (ESD) Min',
'Volume (ESD) Max', 'Width Mean', 'Width Min', 'Width Max'],
dtype='object', length=146)