# Environment setup: when running inside Google Colab, mount Google Drive and
# install the `mirzai` package.
if 'google.colab' in str(get_ipython()):
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
    # Equivalent to the notebook shell escape `!pip install mirzai`, written as
    # a plain Python call so the cell also parses outside IPython.
    get_ipython().system('pip install mirzai')
else:
    # NOTE(review): the body of this branch is missing from the exported text;
    # restore the original local-run setup here if there was one.
    pass
2. Select & transform
Select a subset of the data based on features, target, and auxiliary data such as Soil Taxonomy order. Simple transformations, such as a log-10 transform of the target, are also applied (and piped).
from mirzai.data.loading import load_kssl
from mirzai.data.selection import (select_y, select_tax_order, select_X)
from mirzai.data.transform import log_transform_y
from fastcore.transform import compose
import warnings
# Silence all warnings for the remainder of the notebook session.
warnings.filterwarnings('ignore')
2.1 Piping data selection and simple transformation
# Directory and file names handed to `load_kssl`.
src_dir = 'data'
fnames = ['spectra-features.npy', 'spectra-wavenumbers.npy',
          'depth-order.npy', 'target.npy',
          'tax-order-lu.pkl', 'spectra-id.npy']

# `load_kssl` returns six objects, unpacked here in the order it yields them.
X, X_names, depth_order, y, tax_lookup, X_id = load_kssl(src_dir, fnames=fnames)

# Quick sanity check of what was loaded.
print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')
print(f'Wavenumbers:\n {X_names}')
print(f'depth_order (first 3 rows):\n {depth_order[:3, :]}')
print(f'Taxonomic order lookup:\n {tax_lookup}')
X shape: (50494, 1764)
y shape: (50494,)
Wavenumbers:
[3999 3997 3995 ... 603 601 599]
depth_order (first 3 rows):
[[43. 2.]
[ 0. 0.]
[ 0. 1.]]
Taxonomic order lookup:
{'alfisols': 0, 'mollisols': 1, 'inceptisols': 2, 'entisols': 3, 'spodosols': 4, 'undefined': 5, 'ultisols': 6, 'andisols': 7, 'histosols': 8, 'oxisols': 9, 'vertisols': 10, 'aridisols': 11, 'gelisols': 12}
# Bundle the arrays into a single tuple so it can flow through the pipeline.
data = X, y, X_id, depth_order

# Selection and transformation steps, chained into a single callable with
# `compose`; the result is unpacked back into the same four names.
# NOTE(review): steps are presumably applied in list order - confirm against
# the fastcore `compose` documentation.
transforms = [select_y, select_tax_order, select_X, log_transform_y]
X, y, X_id, depth_order = compose(*transforms)(data)

# Shapes after selection/transformation.
print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')
print(f'depth_order shape: {depth_order.shape}')
X shape: (40132, 1764)
y shape: (40132,)
depth_order shape: (40132, 2)