ecnet.tasks
feature_selection
Feature selection functions
select_rfr(dataset, total_importance=0.95, **kwargs)
select_rfr: reduces input data dimensionality such that the specified proportion of total feature importance (derived from random forest regression) is retained in the feature subset
Parameters:

Name | Type | Description | Default |
---|---|---|---|
dataset | QSPRDataset | input data | required |
total_importance | float | total feature importance to retain | 0.95 |
**kwargs | | additional arguments passed to sklearn.ensemble.RandomForestRegressor | {} |
Returns:

Type | Description |
---|---|
Tuple[List[int], List[float]] | (selected feature indices, selected feature importances) |
Source code in ecnet/tasks/feature_selection.py
def select_rfr(dataset: QSPRDataset, total_importance: float = 0.95,
               **kwargs) -> Tuple[List[int], List[float]]:
    """
    select_rfr: reduces input data dimensionality such that the specified proportion of total
    feature importance (derived from random forest regression) is retained in the feature subset

    Args:
        dataset (QSPRDataset): input data
        total_importance (float): total feature importance to retain
        **kwargs: additional arguments passed to sklearn.ensemble.RandomForestRegressor

    Returns:
        tuple[list[int], list[float]]: (selected feature indices, selected feature importances)
    """

    X = dataset.desc_vals
    y = [dv[0] for dv in dataset.target_vals]
    regr = RandomForestRegressor(**kwargs)
    regr.fit(X, y)
    # Rank (importance, index) pairs from most to least important
    importances = sorted(
        [(regr.feature_importances_[i], i)
         for i in range(len(dataset.desc_vals[0]))],
        key=lambda x: x[0], reverse=True
    )
    tot_imp = 0.0
    for idx, i in enumerate(importances):
        tot_imp += i[0]
        idx_cutoff = idx
        if tot_imp >= total_importance:
            break
    # Slice to idx_cutoff + 1 so the feature that pushed the cumulative
    # importance past the threshold is included in the returned subset
    desc_imp = [i[0] for i in importances][:idx_cutoff + 1]
    desc_idx = [i[1] for i in importances][:idx_cutoff + 1]
    return (desc_idx, desc_imp)
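For orientation, a minimal usage sketch. The SimpleNamespace stand-in is a placeholder that only mimics the two attributes select_rfr actually reads (desc_vals, target_vals); a real QSPRDataset would normally be used here, and n_estimators is simply forwarded to sklearn's RandomForestRegressor:

import random
from types import SimpleNamespace

from ecnet.tasks.feature_selection import select_rfr

# Placeholder dataset: 50 samples, 10 descriptors, 1 target per sample
# (any QSPRDataset with populated desc_vals/target_vals works the same way)
random.seed(0)
dataset = SimpleNamespace(
    desc_vals=[[random.random() for _ in range(10)] for _ in range(50)],
    target_vals=[[random.random()] for _ in range(50)]
)

# Keep the smallest feature subset covering 95% of RF-derived importance
indices, importances = select_rfr(dataset, total_importance=0.95,
                                  n_estimators=100)
print(len(indices), importances[:3])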
parameter_tuning
tune_batch_size(n_bees, n_iter, n_processes=1, **kwargs)
Tunes the training batch size using an artificial bee colony (ABC) algorithm
Parameters:

Name | Type | Description | Default |
---|---|---|---|
n_bees | int | number of employer bees to use in the ABC algorithm | required |
n_iter | int | number of iterations, or "search cycles", for the ABC algorithm | required |
n_processes | int | if > 1, uses multiprocessing when evaluating candidates at each iteration | 1 |
**kwargs | | arguments passed to _cost_batch_size | {} |
Returns:

Type | Description |
---|---|
dict | {'batch_size': tuned batch size} |
Source code in ecnet/tasks/parameter_tuning.py
def tune_batch_size(n_bees: int, n_iter: int, n_processes: int = 1, **kwargs) -> dict:
    """
    Tunes the training batch size

    Args:
        n_bees (int): number of employer bees to use in the ABC algorithm
        n_iter (int): number of iterations, or "search cycles", for the ABC algorithm
        n_processes (int): if > 1, uses multiprocessing when evaluating candidates at each
            iteration
        **kwargs: arguments passed to _cost_batch_size

    Returns:
        dict: {'batch_size': tuned batch size}
    """

    abc = ABC(n_bees, _cost_batch_size, num_processes=n_processes, obj_fn_args=kwargs)
    # Search batch sizes between 1 and the number of training samples
    abc.add_param(1, len(kwargs.get('train_ds').desc_vals), name='batch_size')
    abc.initialize()
    for _ in range(n_iter):
        abc.search()
    return {'batch_size': abc.best_params['batch_size']}
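A hedged usage sketch: from the source above, the only kwarg known to be read is train_ds (its length bounds the batch size search range); _cost_batch_size may expect additional entries, such as a validation set, so check that cost function in your ecnet version before relying on these names:

from ecnet.tasks.parameter_tuning import tune_batch_size

# 'train_ds' is read by tune_batch_size itself to bound the search range;
# anything else expected by _cost_batch_size is passed the same way
best = tune_batch_size(n_bees=10, n_iter=5, n_processes=1, train_ds=dataset)
print(best)  # {'batch_size': <tuned value>}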
tune_model_architecture(n_bees, n_iter, n_processes=1, **kwargs)
Tunes the NN's architecture (hidden layer dimension, number of hidden layers, dropout probability)
Parameters:

Name | Type | Description | Default |
---|---|---|---|
n_bees | int | number of employer bees to use in the ABC algorithm | required |
n_iter | int | number of iterations, or "search cycles", for the ABC algorithm | required |
n_processes | int | if > 1, uses multiprocessing when evaluating candidates at each iteration | 1 |
**kwargs | | arguments passed to _cost_arch | {} |
Returns:

Type | Description |
---|---|
dict | {'hidden_dim': opt_val, 'n_hidden': opt_val, 'dropout': opt_val} |
Source code in ecnet/tasks/parameter_tuning.py
def tune_model_architecture(n_bees: int, n_iter: int, n_processes: int = 1, **kwargs) -> dict:
    """
    Tunes the NN's architecture

    Args:
        n_bees (int): number of employer bees to use in the ABC algorithm
        n_iter (int): number of iterations, or "search cycles", for the ABC algorithm
        n_processes (int): if > 1, uses multiprocessing when evaluating candidates at each
            iteration
        **kwargs: arguments passed to _cost_arch

    Returns:
        dict: {'hidden_dim': opt_val, 'n_hidden': opt_val, 'dropout': opt_val}
    """

    abc = ABC(n_bees, _cost_arch, num_processes=n_processes, obj_fn_args=kwargs)
    # Search bounds for each architecture parameter come from the package config
    abc.add_param(CONFIG['architecture_params_range']['hidden_dim'][0],
                  CONFIG['architecture_params_range']['hidden_dim'][1], name='hidden_dim')
    abc.add_param(CONFIG['architecture_params_range']['n_hidden'][0],
                  CONFIG['architecture_params_range']['n_hidden'][1], name='n_hidden')
    abc.add_param(CONFIG['architecture_params_range']['dropout'][0],
                  CONFIG['architecture_params_range']['dropout'][1], name='dropout')
    abc.initialize()
    for _ in range(n_iter):
        abc.search()
    return {
        'hidden_dim': abc.best_params['hidden_dim'],
        'n_hidden': abc.best_params['n_hidden'],
        'dropout': abc.best_params['dropout']
    }
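Usage mirrors tune_batch_size; the search bounds come from CONFIG['architecture_params_range'], and the train_ds kwarg below is an assumption carried over from the batch size tuner (verify what _cost_arch actually expects):

from ecnet.tasks.parameter_tuning import tune_model_architecture

# kwargs are forwarded to _cost_arch via obj_fn_args
arch = tune_model_architecture(n_bees=10, n_iter=5, train_ds=dataset)
print(arch)  # {'hidden_dim': ..., 'n_hidden': ..., 'dropout': ...}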
tune_training_parameters(n_bees, n_iter, n_processes=1, **kwargs)
Tunes the NN's training parameters (learning rate and learning rate decay for the Adam optimizer)
Parameters:

Name | Type | Description | Default |
---|---|---|---|
n_bees | int | number of employer bees to use in the ABC algorithm | required |
n_iter | int | number of iterations, or "search cycles", for the ABC algorithm | required |
n_processes | int | if > 1, uses multiprocessing when evaluating candidates at each iteration | 1 |
**kwargs | | arguments passed to _cost_train_hp | {} |
Returns:

Type | Description |
---|---|
dict | {'lr': opt_val, 'lr_decay': opt_val} |
Source code in ecnet/tasks/parameter_tuning.py
def tune_training_parameters(n_bees: int, n_iter: int, n_processes: int = 1, **kwargs) -> dict:
    """
    Tunes the NN's training parameters (learning rate and decay for the Adam optimizer)

    Args:
        n_bees (int): number of employer bees to use in the ABC algorithm
        n_iter (int): number of iterations, or "search cycles", for the ABC algorithm
        n_processes (int): if > 1, uses multiprocessing when evaluating candidates at each
            iteration
        **kwargs: arguments passed to _cost_train_hp

    Returns:
        dict: {'lr': opt_val, 'lr_decay': opt_val}
    """

    abc = ABC(n_bees, _cost_train_hp, num_processes=n_processes, obj_fn_args=kwargs)
    # Search bounds for lr and lr_decay come from the package config
    abc.add_param(CONFIG['training_params_range']['lr'][0],
                  CONFIG['training_params_range']['lr'][1], name='lr')
    abc.add_param(CONFIG['training_params_range']['lr_decay'][0],
                  CONFIG['training_params_range']['lr_decay'][1], name='lr_decay')
    abc.initialize()
    for _ in range(n_iter):
        abc.search()
    return {
        'lr': abc.best_params['lr'],
        'lr_decay': abc.best_params['lr_decay']
    }
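Because each tuner returns a plain dict, a full tuning pass can be composed by merging the three results; the kwargs shown remain assumptions about what the underlying cost functions expect:

from ecnet.tasks.parameter_tuning import (tune_batch_size,
                                          tune_model_architecture,
                                          tune_training_parameters)

# Hypothetical end-to-end pass; 10 bees, 5 search cycles per tuner
hyperparams = {}
hyperparams.update(tune_batch_size(10, 5, train_ds=dataset))
hyperparams.update(tune_model_architecture(10, 5, train_ds=dataset))
hyperparams.update(tune_training_parameters(10, 5, train_ds=dataset))
# hyperparams: batch_size, hidden_dim, n_hidden, dropout, lr, lr_decay
print(hyperparams)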