Skip to content

ecnet.datasets

ecnet.datasets.QSPRDataset

Bases: Dataset

Source code in ecnet/datasets/structs.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
class QSPRDataset(Dataset):

    def __init__(self, smiles: List[str], target_vals: Iterable[Iterable[float]],
                 backend: str = 'padel'):
        """
        QSPRDataset: creates a torch.utils.data.Dataset from SMILES strings and target values

        Descriptors are generated for the supplied SMILES strings via `smi_to_qspr`; target and
        descriptor values are stored as float32 tensors

        Args:
            smiles (list[str]): SMILES strings
            target_vals (Iterable[Iterable[float]]): target values of shape (n_samples, n_targets)
            backend (str, optional): backend for QSPR generation, ['padel', 'alvadesc']

        Raises:
            ValueError: if `backend` is not a recognized backend
        """

        self.smiles = smiles
        self.target_vals = torch.as_tensor(target_vals).type(torch.float32)
        desc_vals, self.desc_names = self.smi_to_qspr(smiles, backend)
        self.desc_vals = torch.as_tensor(desc_vals).type(torch.float32)

    @staticmethod
    def smi_to_qspr(smiles: List[str], backend: str) -> Tuple[List[List[float]], List[str]]:
        """
        Generate QSPR descriptors for each supplied SMILES string

        Args:
            smiles (list[str]): SMILES strings
            backend (str): backend for QSPR generation, ['padel', 'alvadesc']

        Returns:
            tuple[list[list[float]], list[str]]: descriptor values and descriptor names, as
                returned by the backend-specific helper

        Raises:
            ValueError: if `backend` is neither 'padel' nor 'alvadesc'
        """

        if backend == 'padel':
            return _qspr_from_padel(smiles)
        elif backend == 'alvadesc':
            return _qspr_from_alvadesc(smiles)
        else:
            raise ValueError('Unknown backend software: {}'.format(backend))

    def set_index(self, index: List[int]):
        """
        Reduce the number of samples in the dataset; samples retained given by supplied indices

        Samples are kept in the order given by `index`. Rows are selected with tensor indexing, so
        the dtype and trailing dimensions of the stored tensors are preserved, including when
        `index` is empty

        Args:
            index (list[int]): indices of the dataset to retain, all others are removed
        """

        self.smiles = [self.smiles[i] for i in index]
        # a long tensor selects rows directly; no per-row numpy round-trip is needed
        rows = torch.as_tensor(index, dtype=torch.long)
        self.target_vals = self.target_vals[rows]
        self.desc_vals = self.desc_vals[rows]

    def set_desc_index(self, index: List[int]):
        """
        Reduce the number of features per sample; features retained given by supplied indices

        Features are kept in the order given by `index`; `desc_names` is reduced the same way

        Args:
            index (list[int]): indices of the features to retain, all others are removed
        """

        cols = torch.as_tensor(index, dtype=torch.long)
        # an empty 1-D tensor (dataset without samples) has no feature axis to slice
        if self.desc_vals.dim() > 1 or self.desc_vals.numel() > 0:
            self.desc_vals = self.desc_vals[:, cols]
        self.desc_names = [self.desc_names[i] for i in index]

    def __len__(self):
        """
        Number of samples, taken from the number of stored SMILES strings
        """

        return len(self.smiles)

    def __getitem__(self, idx: int):
        """
        Dictionary representation of compound at index `idx`

        Args:
            idx (int): compound to return

        Returns:
            dict: keys 'smiles', 'target_val', 'desc_vals' (entries at `idx`) and 'desc_names'
                (the full list of descriptor names)
        """

        smiles = self.smiles[idx]
        target_val = self.target_vals[idx]
        dv = self.desc_vals[idx]
        return {
            'smiles': smiles,
            'target_val': target_val,
            'desc_vals': dv,
            'desc_names': self.desc_names
        }

__getitem__(idx)

Dictionary representation of compound at index idx

Parameters:

Name Type Description Default
idx int

compound to return

required
Source code in ecnet/datasets/structs.py
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
def __getitem__(self, idx: int):
    """
    Build the dictionary describing the compound stored at position `idx`

    Args:
        idx (int): position of the compound to return

    Returns:
        dict: 'smiles', 'target_val' and 'desc_vals' hold the entries at `idx`; 'desc_names'
            holds the full list of descriptor names
    """

    return {
        'smiles': self.smiles[idx],
        'target_val': self.target_vals[idx],
        'desc_vals': self.desc_vals[idx],
        'desc_names': self.desc_names
    }

__init__(smiles, target_vals, backend='padel')

QSPRDataset: creates a torch.utils.data.Dataset from SMILES strings and target values

Parameters:

Name Type Description Default
smiles list[str]

SMILES strings

required
target_vals Iterable[Iterable[float]]

target values of shape (n_samples, n_targets)

required
backend str

backend for QSPR generation, ['padel', 'alvadesc']

'padel'
Source code in ecnet/datasets/structs.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
def __init__(self, smiles: List[str], target_vals: Iterable[Iterable[float]],
             backend: str = 'padel'):
    """
    Build a dataset from SMILES strings and their target values

    Descriptors are generated through `smi_to_qspr`; targets and descriptors are stored as
    float32 tensors

    Args:
        smiles (list[str]): SMILES strings
        target_vals (Iterable[Iterable[float]]): target values of shape (n_samples, n_targets)
        backend (str, optional): backend for QSPR generation, ['padel', 'alvadesc']
    """

    self.smiles = smiles
    self.target_vals = torch.as_tensor(target_vals).float()
    # descriptor generation hands back (values, names)
    raw_desc, names = self.smi_to_qspr(smiles, backend)
    self.desc_names = names
    self.desc_vals = torch.as_tensor(raw_desc).float()

set_desc_index(index)

Reduce the number of features per sample; features retained given by supplied indices

Parameters:

Name Type Description Default
index list[int]

indices of the features to retain, all others are removed

required
Source code in ecnet/datasets/structs.py
63
64
65
66
67
68
69
70
71
72
73
74
def set_desc_index(self, index: List[int]):
    """
    Reduce the number of features per sample; features retained given by supplied indices

    Features are kept in the order given by `index`; `desc_names` is reduced the same way.
    Columns are selected with tensor indexing instead of rebuilding the tensor element by
    element, which keeps the dtype of `desc_vals`

    Args:
        index (list[int]): indices of the features to retain, all others are removed
    """

    cols = torch.as_tensor(index, dtype=torch.long)
    # an empty 1-D tensor (dataset without samples) has no feature axis to slice
    if self.desc_vals.dim() > 1 or self.desc_vals.numel() > 0:
        self.desc_vals = self.desc_vals[:, cols]
    self.desc_names = [self.desc_names[i] for i in index]

set_index(index)

Reduce the number of samples in the dataset; samples retained given by supplied indices

Parameters:

Name Type Description Default
index list[int]

indices of the dataset to retain, all others are removed

required
Source code in ecnet/datasets/structs.py
49
50
51
52
53
54
55
56
57
58
59
60
61
def set_index(self, index: List[int]):
    """
    Reduce the number of samples in the dataset; samples retained given by supplied indices

    Samples are kept in the order given by `index`. Rows are selected with tensor indexing
    rather than a per-row numpy round-trip, so the dtype and trailing dimensions of the stored
    tensors are preserved, including when `index` is empty

    Args:
        index (list[int]): indices of the dataset to retain, all others are removed
    """

    self.smiles = [self.smiles[i] for i in index]
    rows = torch.as_tensor(index, dtype=torch.long)
    self.target_vals = self.target_vals[rows]
    self.desc_vals = self.desc_vals[rows]

smi_to_qspr(smiles, backend) staticmethod

Generate QSPR descriptors for each supplied SMILES string

Parameters:

Name Type Description Default
smiles list[str]

SMILES strings

required
backend str

backend for QSPR generation, ['padel', 'alvadesc']

required

Returns:

Type Description
Tuple[List[List[float]], List[str]]

tuple[list[list[float]], list[str]]

Source code in ecnet/datasets/structs.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
@staticmethod
def smi_to_qspr(smiles: List[str], backend: str) -> Tuple[List[List[float]], List[str]]:
    """
    Dispatch descriptor generation for the supplied SMILES strings to the chosen backend

    Args:
        smiles (list[str]): SMILES strings
        backend (str): backend for QSPR generation, ['padel', 'alvadesc']

    Returns:
        tuple[list[list[float]], list[str]]: whatever the backend-specific helper returns

    Raises:
        ValueError: if `backend` is neither 'padel' nor 'alvadesc'
    """

    if backend == 'alvadesc':
        return _qspr_from_alvadesc(smiles)
    if backend == 'padel':
        return _qspr_from_padel(smiles)
    # neither supported backend matched
    raise ValueError('Unknown backend software: {}'.format(backend))

ecnet.datasets.QSPRDatasetFromFile

Bases: QSPRDataset

Source code in ecnet/datasets/structs.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
class QSPRDatasetFromFile(QSPRDataset):

    def __init__(self, smiles_fn: str, target_vals: Iterable[Iterable[float]],
                 backend: str = 'padel'):
        """
        QSPRDatasetFromFile: creates a torch.utils.data.Dataset given target values and a supplied
        filename/path to a SMILES file

        Args:
            smiles_fn (str): filename/path of SMILES file
            target_vals (Iterable[Iterable[float]]): target values of shape (n_samples, n_targets)
            backend (str, optional): backend for QSPR generation, ['padel', 'alvadesc']

        Raises:
            ValueError: if `backend` is neither 'padel' nor 'alvadesc'
        """

        self.smiles = self._open_smiles_file(smiles_fn)
        self.target_vals = torch.as_tensor(target_vals).type(torch.float32)
        if backend == 'padel':
            desc_vals, self.desc_names = self.smi_to_qspr(self.smiles, backend)
        elif backend == 'alvadesc':
            # this backend is handed the file path rather than the parsed SMILES list
            desc_vals, self.desc_names = _qspr_from_alvadesc_smifile(smiles_fn)
        else:
            # fail loudly instead of leaving desc_vals/desc_names unset
            raise ValueError('Unknown backend software: {}'.format(backend))
        self.desc_vals = torch.as_tensor(desc_vals).type(torch.float32)

    @staticmethod
    def _open_smiles_file(smiles_fn: str) -> List[str]:
        """
        Open SMILES file at specified location

        Each line of the file yields one entry, with newline characters removed

        Args:
            smiles_fn (str): filename/path of SMILES file

        Returns:
            list[str]: SMILES strings
        """

        # the context manager closes the file; no explicit close() is needed
        with open(smiles_fn, 'r') as smi_file:
            return [line.replace('\n', '') for line in smi_file]

__init__(smiles_fn, target_vals, backend='padel')

QSPRDatasetFromFile: creates a torch.utils.data.Dataset from supplied target values and the filename/path of a SMILES file

Parameters:

Name Type Description Default
smiles_fn str

filename/path of SMILES file

required
target_vals Iterable[Iterable[float]]

target values of shape (n_samples, n_targets)

required
backend str

backend for QSPR generation, ['padel', 'alvadesc']

'padel'
Source code in ecnet/datasets/structs.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
def __init__(self, smiles_fn: str, target_vals: Iterable[Iterable[float]],
             backend: str = 'padel'):
    """
    QSPRDatasetFromFile: creates a torch.utils.data.Dataset given target values and a supplied
    filename/path to a SMILES file

    Args:
        smiles_fn (str): filename/path of SMILES file
        target_vals (Iterable[Iterable[float]]): target values of shape (n_samples, n_targets)
        backend (str, optional): backend for QSPR generation, ['padel', 'alvadesc']

    Raises:
        ValueError: if `backend` is neither 'padel' nor 'alvadesc'
    """

    self.smiles = self._open_smiles_file(smiles_fn)
    self.target_vals = torch.as_tensor(target_vals).type(torch.float32)
    if backend == 'padel':
        desc_vals, self.desc_names = self.smi_to_qspr(self.smiles, backend)
    elif backend == 'alvadesc':
        # this backend is handed the file path rather than the parsed SMILES list
        desc_vals, self.desc_names = _qspr_from_alvadesc_smifile(smiles_fn)
    else:
        # fail loudly instead of leaving desc_vals/desc_names unset
        raise ValueError('Unknown backend software: {}'.format(backend))
    self.desc_vals = torch.as_tensor(desc_vals).type(torch.float32)

ecnet.datasets.QSPRDatasetFromValues

Bases: QSPRDataset

Source code in ecnet/datasets/structs.py
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
class QSPRDatasetFromValues(QSPRDataset):

    def __init__(self, desc_vals: Iterable[Iterable[float]],
                 target_vals: Iterable[Iterable[float]]):
        """
        QSPRDatasetFromValues: builds a torch.utils.data.Dataset directly from descriptor values
        and target values; no descriptors are generated

        SMILES strings and descriptor names are filled with empty-string placeholders, one per
        sample and one per feature of the first sample respectively

        Args:
            desc_vals (Iterable[Iterable[float]]): descriptor values, shape (n_samples, n_features)
            target_vals (Iterable[Iterable[float]]): target values, shape (n_samples, n_targets)
        """

        self.smiles = [''] * len(target_vals)
        self.desc_names = [''] * len(desc_vals[0])
        self.desc_vals = torch.as_tensor(desc_vals).float()
        self.target_vals = torch.as_tensor(target_vals).float()

__init__(desc_vals, target_vals)

QSPRDatasetFromValues: creates a torch.utils.data.Dataset from supplied descriptor values and supplied target values

Parameters:

Name Type Description Default
desc_vals Iterable[Iterable[float]]

descriptor values, shape (n_samples, n_features)

required
target_vals Iterable[Iterable[float]]

target values, shape (n_samples, n_targets)

required
Source code in ecnet/datasets/structs.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
def __init__(self, desc_vals: Iterable[Iterable[float]],
             target_vals: Iterable[Iterable[float]]):
    """
    QSPRDatasetFromValues: builds a torch.utils.data.Dataset directly from descriptor values
    and target values; no descriptors are generated

    SMILES strings and descriptor names are filled with empty-string placeholders, one per
    sample and one per feature of the first sample respectively

    Args:
        desc_vals (Iterable[Iterable[float]]): descriptor values, shape (n_samples, n_features)
        target_vals (Iterable[Iterable[float]]): target values, shape (n_samples, n_targets)
    """

    self.smiles = [''] * len(target_vals)
    self.desc_names = [''] * len(desc_vals[0])
    self.desc_vals = torch.as_tensor(desc_vals).float()
    self.target_vals = torch.as_tensor(target_vals).float()

ecnet.datasets.load_bp

Loads boiling point data; target values given in Celsius

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def load_bp(as_dataset: bool = False, backend: str = 'padel') -> Union[
            Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the boiling point data set; target values are in Celsius

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('bp', backend)
    return _get_file_data('bp')

ecnet.datasets.load_cn

Loads cetane number data; target values given in CN units

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def load_cn(as_dataset: bool = False, backend: str = 'padel') -> Union[
            Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the cetane number data set; target values are in CN units

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('cn', backend)
    return _get_file_data('cn')

ecnet.datasets.load_cp

Loads cloud point data; target values given in Celsius

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def load_cp(as_dataset: bool = False, backend: str = 'padel') -> Union[
            Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the cloud point data set; target values are in Celsius

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('cp', backend)
    return _get_file_data('cp')

ecnet.datasets.load_kv

Loads kinematic viscosity data; target values given in mm^2/s (cSt) at 313 K

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def load_kv(as_dataset: bool = False, backend: str = 'padel') -> Union[
            Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the kinematic viscosity data set; target values are in mm^2/s (cSt) at 313 K

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('kv', backend)
    return _get_file_data('kv')

ecnet.datasets.load_lhv

Loads lower heating value data; target values given in MJ/kg = kJ/g

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def load_lhv(as_dataset: bool = False, backend: str = 'padel') -> Union[
             Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the lower heating value data set; target values are in MJ/kg = kJ/g

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('lhv', backend)
    return _get_file_data('lhv')

ecnet.datasets.load_mon

Loads motor octane number data; target values given in MON units

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
def load_mon(as_dataset: bool = False, backend: str = 'padel') -> Union[
             Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the motor octane number data set; target values are in MON units

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('mon', backend)
    return _get_file_data('mon')

ecnet.datasets.load_pp

Loads pour point data; target values given in Celsius

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
def load_pp(as_dataset: bool = False, backend: str = 'padel') -> Union[
            Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the pour point data set; target values are in Celsius

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('pp', backend)
    return _get_file_data('pp')

ecnet.datasets.load_ron

Loads research octane number data; target values given in RON units

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
def load_ron(as_dataset: bool = False, backend: str = 'padel') -> Union[
             Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the research octane number data set; target values are in RON units

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('ron', backend)
    return _get_file_data('ron')

ecnet.datasets.load_ysi

Loads yield sooting index data; target values given in unified YSI units

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
def load_ysi(as_dataset: bool = False, backend: str = 'padel') -> Union[
             Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the yield sooting index data set; target values are in unified YSI units

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('ysi', backend)
    return _get_file_data('ysi')

ecnet.datasets.load_mp

Loads melting point data; target values given in Celsius

Parameters:

Name Type Description Default
as_dataset bool

if True, return QSPRDatasetFromFile object housing data; otherwise, return tuple of smiles and target values

False
backend str

any in ['padel', 'alvadesc']

'padel'

Returns:

Type Description
Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]

Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles, target vals) or QSPRDatasetFromFile

Source code in ecnet/datasets/load_data.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
def load_mp(as_dataset: bool = False, backend: str = 'padel') -> Union[
            Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]:
    """
    Load the melting point data set; target values are in Celsius

    Args:
        as_dataset (bool, optional): when True, return the data housed in a QSPRDatasetFromFile
            object; when False, return a tuple of smiles and target values
        backend (str, optional): any in ['padel', 'alvadesc']; only passed on when
            `as_dataset` is True

    Returns:
        Union[Tuple[List[str], List[List[float]]], QSPRDatasetFromFile]: either tuple of (smiles,
            target vals) or QSPRDatasetFromFile
    """

    if as_dataset:
        return _load_set('mp', backend)
    return _get_file_data('mp')