Belle II Software development
normalize_features.py
1
8
9
10import numpy as np
11from typing import Union
12
13
14def _power(array: np.ndarray, power: Union[int, float]):
15 """Preprocessing function to take power of given feature."""
16 return np.sign(array) * np.power(np.abs(array), power)
17
18
19def _linear(array: np.ndarray, mu=0.0, sigma=1.0):
20 """Preprocessing function to linear scale given feature."""
21 return (array - mu) / sigma
22
23
# Registry of the available preprocessing functions, keyed by process name.
methods = dict(power=_power, linear=_linear)
25
26
def normalize_features(
    normalize={},
    features=[],
    x=[],
    edge_features=[],
    x_edges=[],
    global_features=[],
    x_global=[],
):
    """
    Function to normalize input features in place.

    ``normalize`` maps a bare feature name (without the ``feat_``, ``edge_``
    or ``glob_`` prefix) to the list of processes to apply to it, in order.
    Each process is a sequence whose first item is the method name and whose
    remaining items are that method's positional arguments, e.g.
    ``{'name': [['power', 0.5], ['linear', -0.5, 4.1]]}``.
    ``power`` and ``linear`` are the only processes supported.

    Each name is searched among the node features first, then the edge
    features, then the global features; only the first match is processed.
    Names that match nothing are ignored.

    Args:
        normalize (dict): Normalization processes and parameters.
        features (list): List of node feature names.
        x (numpy.ndarray): Array of node features.
        edge_features (list): List of edge feature names.
        x_edges (numpy.ndarray): Array of edge features.
        global_features (list): List of global feature names.
        x_global (numpy.ndarray): Array of global features.

    Raises:
        KeyError: If a process names a method that is not in ``methods``.
    """
    # Feature groups in lookup order: (name prefix, feature names, data array)
    groups = (
        ("feat", features, x),
        ("edge", edge_features, x_edges),
        ("glob", global_features, x_global),
    )

    for feat, processes in normalize.items():
        for prefix, names, data in groups:
            feat_name = f"{prefix}_{feat}"
            if feat_name not in names:
                continue

            feat_idx = names.index(feat_name)

            # Apply normalizations in order, overwriting the feature column
            for proc in processes:
                data[:, feat_idx] = methods[proc[0]](data[:, feat_idx], *proc[1:])
            break  # assume no features of different type with same name