Coverage for uqmodels / modelization / DL_estimator / transformer_ed.py: 76%
236 statements
« prev ^ index » next — coverage.py v7.13.0, created at 2025-12-09 08:15 +0000
1import tensorflow as tf
2from keras.layers import TimeDistributed
3from tensorflow.keras import Input, layers
4from uqmodels.modelization.DL_estimator.data_embedding import (
5 Factice_Time_Extension,
6 Mouving_conv_Embedding,
7 Mouving_Windows_Embedding,
8 PositionalEmbedding,
9)
10from uqmodels.modelization.DL_estimator.neural_network_UQ import (
11 NN_UQ)
12from uqmodels.modelization.DL_estimator.metalayers import mlp
13from uqmodels.modelization.DL_estimator.utils import set_global_determinism
14from uqmodels.modelization.DL_estimator.data_generator import Folder_Generator
15from uqmodels.utils import add_random_state, stack_and_roll
@tf.keras.utils.register_keras_serializable(package="UQModels_layers")
class MultiHeadAttention(tf.keras.layers.MultiHeadAttention):
    """Serializable alias of ``tf.keras.layers.MultiHeadAttention``.

    Registered under the ``UQModels_layers`` package so models built with it
    can be saved/loaded through the keras serialization machinery.
    """
@tf.keras.utils.register_keras_serializable(package="UQModels_layers")
class LayerNormalization(tf.keras.layers.LayerNormalization):
    """Serializable alias of ``tf.keras.layers.LayerNormalization``.

    Registered under the ``UQModels_layers`` package for keras (de)serialization.
    """
@tf.keras.utils.register_keras_serializable(package="UQModels_layers")
class Dropout(tf.keras.layers.Dropout):
    """Serializable alias of ``tf.keras.layers.Dropout``.

    Registered under the ``UQModels_layers`` package for keras (de)serialization.
    """
# Serializable alias of keras Dense (registered for model save/load)
@tf.keras.utils.register_keras_serializable(package="UQModels_layers")
class Dense(tf.keras.layers.Dense):
    pass
# Transformer Encoder Layer
@tf.keras.utils.register_keras_serializable(package="UQModels_layers")
class TransformerEncoder(layers.Layer):
    """Transformer Encoder Layer from https://keras.io/examples/audio/transformer_asr/

    Multi-head self-attention followed by a two-layer feed-forward network,
    each sub-layer wrapped in a residual connection + layer normalization.
    """

    def __init__(
        self,
        dim_z,
        num_heads,
        feed_forward_dim,
        dp_rec=0.1,
        flag_mc=False,
        random_state=None,
        **kwargs
    ):
        """Initialize the encoder block.

        Args:
            dim_z (int): latent dimension (attention key dim and FFN output dim).
            num_heads (int): number of attention heads.
            feed_forward_dim (int): hidden size of the feed-forward sub-layer.
            dp_rec (float, optional): dropout rate of both sub-layers (disabled
                when 0). Defaults to 0.1.
            flag_mc (bool, optional): if True, dropout is kept active at
                inference (MC-dropout style sampling). Defaults to False.
            random_state (int, optional): seed for deterministic behaviour.
        """
        super().__init__()
        self.dim_z = dim_z
        self.num_heads = num_heads
        self.feed_forward_dim = feed_forward_dim
        self.dp_rec = dp_rec
        self.flag_mc = flag_mc
        self.random_state = random_state
        set_global_determinism(self.random_state)

        # Layers instanciation
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=dim_z)
        self.dense1 = Dense(feed_forward_dim, activation="relu")
        self.dense2 = Dense(dim_z)

        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        # Distinct seeds so the two dropout masks are decorrelated.
        self.dropout1 = Dropout(dp_rec, seed=self.random_state)
        self.dropout2 = Dropout(dp_rec, seed=add_random_state(self.random_state, 1))

    def call(self, inputs, training=None):
        """Apply self-attention + FFN, each with residual and layer norm.

        Args:
            inputs: input tensor; assumed (batch, time, dim_z) — TODO confirm.
            training (bool, optional): training flag; dropout is also forced on
                when ``flag_mc`` is set. Defaults to False when None.

        Returns:
            Tensor with the same shape as ``inputs``.
        """
        if training is None:
            training = False

        attn_output = self.att(inputs, inputs)
        if self.dp_rec > 0:
            attn_output = self.dropout1(attn_output, training=training | self.flag_mc)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.dense2(self.dense1(out1))
        if self.dp_rec > 0:
            ffn_output = self.dropout2(ffn_output, training=training | self.flag_mc)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Serialize hyper-parameters and sub-layers for keras save/load."""
        config = {
            "dim_z": self.dim_z,
            "num_heads": self.num_heads,
            "feed_forward_dim": self.feed_forward_dim,
            "dp_rec": self.dp_rec,
            "flag_mc": self.flag_mc,
            "random_state": self.random_state,
            "att": tf.keras.utils.serialize_keras_object(self.att),
            "layernorm1": tf.keras.utils.serialize_keras_object(self.layernorm1),
            "layernorm2": tf.keras.utils.serialize_keras_object(self.layernorm2),
            "dense1": tf.keras.utils.serialize_keras_object(self.dense1),
            "dense2": tf.keras.utils.serialize_keras_object(self.dense2),
            "dropout1": tf.keras.utils.serialize_keras_object(self.dropout1),
            "dropout2": tf.keras.utils.serialize_keras_object(self.dropout2),
        }
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer, restoring the serialized sub-layers.

        Sub-layer configs are popped first so the remaining entries match
        ``__init__``'s signature, then the freshly-built sub-layers are
        overwritten with the deserialized ones (restores trained weights setup).
        """
        att = config.pop("att")
        layernorm1 = config.pop("layernorm1")
        layernorm2 = config.pop("layernorm2")
        dropout1 = config.pop("dropout1")
        dropout2 = config.pop("dropout2")
        dense1 = config.pop("dense1")
        dense2 = config.pop("dense2")

        obj = cls(**config)
        obj.att = tf.keras.utils.deserialize_keras_object(att)
        obj.layernorm1 = tf.keras.utils.deserialize_keras_object(layernorm1)
        obj.layernorm2 = tf.keras.utils.deserialize_keras_object(layernorm2)
        obj.dropout1 = tf.keras.utils.deserialize_keras_object(dropout1)
        obj.dropout2 = tf.keras.utils.deserialize_keras_object(dropout2)
        obj.dense1 = tf.keras.utils.deserialize_keras_object(dense1)
        obj.dense2 = tf.keras.utils.deserialize_keras_object(dense2)

        return obj
# Transformer Decoder Layer
@tf.keras.utils.register_keras_serializable(package="UQModels_layers")
class TransformerDecoder(layers.Layer):
    """Transformer Decoder Layer from https://keras.io/examples/audio/transformer_asr/

    Causal self-attention over the target, cross-attention over the encoder
    output, then a feed-forward network — each with residual + layer norm.
    """

    def __init__(
        self,
        dim_z,
        dim_horizon,
        num_heads,
        feed_forward_dim,
        dp_rec=0.1,
        flag_mc=False,
        random_state=None,
        **kwargs
    ):
        """Initialize the decoder block.

        Args:
            dim_z (int): latent dimension (attention key dim and FFN output dim).
            dim_horizon (int): number of future steps; controls the causal mask.
            num_heads (int): number of attention heads.
            feed_forward_dim (int): hidden size of the feed-forward sub-layer.
            dp_rec (float, optional): dropout rate of all sub-layers. Defaults to 0.1.
            flag_mc (bool, optional): if True, dropout stays active at inference
                (MC-dropout). Defaults to False.
            random_state (int, optional): seed for deterministic behaviour.
        """
        super().__init__()
        self.dim_z = dim_z
        self.dim_horizon = dim_horizon
        self.num_heads = num_heads
        self.feed_forward_dim = feed_forward_dim
        self.dp_rec = dp_rec
        self.flag_mc = flag_mc
        self.random_state = random_state
        set_global_determinism(self.random_state)

        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.layernorm3 = LayerNormalization(epsilon=1e-6)
        self.self_att = MultiHeadAttention(num_heads=num_heads, key_dim=dim_z)
        self.enc_att = MultiHeadAttention(num_heads=num_heads, key_dim=dim_z)
        # Distinct seeds so the three dropout masks are decorrelated.
        self.self_dropout = Dropout(dp_rec, seed=random_state)
        self.enc_dropout = Dropout(dp_rec, seed=add_random_state(random_state, 1))
        self.ffn_dropout = Dropout(dp_rec, seed=add_random_state(random_state, 2))
        self.dense1 = Dense(feed_forward_dim, activation="relu")
        self.dense2 = Dense(dim_z)

    def causal_attention_mask(self, batch_size, n_dest, n_src, dim_horizon, dtype):
        """Masks the upper half of the dot product matrix in self attention.

        This prevents flow of information from future tokens to current token.
        1's in the lower triangle, counting from the lower right corner.
        Past positions (the first ``n_dest - dim_horizon`` rows) may all attend
        to each other; only the last ``dim_horizon`` rows are strictly causal.

        Returns:
            Tensor of shape (batch_size, n_dest, n_src) of ``dtype``.
        """
        len_past = n_dest - dim_horizon
        # Row index: constant (len_past - 1) for past rows, increasing for horizon rows.
        i = tf.concat(
            [
                tf.zeros(len_past, dtype=tf.int32) + len_past - 1,
                tf.range(dim_horizon) + len_past,
            ],
            0,
        )[:, None]
        j = tf.range(n_src)
        m = (i) >= (j - n_src + n_dest)
        mask = tf.cast(m, dtype)
        mask = tf.reshape(mask, [1, n_dest, n_src])
        # Tile the single mask across the (dynamic) batch dimension.
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)], 0
        )
        return tf.tile(mask, mult)

    def call(self, enc_out, target, training=None):
        """Apply causal self-attention, cross-attention, and the FFN.

        Args:
            enc_out: encoder output tensor (keys/values of cross-attention).
            target: decoder input tensor; assumed (batch, time, dim_z) — TODO confirm.
            training (bool, optional): training flag; dropout is also forced on
                when ``flag_mc`` is set. Defaults to False when None.

        Returns:
            Tensor with the same shape as ``target``.
        """
        if training is None:
            training = False

        input_shape = tf.shape(target)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        causal_mask = self.causal_attention_mask(
            batch_size, seq_len, seq_len, self.dim_horizon, tf.bool
        )
        target_att = self.self_att(target, target, attention_mask=causal_mask)
        target_norm = self.layernorm1(
            target + self.self_dropout(target_att, training=training | self.flag_mc)
        )
        enc_out = self.enc_att(target_norm, enc_out)
        enc_out_norm = self.layernorm2(
            self.enc_dropout(enc_out, training=training | self.flag_mc) + target_norm
        )
        ffn_out = self.dense2(self.dense1(enc_out_norm))
        ffn_out_norm = self.layernorm3(
            enc_out_norm + self.ffn_dropout(ffn_out, training=training | self.flag_mc)
        )
        return ffn_out_norm

    def get_config(self):
        """Serialize hyper-parameters and sub-layers for keras save/load."""
        config = {
            "dim_z": self.dim_z,
            "dim_horizon": self.dim_horizon,
            "num_heads": self.num_heads,
            "feed_forward_dim": self.feed_forward_dim,
            "dp_rec": self.dp_rec,
            "flag_mc": self.flag_mc,
            "random_state": self.random_state,
            "layernorm1": tf.keras.utils.serialize_keras_object(self.layernorm1),
            "layernorm2": tf.keras.utils.serialize_keras_object(self.layernorm2),
            "layernorm3": tf.keras.utils.serialize_keras_object(self.layernorm3),
            "self_att": tf.keras.utils.serialize_keras_object(self.self_att),
            "enc_att": tf.keras.utils.serialize_keras_object(self.enc_att),
            "self_dropout": tf.keras.utils.serialize_keras_object(self.self_dropout),
            "enc_dropout": tf.keras.utils.serialize_keras_object(self.enc_dropout),
            "ffn_dropout": tf.keras.utils.serialize_keras_object(self.ffn_dropout),
            "dense1": tf.keras.utils.serialize_keras_object(self.dense1),
            "dense2": tf.keras.utils.serialize_keras_object(self.dense2),
        }
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild the layer, restoring the serialized sub-layers.

        Sub-layer configs are popped first so the remaining entries match
        ``__init__``'s signature, then the freshly-built sub-layers are
        replaced with the deserialized ones.
        """
        layernorm1 = config.pop("layernorm1")
        layernorm2 = config.pop("layernorm2")
        layernorm3 = config.pop("layernorm3")
        self_att = config.pop("self_att")
        enc_att = config.pop("enc_att")
        self_dropout = config.pop("self_dropout")
        enc_dropout = config.pop("enc_dropout")
        ffn_dropout = config.pop("ffn_dropout")
        dense1 = config.pop("dense1")
        dense2 = config.pop("dense2")
        obj = cls(**config)

        obj.layernorm1 = tf.keras.utils.deserialize_keras_object(layernorm1)
        obj.layernorm2 = tf.keras.utils.deserialize_keras_object(layernorm2)
        obj.layernorm3 = tf.keras.utils.deserialize_keras_object(layernorm3)
        obj.self_att = tf.keras.utils.deserialize_keras_object(self_att)
        obj.enc_att = tf.keras.utils.deserialize_keras_object(enc_att)
        obj.self_dropout = tf.keras.utils.deserialize_keras_object(self_dropout)
        obj.enc_dropout = tf.keras.utils.deserialize_keras_object(enc_dropout)
        obj.ffn_dropout = tf.keras.utils.deserialize_keras_object(ffn_dropout)
        obj.dense1 = tf.keras.utils.deserialize_keras_object(dense1)
        obj.dense2 = tf.keras.utils.deserialize_keras_object(dense2)
        return obj
# Builder for the full Transformer encoder-decoder forecaster
def build_transformer(
    size_window=10,
    n_windows=5,
    step=1,
    dim_target=1,
    dim_chan=1,
    dim_horizon=3,
    dim_ctx=20,
    dim_z=100,
    num_heads=2,
    num_feed_forward=128,
    num_layers_enc=3,
    num_layers_dec=2,
    layers_enc=[150],
    layers_dec=[150, 75],
    dp=0.05,
    dp_rec=0.03,
    k_reg=(0.00001, 0.00001),
    list_strides=[2, 1],
    list_filters=None,
    list_kernels=None,
    dim_dyn=None,
    with_positional_embedding=False,
    with_ctx_input=True,
    with_convolution=True,
    type_output=None,
    random_state=None,
    **kwargs
):
    """Builder for Transformer ED with convolutive preprocessing

    Args:
        size_window (int, optional): Size of window for lag values. Defaults to 10.
        n_windows (int, optional): Number of window in past. Defaults to 5.
        step (int, optional): step between windows. Defaults to 1.
        dim_target (int, optional): dimension of TS. Defaults to 1.
        dim_chan (int, optional): Number of channel of TS. Defaults to 1.
        dim_horizon (int, optional): futur_horizon to predict. Defaults to 3.
        dim_ctx (int, optional): Number of ctx_features. Defaults to 20.
        dim_z (int, optional): Size of latent space. Defaults to 100.
        num_heads (int, optional): num of heads transformer. Defaults to 2.
        num_feed_forward (int, optional): feed_forward transformer dimension. Defaults to 128.
        num_layers_enc (int, optional): num of transformer enc block
            (after concatenation of past values embedding + ctx). Defaults to 3.
        num_layers_dec (int, optional): num of transformer dec block. Defaults to 2.
        layers_enc (list, optional): size of MLP preprocessing
            (after concatenation of past values embedding + ctx). Defaults to [150].
        layers_dec (list, optional): size of MLP interpretor. Defaults to [150, 75].
        dp (float, optional): dropout. Defaults to 0.05.
        dp_rec (float, optional): transformer dropout. Defaults to 0.03.
        k_reg (tuple, optional): kernel regularization (l1, l2). Defaults to (0.00001, 0.00001).
        dim_dyn (int, None): size of dyn inputs, if None consider dim_dyn have same size than dim target
        with_positional_embedding (bool, optional): add static positional embedding. Defaults to False.
        with_ctx_input (bool, optional): Expect ctx features in addition to lag. Defaults to True.
        with_convolution (bool, optional): use convolution rather than
            whole lag values in the windows. Defaults to True.
        type_output (_type_, optional): mode of UQ (see NN_UQ). Defaults to None.
        random_state (bool): handle experimental random using seed.

    Returns:
        transformer : multi-step forecaster with UQ
    """
    if dim_dyn is None:
        dim_dyn = dim_target

    # MC-dropout style outputs keep dropout active at inference.
    flag_mc = 0
    if type_output in ["BNN", "MC_Dropout"]:
        flag_mc = 1

    set_global_determinism(random_state)

    # Embedding_interpretor
    Interpretor = mlp(
        dim_in=dim_z,
        dim_out=dim_target,
        layers_size=layers_dec,
        dp=dp,
        type_output=type_output,
        name="Interpretor",
        random_state=random_state,
    )

    Pos_Embeddor = None
    if with_positional_embedding:
        Pos_Embeddor = PositionalEmbedding(dim_z, max_len=size_window + dim_horizon - 1)

    # Input definition
    list_input = []
    if with_ctx_input:
        CTX_inputs = Input(shape=(n_windows, dim_ctx), name="encoder_inputs")
        list_input.append(CTX_inputs)

    Y_past_in = Input(shape=(size_window, dim_dyn), name="past_inputs")
    list_input.append(Y_past_in)

    Y_past = Y_past_in

    # Preprocessing layers definition
    if with_convolution:
        MWE = Mouving_conv_Embedding(
            size_window,
            n_windows,
            step=step,
            dim_d=dim_dyn,
            dim_chan=dim_chan,
            use_conv2D=True,
            list_strides=list_strides,
            list_filters=list_filters,
            list_kernels=list_kernels,
            dp=0.05,
            flag_mc=flag_mc,
            seed=add_random_state(random_state, 100),
        )
    else:
        MWE = Mouving_Windows_Embedding(
            size_window,
            n_windows,
            step=step,
            dim_d=dim_dyn,
            dim_chan=dim_chan,
            seed=add_random_state(random_state, 100),
        )

    FTE = Factice_Time_Extension(dim_horizon)
    # Copy before extending: the original `layers_enc.append(dim_z)` mutated the
    # shared mutable default argument, growing it on every call of this builder.
    layers_enc = list(layers_enc) + [dim_z]

    dim_embedding = MWE.last_shape
    if with_ctx_input:
        dim_embedding += dim_ctx

    Embeddor_ctx = mlp(
        dim_in=dim_embedding,
        dim_out=None,
        layers_size=layers_enc,
        dp=dp,
        name="Embeddor",
        regularizer_W=k_reg,
        random_state=add_random_state(random_state, 200),
    )

    # Preprocessing computation
    Data = MWE(Y_past)
    # Concat with cat features
    if with_ctx_input:
        Data = layers.Concatenate(axis=-1)([CTX_inputs, Data])
    # Factice time augmentation (actually useless but can be usefull for extended predict horizon)
    Data = FTE(Data)

    Embedding = TimeDistributed(Embeddor_ctx)(Data)

    # Static Pe that encode window position
    if Pos_Embeddor:
        Pe_Embedding = Pos_Embeddor(Embedding)
        Embedding = Embedding + Pe_Embedding

    # Encode past information (the horizon part of the sequence is excluded)
    enc_out = Embedding[:, :(-dim_horizon), :]
    encoder = []
    for i in range(num_layers_enc):
        # NOTE(review): feed_forward_dim is hard-coded to 50 and the
        # `num_feed_forward` kwarg is swallowed by **kwargs, so the
        # num_feed_forward parameter is effectively ignored — confirm intent
        # before wiring feed_forward_dim=num_feed_forward (changes architecture).
        encoder.append(
            TransformerEncoder(
                dim_z,
                num_heads,
                feed_forward_dim=50,
                num_feed_forward=num_feed_forward,
                dp_rec=dp_rec,
                flag_mc=flag_mc,
                random_state=add_random_state(random_state, 300 + i),
            )
        )
        enc_out = encoder[-1](enc_out)

    # For learning :
    decoder = []
    dec_out = enc_out
    for i in range(num_layers_dec):
        decoder.append(
            TransformerDecoder(
                dim_z=dim_z,
                dim_horizon=dim_horizon,
                feed_forward_dim=50,
                num_heads=num_heads,
                num_feed_forward=num_feed_forward,
                dp_rec=dp_rec,
                flag_mc=flag_mc,
                random_state=add_random_state(random_state, 400 + i),
            )
        )
        dec_out = decoder[-1](dec_out, Embedding)

    # Only the last dim_horizon steps are interpreted as predictions.
    outputs = TimeDistributed(Interpretor)(dec_out[:, -(dim_horizon):])

    model = tf.keras.Model(list_input, outputs, name="model")
    return model
class Transformer_ED_UQ(NN_UQ):
    """Transformer_ED for forecasting with UQ : see build_transformer to check model parameters"""

    def __init__(
        self,
        model_parameters,
        factory_parameters=None,
        training_parameters=None,
        type_output=None,
        rescale=False,
        n_ech=5,
        train_ratio=0.9,
        name="Lstm_stacked",
        random_state=None,
    ):
        """Initialization

        Args:
            model_parameters (dict): parameters forwarded to build_transformer.
            factory_parameters (dict, optional): lags used by factory().
                Defaults to {'factory_lag_lt': 0, 'factory_lag_st': 0}.
            training_parameters (dict, optional): training setup. Defaults to dict().
            type_output (str, optional): mode of UQ (see NN_UQ). Defaults to None.
            rescale (bool, optional): rescale inputs/outputs. Defaults to False.
            n_ech (int, optional): number of samples for UQ. Defaults to 5.
            train_ratio (float, optional): train/validation split. Defaults to 0.9.
            name (str, optional): estimator name. Defaults to "Lstm_stacked".
            random_state (int, optional): handle experimental random using seed.
        """
        # Avoid mutable default arguments: build fresh dicts per instance.
        if factory_parameters is None:
            factory_parameters = {"factory_lag_lt": 0, "factory_lag_st": 0}
        if training_parameters is None:
            training_parameters = dict()

        if random_state is not None:
            print("Warning : issues non-deterministic behaviour even with random state")

        super().__init__(
            model_initializer=build_transformer,
            model_parameters=model_parameters,
            factory_parameters=factory_parameters,
            training_parameters=training_parameters,
            type_output=type_output,
            rescale=rescale,
            n_ech=n_ech,
            train_ratio=train_ratio,
            name=name,
            random_state=random_state,
        )

    def factory(self, X, y, mask=None, only_fit_scaler=False, **kwarg):
        """Turn raw series into rolled-window model inputs and targets.

        Args:
            X: None, a single feature array, or a (X_ctx, X_lag) pair when the
               model was built with ctx inputs.
            y: target series, or None.
            mask: optional mask forwarded to the parent factory.
            only_fit_scaler (bool, optional): if True and X is not None, only
                fit the scalers and return None.

        Returns:
            (inputs, new_y, mask) where inputs is a list of rolled arrays
            (or None when X is None) and new_y the rolled horizon targets.
        """
        model_params = self.model_parameters
        factory_params = self.factory_parameters

        with_ctx_input = model_params["with_ctx_input"]

        step = model_params.get("step", 1)

        if X is None:
            inputs = None
        elif with_ctx_input:
            X, X_lag = X
            X, X_lag, mask = super().factory(X, X_lag, mask)
            if only_fit_scaler:
                return None
            # Long-term context windows (n_windows) + short-term lag windows.
            X_lt = stack_and_roll(
                X,
                model_params["n_windows"],
                lag=factory_params["factory_lag_lt"],
                step=step,
            )
            X_st = stack_and_roll(
                X_lag,
                model_params["size_window"],
                lag=factory_params["factory_lag_st"] - 1,
                step=step,
            )
            inputs = [X_lt, X_st]
        else:
            X, _, _ = super().factory(X, None, mask)
            if only_fit_scaler:
                return None
            X_st = stack_and_roll(
                X,
                model_params["size_window"],
                lag=factory_params["factory_lag_st"] - 1,
                step=step,
            )
            inputs = [X_st]

        new_y = None
        if y is not None:
            _, y, _ = super().factory(None, y, mask)
            # Targets are stacked over the prediction horizon.
            new_y = stack_and_roll(
                y,
                model_params["dim_horizon"],
                lag=model_params["dim_horizon"] - 1,
                step=step,
            )
        return inputs, new_y, mask

    def Build_generator(self, X, y, batch=32, shuffle=True, train=True):
        """Build a Folder_Generator feeding batches through this estimator's factory."""
        return Folder_Generator(
            X,
            y,
            self,
            batch=batch,
            shuffle=shuffle,
            train=train,
            random_state=self.random_state,
        )
def get_params_dict(
    dim_ctx,
    dim_dyn,
    dim_target,
    dim_chan=1,
    size_window=20,
    n_windows=5,
    dim_horizon=5,
    dim_z=50,
    dp=0.05,
    dp_rec=0.02,
    num_heads=2,
    num_feed_forward=128,
    num_layers_enc=3,
    num_layers_dec=2,
    layers_enc=[75, 150, 75],
    layers_dec=[200, 125, 75],
    list_strides=[2, 1, 1, 1],
    list_filters=[128, 128, 128],
    list_kernels=None,
    with_convolution=True,
    with_ctx_input=True,
    n_ech=3,
    type_output="MC_Dropout",
    random_state=None,
):
    """Build the default model-parameters dict for Transformer_ED_UQ.

    Args:
        dim_ctx (int): number of context features.
        dim_dyn (int): dimension of the dynamic (lag) inputs.
        dim_target (int): dimension of the target series.
        Remaining keyword arguments mirror build_transformer's parameters.

    Returns:
        dict: parameters consumable by build_transformer / Transformer_ED_UQ.
    """
    # List arguments are copied so callers mutating the returned dict cannot
    # corrupt the shared mutable defaults of this function.
    dict_params = {
        "dim_ctx": dim_ctx,
        "size_window": size_window,
        "n_windows": n_windows,
        "dim_horizon": dim_horizon,
        "dim_target": dim_target,
        "dim_chan": dim_chan,
        "step": 1,
        "dim_z": dim_z,
        "dp": dp,
        "dp_rec": dp_rec,
        "dim_dyn": dim_dyn,
        "type_output": type_output,
        "num_heads": num_heads,
        "num_feed_forward": num_feed_forward,
        "num_layers_enc": num_layers_enc,
        "num_layers_dec": num_layers_dec,
        "k_reg": (10e-6, 10e-6),
        "layers_enc": list(layers_enc),
        "layers_dec": list(layers_dec),
        "list_strides": list(list_strides),
        "list_filters": None if list_filters is None else list(list_filters),
        "list_kernels": None if list_kernels is None else list(list_kernels),
        "with_convolution": with_convolution,
        "with_ctx_input": with_ctx_input,
        "n_ech": n_ech,
        "random_state": random_state,
    }
    return dict_params