# uqmodels/data_generation/Gen_two_dimension_uncertainty_data.py
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler

import uqmodels.utils as ut
from uqmodels.postprocessing.UQ_processing import (
    compute_Epistemic_score,
    fit_Epistemic_score,
    get_extremum_var_TOT_and_ndUQ_ratio,
    process_UQmeasure_to_TOT_and_E_sigma,
)
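# Synthetic 2D "two-crescent" dataset generator for uncertainty-quantification
# experiments: heteroscedastic input noise, a smooth regression target,
# train/test splits with a held-out OOD region, an evaluation grid, and
# plotting helpers for aleatoric/epistemic uncertainty maps.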


def feature_augment(X, x_min, y_min):
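    """Augment 2D coordinates with exp/log transforms of each axis.

    x_min and y_min shift the coordinates so that the log arguments stay
    positive; angle/norm-based features are computed but currently unused.
    """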
    angle = np.angle(X[:, 0] + X[:, 1] * 1j)[:, None]
    print(angle.min())
    norm = np.abs((X[:, 0] + X[:, 1] * 1j) * 0.001)[:, None]
    sqrtnorm = np.sqrt(np.abs((X[:, 0] + X[:, 1] * 1j) * 0.001)[:, None])
    pwdangle = np.power(angle, 2)
    sqrtangle = np.sqrt(angle + np.pi)
    expX = np.exp(X[:, 0])[:, None]
    expY = np.exp(X[:, 1])[:, None]
    logX = np.log(X[:, 0] - x_min + 1)[:, None]
    logY = np.log(X[:, 1] - y_min + 1)[:, None]
    # Richer angle/norm-based feature set, currently disabled:
    # X = np.concatenate(
    #     [X, angle, pwdangle, sqrtangle, norm, sqrtnorm, expX, expY, logX, logY],
    #     axis=1,
    # )
    X = np.concatenate([X, expX, expY, logX, logY], axis=1)
    return X


def var_vec_matrix(i, mat, y, s=1):
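    """Standard deviation of y over the points of mat within distance s of mat[i]."""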
    dist = (mat - mat[i]) ** 2
    dist = np.sum(dist, axis=1)
    dist = np.sqrt(dist)
    return y[dist < s].std()


def gen_target(X):
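    """Smooth scalar target combining cosine/sine terms and quadratic terms in X."""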
    a = (1 - np.cos(0.5 + X[:, 0] * 2 * np.pi)) + 1.5 * np.sin(
        -0.1 + X[:, 0] + X[:, 1] * 2 * np.pi
    )
    a = a + 2 * ((1 - np.power(X[:, 0], 2)) + (1 - np.power(X[:, 1], 2)))
    return a


def gen_UQ(X, noise_x):
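    """Input-dependent (heteroscedastic) noise level, scaled by noise_x."""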
    # a = 2 * ((1 - np.power(0.5 - X[:, 0], 2)) + (1 - np.power(X[:, 1], 2)))
    # b = 2 * ((1 - np.power(0.7 + X[:, 0], 2)) + (1 - np.power(0.7 + X[:, 1], 2)))
    # a = np.power((a - a.min()) / (a.max() - a.min()), 400) + np.power((b - b.min()) / (b.max() - b.min()), 400)
    a = np.cos(0.3 + X[:, 0] * 2 * np.pi) + np.sin(X[:, 1] * 2 * np.pi)
    a = np.power((a - a.min()) / (a.max() - a.min()), 4)
    a = (a / a.max() + 0.05) * noise_x
    return a


def apply_UQ(X, noise_x):
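    """Perturb X with zero-mean Gaussian noise whose scale is given by gen_UQ."""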
    UQ_ = gen_UQ(X, noise_x)
    sigma = np.tile(UQ_[:, None], 2)
    X = X + np.random.normal(sigma * 0, sigma)
    return X


def core_gen(
    n_samples=6000,
    n_mid=3000,
    n_mid_mid=200,
    shuffle=True,
    noise_x=0.08,
    noise_outliers=0.14,
    keep_val=0.01,
    noise_target=0.05,
    random_state=0,
):
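    """Build the two-crescent dataset with heteroscedastic noise and outliers.

    Returns a dict with features "X", normalized target "Y", "train"/"test"
    masks (an out-of-distribution region is excluded from training), and an
    "aux" entry holding the evaluation grid and reference noise levels.
    """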
    outer_circ_x = -0.5 + np.cos(np.linspace(0, np.pi + (np.pi / 2.2), n_mid))[::-1]
    outer_line_x = np.linspace(0, 0.5, n_mid_mid)[::-1]
    outer_circ_y = np.sin(np.linspace(0 - (np.pi / 2.2), np.pi, n_mid))

    inner_circ_x = 1 - np.cos(np.linspace(0, np.pi + np.pi / 2.2, n_mid))
    inner_line_x = np.linspace(0, -0, n_mid_mid)[::-1]
    inner_circ_y = -np.sin(np.linspace(0, np.pi + np.pi / 2.2, n_mid))

    X = np.vstack(
        [
            np.concatenate([outer_circ_x, outer_line_x, inner_circ_x]) - 0.25,
            np.concatenate([outer_circ_y, inner_line_x, inner_circ_y]),
        ]
    ).T

    X += np.random.normal(X * 0, scale=noise_outliers / 10, size=X.shape)

    sigma = np.tile(gen_UQ(X, noise_x)[:, None], 2)
    # Add outliers
    ind_big_alea = np.random.choice(np.arange(len(X)), 300)
    X[ind_big_alea] += np.random.normal(
        X[ind_big_alea] * 0,
        scale=sigma[ind_big_alea] + noise_outliers,
        size=X[ind_big_alea].shape,
    )

    # Apply noise to the target
    target = gen_target(apply_UQ(X, noise_x))
    # Add noise to the inputs independently of the target
    X = apply_UQ(X, noise_x)
    target = ut.cut(target, 0.01, 0.99)
    target_min = target.min()
    target_max = target.max()
    target = (target - target_min) / (target_max - target_min)

    keep = (np.random.rand(len(X)) < keep_val) | (
        (np.arange(len(X)) < 2000) | (np.arange(len(X)) > 2700)
    )

    # Based on the initial ordering, ignoring the feature perturbation:
    # pseudo_var = np.array([y[max(0, i - 100):i + 100].var() for i in np.arange(len(X))])
    # Based on the neighborhood after perturbation: accounts for the variance of X and Y
    pseudo_var = np.array(
        [var_vec_matrix(i, X[:, :2], target, s=0.05) for i in np.arange(len(X))]
    )

    var_max = np.quantile(pseudo_var, 0.98)
    var_min = np.quantile(pseudo_var, 0.2)
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 3, 1)
    plt.scatter(
        X[:, 0], X[:, 1], c=target[:], vmin=0, vmax=1, s=3, cmap=plt.get_cmap("jet")
    )
    plt.subplot(1, 3, 2)
    plt.scatter(
        X[:, 0],
        X[:, 1],
        c=pseudo_var[:],
        vmin=var_min,
        vmax=var_max,
        s=3,
        cmap=plt.get_cmap("plasma"),
    )
    plt.subplot(1, 3, 3)
    plt.scatter(X[:, 0], X[:, 1], c=keep, s=3, cmap=plt.get_cmap("jet"))
    plt.show()

    X = feature_augment(X, X[:, 0].min() - 0.5, X[:, 1].min() - 0.5)
    plt.figure()

    plt.scatter(
        np.arange(len(target))[keep], target[keep], color=plt.get_cmap("jet", 2)(0)
    )
    plt.scatter(
        np.arange(len(target))[~keep], target[~keep], color=plt.get_cmap("jet", 2)(1)
    )
    plt.show()
    train = np.zeros(len(X))
    # Randomly select training points
    train[np.random.choice(np.arange(len(X)), int(len(X) / 2), replace=False)] = 1
    # Remove a small region to create an OOD subset
    train[np.invert(keep)] = 0
    train = train.astype(bool)
    test = np.invert(train)

    shape = (200, 200)
    x1 = np.linspace(X[:, 0].min() - 0.3, X[:, 0].max() + 0.3, shape[0])
    x2 = np.linspace(X[:, 1].min() - 0.3, X[:, 1].max() + 0.3, shape[1])
    # Full coordinate arrays
    xx, yy = np.meshgrid(x1, x2)
    grid_X = np.concatenate([xx.reshape(-1)[:, None], yy.reshape(-1)[:, None]], axis=1)

    grid_X = feature_augment(grid_X, X[:, 0].min() - 0.5, X[:, 1].min() - 0.5)

    grid_sigma = gen_UQ(grid_X, noise_x)

    grid_target = gen_target(grid_X)
    grid_target = (grid_target - target_min) / (target_max - target_min)
    grid_target = ut.threshold(grid_target, min_val=0, max_val=1)

    print(target.min(), target.max())
    print(grid_target.min(), grid_target.max())
    x1 = grid_X[:, 0].reshape(shape)
    x2 = grid_X[:, 1].reshape(shape)
    plt.figure(figsize=(15, 15))
    plt.subplot(2, 2, 1)
    plt.title("Prediction")
    # h = plt.contourf(
    #     x1,
    #     x2,
    #     grid_target.reshape(shape),
    #     levels=np.arange(0, 1.05, 0.05),
    #     vmin=0,
    #     vmax=1,
    #     alpha=0.2,
    #     cmap=plt.get_cmap("jet"),
    # )
    # cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="target")
    plt.scatter(
        X[:, 0], X[:, 1], c=target, vmin=0, vmax=1, s=1, cmap=plt.get_cmap("jet")
    )
    plt.subplot(2, 2, 2)
    plt.title("UQ")
    # h = plt.contourf(
    #     x1,
    #     x2,
    #     grid_sigma.reshape(shape),
    #     levels=20,
    #     alpha=0.2,
    #     cmap=plt.get_cmap("jet"),
    # )
    # cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="target")
    plt.scatter(X[:, 0], X[:, 1], c=pseudo_var, s=1, cmap=plt.get_cmap("jet"))
    features_scaler = StandardScaler()
    X = features_scaler.fit_transform(X)
    grid_X = features_scaler.transform(grid_X)

    dict_data = {}
    dict_data["X"] = X
    dict_data["Y"] = target
    dict_data["context"] = None
    dict_data["train"] = train
    dict_data["test"] = test
    dict_data["X_split"] = train
    dict_data["aux"] = {
        "grid_X": grid_X,
        "grid_Y": grid_target,
        "pseudo_var": pseudo_var,
        "grid_sigma": grid_sigma,
        "keep": keep,
    }

    return dict_data


def generate_default(dict_params=dict()):
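    """Generate the default dataset by forwarding dict_params to core_gen."""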
    dict_data = core_gen(**dict_params)
    return dict_data


def compute_val(pred, UQ, UQ_grid, train):
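    """Derive plot-ready aleatoric/epistemic maps from UQ measures.

    Fits the epistemic-score parameters on UQ, converts both UQ and UQ_grid
    into (log-)aleatoric values, epistemic values and epistemic scores, and
    clips each to robust quantile bounds estimated on the data points.
    """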
    params_ = fit_Epistemic_score(
        UQ,
        "var_A&E",
        pred=None,
        y=None,
        type_UQ_params=None,
        list_percent=[0.5, 0.8, 0.95, 0.98, 0.995, 1],
        var_min=0,
        var_max=None,
        min_cut=0.1,
        max_cut=0.97,
        q_var=1,
        q_Eratio=3,
        mode="score",
        reduc_filter=None,
    )

    extremum_var_TOT, ndUQ_ratio = get_extremum_var_TOT_and_ndUQ_ratio(
        UQ,
        type_UQ="var_A&E",
        pred=None,
        y=None,
        type_UQ_params=None,
        min_cut=0,
        max_cut=0.97,
        var_min=0,
        var_max=None,
        factor=1,
        q_var=1,
        q_Eratio=0.5,
        E_cut_in_var_nominal=True,
    )

    A, E = process_UQmeasure_to_TOT_and_E_sigma(
        UQ,
        "var_A&E",
        pred=None,
        y=None,
        type_UQ_params=None,
        var_min=0,
        var_max=None,
        min_cut=0,
        max_cut=1,
        q_var=1,
        q_var_e=1,
        k_var_e=1,
        ndUQ_ratio=ndUQ_ratio,
        extremum_var_TOT=extremum_var_TOT,
        reduc_filter=None,
        roll=0,
    )
    print(extremum_var_TOT)
    A, E = np.power(A, 2), np.power(E, 2)

    Amin = np.quantile(A, 0.01)
    Amax = np.quantile(A, 0.99)
    Emin = np.quantile(E, 0.01)
    Emax = np.quantile(E, 0.99)

    UQ = (A, E)
    dE, params_ = compute_Epistemic_score(
        UQ,
        "var_A&E",
        pred=None,
        y=None,
        type_UQ_params=None,
        list_percent=[0.5, 0.8, 0.95, 0.98, 0.995, 1],
        var_min=0,
        var_max=None,
        min_cut=0.1,
        max_cut=0.97,
        q_var=1,
        q_Eratio=1.01,
        mode="score",
        reduc_filter=None,
        params_=params_,
    )
    dE = np.log(dE)

    dEmin = np.quantile(dE, 0.01)
    dEmax = np.quantile(dE, 0.99)
    grid_A, grid_E = process_UQmeasure_to_TOT_and_E_sigma(
        UQ_grid,
        "var_A&E",
        pred=None,
        y=None,
        type_UQ_params=None,
        var_min=0,
        var_max=None,
        min_cut=0,
        max_cut=1,
        q_var=1,
        q_var_e=1,
        k_var_e=1,
        ndUQ_ratio=ndUQ_ratio,
        extremum_var_TOT=extremum_var_TOT,
        reduc_filter=None,
        roll=0,
    )

    grid_A, grid_E = np.power(grid_A, 2), np.power(grid_E, 2)

    UQ_grid = (grid_A, grid_E)
    grid_dE, params_ = compute_Epistemic_score(
        UQ_grid,
        "var_A&E",
        pred=None,
        y=None,
        type_UQ_params=None,
        list_percent=[0.5, 0.8, 0.95, 0.98, 0.995, 1],
        var_min=0,
        var_max=None,
        min_cut=0.1,
        max_cut=0.97,
        q_var=1,
        q_Eratio=1.01,
        mode="score",
        reduc_filter=None,
        params_=params_,
    )

    grid_dE = np.log(grid_dE)
    # Clip point-wise and grid values to the robust quantile bounds
    E[E > Emax] = Emax
    E[E < Emin] = Emin
    A[A > Amax] = Amax
    A[A < Amin] = Amin
    dE[dE > dEmax] = dEmax
    dE[dE < dEmin] = dEmin

    grid_E[grid_E > Emax] = Emax
    grid_E[grid_E < Emin] = Emin
    grid_A[grid_A > Amax] = Amax
    grid_A[grid_A < Amin] = Amin
    grid_dE[grid_dE > dEmax] = dEmax
    grid_dE[grid_dE < dEmin] = dEmin

    # Move the aleatoric values to log scale for plotting
    A = np.log(A)
    Amin, Amax = np.log(Amin), np.log(Amax)
    grid_A = np.log(grid_A)

    return (A, grid_A, Amin, Amax, E, grid_E, Emin, Emax, dE, grid_dE, dEmin, dEmax)


def two_dimensional_plot(
    y,
    pred,
    pred_grid,
    X,
    grid_X,
    A,
    Amin,
    Amax,
    dE,
    dEmin,
    dEmax,
    keep,
    grid_A,
    pseudo_var,
    var_min,
    var_max,
    grid_E,
    Emin,
    Emax,
    grid_dE,
    test,
    color_lvl=10,
):
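    """Plot target, predictions, residuals, the train mask and the
    aleatoric/epistemic maps, both as test-point scatters and as grid contours.
    """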
    mask = keep

    plt.figure(figsize=(12, 8))
    plt.subplot(2, 3, 1)
    plt.title("Target to predict (color)")
    h = plt.scatter(
        X[test, 0], X[test, 1], c=y[test], vmin=0, vmax=1, s=5, cmap=plt.get_cmap("jet")
    )
    cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="target")
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.subplot(2, 3, 2)
    plt.title("Prediction")
    h = plt.scatter(
        X[test, 0],
        X[test, 1],
        c=pred[test],
        vmin=0,
        vmax=1,
        s=5,
        cmap=plt.get_cmap("jet"),
    )
    cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="pred_value")
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.subplot(2, 3, 3)
    plt.title("Model error")
    res = y[test] - pred[test]
    h = plt.scatter(
        X[test, 0],
        X[test, 1],
        c=res,
        vmin=-1,
        vmax=1,
        cmap=plt.get_cmap("Spectral_r"),
        s=5,
    )
    cbar = plt.colorbar(
        h, orientation="horizontal", ticks=[res.min(), 0, res.max()], label="residual"
    )
    plt.xlabel("X1")
    plt.ylabel("X2")

    plt.subplot(2, 3, 4)
    plt.title("train_data")
    h = plt.scatter(X[:, 0], X[:, 1], c=keep, s=1, cmap=plt.get_cmap("jet"))
    cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="train data")
    cbar.ax.set_xticklabels(["Seen", "Unseen"])
    plt.xlabel("X1")
    plt.ylabel("X2")

    plt.subplot(2, 3, 5)
    plt.title("Aleatoric")
    h = plt.scatter(
        X[test, 0],
        X[test, 1],
        c=A[test],
        cmap=plt.get_cmap("plasma"),
        vmin=Amin,
        vmax=Amax,
        s=5,
    )
    cbar = plt.colorbar(
        h, orientation="horizontal", ticks=[Amin, Amax], label="Var Aleatoric"
    )
    cbar.ax.set_xticklabels(["Low", "High"])
    plt.xlabel("X1")
    plt.ylabel("X2")

    plt.subplot(2, 3, 6)
    plt.title("Epistemic")

    h = plt.scatter(
        X[test, 0],
        X[test, 1],
        c=dE[test],
        cmap=plt.get_cmap("Spectral_r"),
        vmin=dEmin,
        vmax=dEmax,
        s=1,
    )
    cbar = plt.colorbar(h, orientation="horizontal", ticks=[dEmin, dEmax], label="Conf")
    cbar.ax.set_xticklabels(["Low risk (medium of train)", "High risk"])
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.tight_layout()
    plt.show()
    shape = (200, 200)
    x1 = grid_X[:, 0].reshape(shape)
    x2 = grid_X[:, 1].reshape(shape)

    plt.figure(figsize=(15, 15))
    plt.subplot(2, 2, 1)
    plt.title("Prediction")
    h = plt.contourf(
        x1,
        x2,
        pred_grid.reshape(shape),
        levels=color_lvl,
        vmin=0,
        vmax=1,
        alpha=0.2,
        cmap=plt.get_cmap("jet"),
    )
    cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="target")
    plt.scatter(
        X[mask, 0], X[mask, 1], c=y[mask], vmin=0, vmax=1, s=1, cmap=plt.get_cmap("jet")
    )

    plt.subplot(2, 2, 2)
    plt.title("Aleatoric uncertainty")
    h = plt.contourf(
        x1,
        x2,
        grid_A.reshape(shape),
        levels=color_lvl,
        alpha=0.2,
        vmin=Amin,
        vmax=Amax,
        cmap=plt.get_cmap("plasma", 10),
    )
    plt.scatter(
        X[mask, 0],
        X[mask, 1],
        c=pseudo_var[mask],
        vmin=var_min,
        vmax=var_max,
        s=1,
        cmap=plt.get_cmap("plasma"),
    )
    cbar = plt.colorbar(
        h, orientation="horizontal", ticks=[Amin, Amax], label="Var Aleatoric"
    )

    plt.subplot(2, 2, 3)
    plt.title("Epistemic confidence")
    h = plt.contourf(
        x1,
        x2,
        grid_E.reshape(shape),
        levels=color_lvl,
        alpha=0.2,
        vmin=Emin,
        vmax=Emax,
        cmap=plt.get_cmap("Spectral_r"),
    )
    cbar = plt.colorbar(h, orientation="horizontal", ticks=[Emin, Emax], label="Conf")
    cbar.ax.set_xticklabels(["Low risk", "High risk"])
    h = plt.scatter(
        X[mask, 0],
        X[mask, 1],
        c=np.invert(keep)[mask],
        s=1,
        cmap=plt.get_cmap("jet"),
        alpha=0.4,
    )

    plt.subplot(2, 2, 4)
    plt.title("D-Epistemic confidence")
    h = plt.contourf(
        x1,
        x2,
        grid_dE.reshape(shape),
        levels=color_lvl,
        alpha=0.2,
        vmin=dEmin,
        vmax=dEmax,
        cmap=plt.get_cmap("Spectral_r"),
    )
    cbar = plt.colorbar(h, orientation="horizontal", ticks=[dEmin, dEmax], label="Conf")
    cbar.ax.set_xticklabels(["Low risk", "High risk"])
    h = plt.scatter(
        X[mask, 0],
        X[mask, 1],
        c=np.invert(keep)[mask],
        s=1,
        cmap=plt.get_cmap("jet"),
        alpha=0.4,
    )
    plt.tight_layout()
    plt.show()
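

# Minimal usage sketch (illustrative, relying only on the functions defined
# above): build the default dataset and inspect the returned dictionary.
# Note that core_gen displays diagnostic figures as it runs.
if __name__ == "__main__":
    data = generate_default()
    print("X:", data["X"].shape, "Y:", data["Y"].shape)
    print(
        "train points:", int(data["train"].sum()),
        "test points:", int(data["test"].sum()),
    )
    print("aux keys:", sorted(data["aux"].keys()))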