Coverage for uqmodels/data_generation/Gen_two_dimension_uncertainty

1import matplotlib.pyplot as plt

2import numpy as np

3from sklearn.preprocessing import StandardScaler

5import uqmodels.utils as ut

6from uqmodels.postprocessing.UQ_processing import (

7 compute_Epistemic_score,

8 fit_Epistemic_score,

9 get_extremum_var_TOT_and_ndUQ_ratio,

10 process_UQmeasure_to_TOT_and_E_sigma,

11)

14def feature_augment(X, x_min, y_min):

15 angle = np.angle(X[:, 0] + X[:, 1] * 1j)[:, None]

16 print(angle.min())

17 norm = np.abs((X[:, 0] + X[:, 1] * 1j) * 0.001)[:, None]

18 sqrtnorm = np.sqrt(np.abs((X[:, 0] + X[:, 1] * 1j) * 0.001)[:, None])

19 pwdangle = np.power(angle, 2)

20 sqrtangle = np.sqrt(angle + np.pi)

21 expX = np.exp(X[:, 0])[:, None]

22 expY = np.exp(X[:, 1])[:, None]

23 logX = np.log(X[:, 0] - x_min + 1)[:, None]

24 logY = np.log(X[:, 1] - y_min + 1)[:, None]

25 if False:

26 X = np.concatenate(

27 [X, angle, pwdangle, sqrtangle, norm, sqrtnorm, expX, expY, logX, logY],

28 axis=1,

29 )

30 else:

31 X = np.concatenate([X, expX, expY, logX, logY], axis=1)

33 return X

36def var_vec_matrix(i, mat, y, s=1):

37 dist = (mat - mat[i]) ** 2

38 dist = np.sum(dist, axis=1)

39 dist = np.sqrt(dist)

40 return y[dist < s].std()

43def gen_target(X):

44 X = X

45 a = (1 - np.cos(0.5 + X[:, 0] * 2 * np.pi)) + 1.5 * np.sin(

46 -0.1 + X[:, 0] + X[:, 1] * 2 * np.pi

47 )

48 a = a + 2 * ((1 - np.power(X[:, 0], 2)) + (1 - np.power(X[:, 1], 2)))

49 return a

52def gen_UQ(X, noise_x):

53 X = X

54 # a = 2 *((1-np.power(0.5-X[:,0],2))+(1-np.power(X[:,1],2)))

55 # b = 2*((1-np.power(0.7+X[:,0],2))+(1-np.power(0.7+X[:,1],2)))

56 # a = np.power((a-a.min())/(a.max()-a.min()),400) + np.power((b-b.min())/(b.max()-b.min()),400)

57 a = np.cos(0.3 + X[:, 0] * 2 * np.pi) + np.sin(X[:, 1] * 2 * np.pi)

58 a = np.power((a - a.min()) / (a.max() - a.min()), 4)

59 a = (a / a.max() + 0.05) * noise_x

60 return a

63def apply_UQ(X, noise_x):

64 UQ_ = gen_UQ(X, noise_x)

65 sigma = np.tile(UQ_[:, None], 2)

66 X = X + np.random.normal(sigma * 0, sigma)

67 return X

70def core_gen(

71 n_samples=6000,

72 n_mid=3000,

73 n_mid_mid=200,

74 shuffle=True,

75 noise_x=0.08,

76 noise_outliers=0.14,

77 keep_val=0.01,

78 noise_target=0.05,

79 random_state=0,

80):

81 outer_circ_x = -0.5 + np.cos(np.linspace(0, np.pi + (np.pi / 2.2), n_mid))[::-1]

82 outer_line_x = np.linspace(0, 0.5, n_mid_mid)[::-1]

83 outer_circ_y = np.sin(np.linspace(0 - (np.pi / 2.2), np.pi, n_mid))

85 inner_circ_x = 1 - np.cos(np.linspace(0, np.pi + np.pi / 2.2, n_mid))

86 inner_line_x = np.linspace(0, -0, n_mid_mid)[::-1]

87 inner_circ_y = -np.sin(np.linspace(0, np.pi + np.pi / 2.2, n_mid))

89 X = np.vstack(

90 [

91 np.concatenate([outer_circ_x, outer_line_x, inner_circ_x]) - 0.25,

92 np.concatenate([outer_circ_y, inner_line_x, inner_circ_y]),

93 ]

94 ).T

96 X += np.random.normal(X * 0, scale=noise_outliers / 10, size=X.shape)

98 sigma = np.tile(gen_UQ(X, noise_x)[:, None], 2)

99 # Add outliers

100 ind_big_alea = np.random.choice(np.arange(len(X)), 300)

101 X[ind_big_alea] += np.random.normal(

102 X[ind_big_alea] * 0,

103 scale=sigma[ind_big_alea] + noise_outliers,

104 size=X[ind_big_alea].shape,

105 )

106

107 # Apply noise on target

108 target = gen_target(apply_UQ(X, noise_x))

109 # Add noise on input independtly of target

110 X = apply_UQ(X, noise_x)

111 target = ut.cut(target, 0.01, 0.99)

112 target_min = target.min()

113 target_max = target.max()

114 target = (target - target_min) / (target_max - target_min)

115

116 keep = (np.random.rand(len(X)) < keep_val) | (

117 (np.arange(len(X)) < 2000) | (np.arange(len(X)) > 2700)

118 )

119

120 # Basé sur l'ordre initial sans prise en compte de la perturbation features

121 # pseudo_var = np.array([y[max(0,i-100):i+100].var() for i in np.arange(len(X))])

122 # Basé sur le voisnage après perturbation : prise en compte de la var X et Y

123

124 pseudo_var = np.array(

125 [var_vec_matrix(i, X[:, :2], target, s=0.05) for i in np.arange(len(X))]

126 )

127

128 var_max = np.quantile(pseudo_var, 0.98)

129 var_min = np.quantile(pseudo_var, 0.2)

130

131 plt.figure(figsize=(12, 4))

132 plt.subplot(1, 3, 1)

133 plt.scatter(

134 X[:, 0], X[:, 1], c=target[:], vmin=0, vmax=1, s=3, cmap=plt.get_cmap("jet")

135 )

136 plt.subplot(1, 3, 2)

137 plt.scatter(

138 X[:, 0],

139 X[:, 1],

140 c=pseudo_var[:],

141 vmin=var_min,

142 vmax=var_max,

143 s=3,

144 cmap=plt.get_cmap("plasma"),

145 )

146 plt.subplot(1, 3, 3)

147 plt.scatter(X[:, 0], X[:, 1], c=keep, s=3, cmap=plt.get_cmap("jet"))

148 plt.show()

149

150 X = feature_augment(X, X[:, 0].min() - 0.5, X[:, 1].min() - 0.5)

151 plt.figure()

152

153 plt.scatter(

154 np.arange(len(target))[keep], target[keep], color=plt.get_cmap("jet", 2)(0)

155 )

156 plt.scatter(

157 np.arange(len(target))[~keep], target[~keep], color=plt.get_cmap("jet", 2)(1)

158 )

159 plt.show()

160

161 train = np.zeros(len(X))

162 # Select ramdomly train point

163 train[np.random.choice(np.arange(len(X)), int(len(X) / 2), replace=False)] = 1

164 # Remove small part to create OOD subset

165 train[np.invert(keep)] = 0

166 train = train.astype(bool)

167 test = np.invert(train)

168

169 shape = (200, 200)

170 x1 = np.linspace(X[:, 0].min() - 0.3, X[:, 0].max() + 0.3, shape[0])

171 x2 = np.linspace(X[:, 1].min() - 0.3, X[:, 1].max() + 0.3, shape[1])

172 # full coordinate arrays

173 xx, yy = np.meshgrid(x1, x2)

174 grid_X = np.concatenate([xx.reshape(-1)[:, None], yy.reshape(-1)[:, None]], axis=1)

175

176 grid_X = feature_augment(grid_X, X[:, 0].min() - 0.5, X[:, 1].min() - 0.5)

177

178 grid_sigma = gen_UQ(grid_X, noise_x)

179

180 grid_target = gen_target(grid_X)

181 grid_target = (grid_target - target_min) / (target_max - target_min)

182 grid_target = ut.threshold(grid_target, min_val=0, max_val=1)

183

184 print(target.min(), target.max())

185 print(grid_target.min(), grid_target.max())

186

187 x1 = grid_X[:, 0].reshape(shape)

188 x2 = grid_X[:, 1].reshape(shape)

189 plt.figure(figsize=(15, 15))

190 plt.subplot(2, 2, 1)

191 plt.title("Prediction")

192 # h = plt.contourf(

193 # x1,

194 # x2,

195 # grid_target.reshape(shape),

196 # levels=np.arange(0, 1.05, 0.05),

197 # vmin=0,

198 # vmax=1,

199 # alpha=0.2,

200 # cmap=plt.get_cmap("jet"),

201 # )

202 # cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="target")

203 plt.scatter(

204 X[:, 0], X[:, 1], c=target, vmin=0, vmax=1, s=1, cmap=plt.get_cmap("jet")

205 )

206 plt.subplot(2, 2, 2)

207 plt.title("UQ")

208 # h = plt.contourf(

209 # x1,

210 # x2,

211 # grid_sigma.reshape(shape),

212 # levels=20,

213 # alpha=0.2,

214 # cmap=plt.get_cmap("jet"),

215 # )

216 # cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="target")

217 plt.scatter(X[:, 0], X[:, 1], c=pseudo_var, s=1, cmap=plt.get_cmap("jet"))

218

219 features_scaler = StandardScaler()

220 X = features_scaler.fit_transform(X)

221 grid_X = features_scaler.transform(grid_X)

222

223 dict_data = {}

224 dict_data["X"] = X

225 dict_data["Y"] = target

226 dict_data["context"] = None

227 dict_data["train"] = train

228 dict_data["test"] = test

229 dict_data["X_split"] = train

230 dict_data["aux"] = {

231 "grid_X": grid_X,

232 "grid_Y": grid_target,

233 "pseudo_var": pseudo_var,

234 "grid_sigma": grid_sigma,

235 "keep": keep,

236 }

237

238 return dict_data

239

240

241def generate_default(dict_params=dict()):

242 dict_data = core_gen(**dict_params)

243 return dict_data

244

245

246def compute_val(pred, UQ, UQ_grid, train):

247 params_ = fit_Epistemic_score(

248 UQ,

249 "var_A&E",

250 pred=None,

251 y=None,

252 type_UQ_params=None,

253 list_percent=[0.5, 0.8, 0.95, 0.98, 0.995, 1],

254 var_min=0,

255 var_max=None,

256 min_cut=0.1,

257 max_cut=0.97,

258 q_var=1,

259 q_Eratio=3,

260 mode="score",

261 reduc_filter=None,

262 )

263

264 extremum_var_TOT, ndUQ_ratio = get_extremum_var_TOT_and_ndUQ_ratio(

265 UQ,

266 type_UQ="var_A&E",

267 pred=None,

268 y=None,

269 type_UQ_params=None,

270 min_cut=0,

271 max_cut=0.97,

272 var_min=0,

273 var_max=None,

274 factor=1,

275 q_var=1,

276 q_Eratio=0.5,

277 E_cut_in_var_nominal=True,

278 )

279

280 A, E = process_UQmeasure_to_TOT_and_E_sigma(

281 UQ,

282 "var_A&E",

283 pred=None,

284 y=None,

285 type_UQ_params=None,

286 var_min=0,

287 var_max=None,

288 min_cut=0,

289 max_cut=1,

290 q_var=1,

291 q_var_e=1,

292 k_var_e=1,

293 ndUQ_ratio=ndUQ_ratio,

294 extremum_var_TOT=extremum_var_TOT,

295 reduc_filter=None,

296 roll=0,

297 )

298 print(extremum_var_TOT)

299

300 A, E = np.power(A, 2), np.power(E, 2)

301

302 Amin = np.quantile(A, 0.01)

303 Amax = np.quantile(A, 0.99)

304 Emin = np.quantile(E, 0.01)

305 Emax = np.quantile(E, 0.99)

306

307 UQ = (A, E)

308 dE, params_ = compute_Epistemic_score(

309 UQ,

310 "var_A&E",

311 pred=None,

312 y=None,

313 type_UQ_params=None,

314 list_percent=[0.5, 0.8, 0.95, 0.98, 0.995, 1],

315 var_min=0,

316 var_max=None,

317 min_cut=0.1,

318 max_cut=0.97,

319 q_var=1,

320 q_Eratio=1.01,

321 mode="score",

322 reduc_filter=None,

323 params_=params_,

324 )

325 dE = np.log(dE)

326

327 dEmin = np.quantile(dE, 0.01)

328 dEmax = np.quantile(dE, 0.99)

329

330 grid_A, grid_E = process_UQmeasure_to_TOT_and_E_sigma(

331 UQ_grid,

332 "var_A&E",

333 pred=None,

334 y=None,

335 type_UQ_params=None,

336 var_min=0,

337 var_max=None,

338 min_cut=0,

339 max_cut=1,

340 q_var=1,

341 q_var_e=1,

342 k_var_e=1,

343 ndUQ_ratio=ndUQ_ratio,

344 extremum_var_TOT=extremum_var_TOT,

345 reduc_filter=None,

346 roll=0,

347 )

348

349 grid_A, grid_E = np.power(grid_A, 2), np.power(grid_E, 2)

350

351 UQ_grid = (grid_A, grid_E)

352 grid_dE, params_ = compute_Epistemic_score(

353 UQ_grid,

354 "var_A&E",

355 pred=None,

356 y=None,

357 type_UQ_params=None,

358 list_percent=[0.5, 0.8, 0.95, 0.98, 0.995, 1],

359 var_min=0,

360 var_max=None,

361 min_cut=0.1,

362 max_cut=0.97,

363 q_var=1,

364 q_Eratio=1.01,

365 mode="score",

366 reduc_filter=None,

367 params_=params_,

368 )

369

370 grid_dE = np.log(grid_dE)

371

372 E[E > Emax] = Emax

373 E[E < Emin] = Emin

374 A[A > Amax] = Amax

375 A[A < Amin] = Amin

376 dE[dE > dEmax] = dEmax

377 dE[dE < dEmin] = dEmin

378

379 grid_E[grid_E > Emax] = Emax

380 grid_E[grid_E < Emin] = Emin

381 grid_A[grid_A > Amax] = Amax

382 grid_A[grid_A < Amin] = Amin

383 grid_dE[grid_dE > dEmax] = dEmax

384 grid_dE[grid_dE < dEmin] = dEmin

385

386 A = np.log(A)

387 Amin, Amax = np.log(Amin), np.log(Amax)

388 grid_A = np.log(grid_A)

389

390 return (A, grid_A, Amin, Amax, E, grid_E, Emin, Emax, dE, grid_dE, dEmin, dEmax)

391

392

393def two_dimensional_plot(

394 y,

395 pred,

396 pred_grid,

397 X,

398 grid_X,

399 A,

400 Amin,

401 Amax,

402 dE,

403 dEmin,

404 dEmax,

405 keep,

406 grid_A,

407 pseudo_var,

408 var_min,

409 var_max,

410 grid_E,

411 Emin,

412 Emax,

413 grid_dE,

414 test,

415 color_lvl=10,

416):

417 mask = keep

418

419 plt.figure(figsize=(12, 8))

420 plt.subplot(2, 3, 1)

421 plt.title("Cible à prédire (couleur)")

422 h = plt.scatter(

423 X[test, 0], X[test, 1], c=y[test], vmin=0, vmax=1, s=5, cmap=plt.get_cmap("jet")

424 )

425 cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="target")

426 plt.xlabel("X1")

427 plt.ylabel("X2")

428 plt.subplot(2, 3, 2)

429 plt.title("Prediction")

430 h = plt.scatter(

431 X[test, 0],

432 X[test, 1],

433 c=pred[test],

434 vmin=0,

435 vmax=1,

436 s=5,

437 cmap=plt.get_cmap("jet"),

438 )

439 cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="pred_value")

440 plt.xlabel("X1")

441 plt.ylabel("X2")

442 plt.subplot(2, 3, 3)

443 plt.title("Erreur modèle")

444 res = y[test] - pred[test]

445 h = plt.scatter(

446 X[test, 0],

447 X[test, 1],

448 c=res,

449 vmin=-1,

450 vmax=1,

451 cmap=plt.get_cmap("Spectral_r"),

452 s=5,

453 )

454 cbar = plt.colorbar(

455 h, orientation="horizontal", ticks=[res.min(), 0, res.max()], label="residu"

456 )

457 plt.xlabel("X1")

458 plt.ylabel("X2")

459

460 plt.subplot(2, 3, 4)

461 plt.title("train_data")

462 h = plt.scatter(X[:, 0], X[:, 1], c=keep, s=1, cmap=plt.get_cmap("jet"))

463 cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="train data")

464 cbar.ax.set_xticklabels(["Seen", "Unseen"])

465 plt.xlabel("X1")

466 plt.ylabel("X2")

467

468 plt.subplot(2, 3, 5)

469 plt.title("Aleatoric")

470 h = plt.scatter(

471 X[test, 0],

472 X[test, 1],

473 c=A[test],

474 cmap=plt.get_cmap("plasma"),

475 vmin=Amin,

476 vmax=Amax,

477 s=5,

478 )

479 cbar = plt.colorbar(

480 h, orientation="horizontal", ticks=[Amin, Amax], label="Var Aleatoric"

481 )

482 cbar.ax.set_xticklabels(["Low", "High"])

483 plt.xlabel("X1")

484 plt.ylabel("X2")

485

486 plt.subplot(2, 3, 6)

487 plt.title("Epipstemic")

488

489 h = plt.scatter(

490 X[test, 0],

491 X[test, 1],

492 c=dE[test],

493 cmap=plt.get_cmap("Spectral_r"),

494 vmin=dEmin,

495 vmax=dEmax,

496 s=1,

497 )

498 cbar = plt.colorbar(h, orientation="horizontal", ticks=[dEmin, dEmax], label="Conf")

499 cbar.ax.set_xticklabels(["Low risk (meduim of train)", "High risk"])

500 plt.xlabel("X1")

501 plt.ylabel("X2")

502 plt.tight_layout()

503 plt.show()

504

505 shape = (200, 200)

506 x1 = grid_X[:, 0].reshape(shape)

507 x2 = grid_X[:, 1].reshape(shape)

508

509 plt.figure(figsize=(15, 15))

510 plt.subplot(2, 2, 1)

511 plt.title("Prediction")

512 h = plt.contourf(

513 x1,

514 x2,

515 pred_grid.reshape(shape),

516 levels=color_lvl,

517 vmin=0,

518 vmax=1,

519 alpha=0.2,

520 cmap=plt.get_cmap("jet"),

521 )

522 cbar = plt.colorbar(h, orientation="horizontal", ticks=[0, 1], label="target")

523 plt.scatter(

524 X[mask, 0], X[mask, 1], c=y[mask], vmin=0, vmax=1, s=1, cmap=plt.get_cmap("jet")

525 )

526

527 plt.subplot(2, 2, 2)

528 plt.title("Incertitude Aleatoric")

529 h = plt.contourf(

530 x1,

531 x2,

532 grid_A.reshape(shape),

533 levels=color_lvl,

534 alpha=0.2,

535 vmin=Amin,

536 vmax=Amax,

537 cmap=plt.get_cmap("plasma", 10),

538 )

539 plt.scatter(

540 X[mask, 0],

541 X[mask, 1],

542 c=pseudo_var[mask],

543 vmin=var_min,

544 vmax=var_max,

545 s=1,

546 cmap=plt.get_cmap("plasma"),

547 )

548 cbar = plt.colorbar(

549 h, orientation="horizontal", ticks=[Amin, Amax], label="Var Aleatoric"

550 )

551

552 plt.subplot(2, 2, 3)

553 plt.title("Confiance Epistemic")

554 h = plt.contourf(

555 x1,

556 x2,

557 grid_E.reshape(shape),

558 levels=color_lvl,

559 alpha=0.2,

560 vmin=Emin,

561 vmax=Emax,

562 cmap=plt.get_cmap("Spectral_r"),

563 )

564 cbar = plt.colorbar(h, orientation="horizontal", ticks=[Emin, Emax], label="Conf")

565 cbar.ax.set_xticklabels(["Low risk", "High risk"])

566 h = plt.scatter(

567 X[mask, 0],

568 X[mask, 1],

569 c=np.invert(keep)[mask],

570 s=1,

571 cmap=plt.get_cmap("jet"),

572 alpha=0.4,

573 )

574

575 plt.subplot(2, 2, 4)

576 plt.title("Confiance D-Epistemic")

577 h = plt.contourf(

578 x1,

579 x2,

580 grid_dE.reshape(shape),

581 levels=color_lvl,

582 alpha=0.2,

583 vmin=dEmin,

584 vmax=dEmax,

585 cmap=plt.get_cmap("Spectral_r"),

586 )

587 cbar = plt.colorbar(h, orientation="horizontal", ticks=[dEmin, dEmax], label="Conf")

588 cbar.ax.set_xticklabels(["Low risk", "High risk"])

589 h = plt.scatter(

590 X[mask, 0],

591 X[mask, 1],

592 c=np.invert(keep)[mask],

593 s=1,

594 cmap=plt.get_cmap("jet"),

595 alpha=0.4,

596 )

597 plt.tight_layout()

598 plt.show()

Coverage for uqmodels / data_generation / Gen_two_dimension_uncertainty_data.py: 52%

225 statements