Week 7 Notebook: Optimizing Other Objectives¶
This week, we will look at optimizing multiple objectives simultaneously. In particular, we will look at pivoting with adversarial neural networks [17, 18, 19].
We will borrow the implementation from: https://github.com/glouppe/paper-learning-to-pivot
import tensorflow.keras as keras
import numpy as np
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import uproot
from tqdm.notebook import tqdm
import yaml
with open('definitions.yml') as file:
# The FullLoader parameter handles the conversion from YAML
# scalar values to Python the dictionary format
definitions = yaml.load(file, Loader=yaml.FullLoader)
features = definitions['features']
spectators = definitions['spectators']
labels = definitions['labels']
nfeatures = definitions['nfeatures']
nspectators = definitions['nspectators']
nlabels = definitions['nlabels']
ntracks = definitions['ntracks']
Define discriminator, regression, and combined adversarial models¶
The combined loss function is $\(L = L_\mathrm{class} - \lambda L_\mathrm{reg}\)$
\(L_\mathrm{class}\) is the loss function for the classification part (categorical cross entropy)
\(L_\mathrm{reg}\) is the loss function for the adversarial part (in this case a regression)
\(\lambda\) is a hyperparamter that controls how important the adversarial part of the loss is compared to the classification part, which we nominally set to 1
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Concatenate, GlobalAveragePooling1D
import tensorflow.keras.backend as K
# define Deep Sets model with Dense Keras layer
inputs = Input(shape=(ntracks, nfeatures,), name='input')
x = BatchNormalization(name='bn_1')(inputs)
x = Dense(64, name='dense_1', activation='relu')(x)
x = Dense(32, name='dense_2', activation='relu')(x)
x = Dense(32, name='dense_3', activation='relu')(x)
# sum over tracks
x = GlobalAveragePooling1D(name='pool_1')(x)
x = Dense(100, name='dense_4', activation='relu')(x)
output = Dense(nlabels, name = 'output', activation='softmax')(x)
keras_model_disc = Model(inputs=inputs, outputs=output)
keras_model_disc.compile(optimizer='adam',
loss='categorical_crossentropy')
# regressor
x = Dense(100, name='dense_5', activation='relu')(keras_model_disc(inputs))
x = Dense(100, name='dense_6', activation='relu')(x)
output_reg = Dense(2, activation='linear', name='mass_pt_reg')(x)
sgd_opt = keras.optimizers.SGD(momentum=0)
keras_model_reg = Model(inputs=inputs, outputs=output_reg)
keras_model_reg.compile(optimizer=sgd_opt,
loss='mse')
# combined model
lam = 1
keras_model_adv = Model(inputs=inputs, outputs=[keras_model_disc(inputs), keras_model_reg(inputs)])
keras_model_adv.compile(optimizer=sgd_opt,
loss=['categorical_crossentropy', 'mse'],
loss_weights = [1, -lam])
print(keras_model_disc.summary())
print(keras_model_reg.summary())
print(keras_model_adv.summary())
2021-10-23 22:13:56.692275: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input (InputLayer) [(None, 60, 48)] 0
_________________________________________________________________
bn_1 (BatchNormalization) (None, 60, 48) 192
_________________________________________________________________
dense_1 (Dense) (None, 60, 64) 3136
_________________________________________________________________
dense_2 (Dense) (None, 60, 32) 2080
_________________________________________________________________
dense_3 (Dense) (None, 60, 32) 1056
_________________________________________________________________
pool_1 (GlobalAveragePooling (None, 32) 0
_________________________________________________________________
dense_4 (Dense) (None, 100) 3300
_________________________________________________________________
output (Dense) (None, 2) 202
=================================================================
Total params: 9,966
Trainable params: 9,870
Non-trainable params: 96
_________________________________________________________________
None
Model: "model_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input (InputLayer) [(None, 60, 48)] 0
_________________________________________________________________
model (Functional) (None, 2) 9966
_________________________________________________________________
dense_5 (Dense) (None, 100) 300
_________________________________________________________________
dense_6 (Dense) (None, 100) 10100
_________________________________________________________________
mass_pt_reg (Dense) (None, 2) 202
=================================================================
Total params: 20,568
Trainable params: 20,472
Non-trainable params: 96
_________________________________________________________________
None
Model: "model_2"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input (InputLayer) [(None, 60, 48)] 0
__________________________________________________________________________________________________
model (Functional) (None, 2) 9966 input[0][0]
__________________________________________________________________________________________________
model_1 (Functional) (None, 2) 20568 input[0][0]
==================================================================================================
Total params: 20,568
Trainable params: 20,472
Non-trainable params: 96
__________________________________________________________________________________________________
None
Load data¶
from DataGenerator import DataGenerator
# load training and validation generators
train_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/train/ntuple_merged_10.root']
val_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/train/ntuple_merged_11.root']
train_generator = DataGenerator(train_files, features, labels, spectators, batch_size=1024, n_dim=ntracks,
remove_mass_pt_window=False,
remove_unlabeled=True, max_entry=5000,
return_spectators=True, scale_mass_pt=[100., 10000.])
val_generator = DataGenerator(val_files, features, labels, spectators, batch_size=1024, n_dim=ntracks,
remove_mass_pt_window=False,
remove_unlabeled=True, max_entry=5000,
return_spectators=True, scale_mass_pt=[100., 10000.])
Pretrain discriminator and regressor models¶
# pretrain discriminator
keras_model_disc.trainable = True
keras_model_disc.compile(optimizer='adam',
loss='categorical_crossentropy')
for n_epoch in tqdm(range(20)):
for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch==19)):
keras_model_disc.fit(t[0], t[1][0],verbose=0)
# pretrain regressor
keras_model_reg.trainable = True
keras_model_disc.trainable = False
keras_model_reg.compile(optimizer=sgd_opt, loss='mse')
for n_epoch in tqdm(range(20)):
for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch==19)):
keras_model_reg.fit(t[0], t[1][1], verbose=0)
2021-10-23 22:14:10.767857: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-10-23 22:14:10.768638: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2593905000 Hz
Main training loop¶
During the main training loop, we do two things:
Train the discriminator model with the combined loss function $\(L = L_\mathrm{class} - \lambda L_\mathrm{reg}\)$
Train the regression model to learn the mass from with the standard MSE loss function $\(L_\mathrm{reg}\)$
# alternate training discriminator and regressor
for n_epoch in tqdm(range(40)):
for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch==39)):
# train discriminator
keras_model_reg.trainable = False
keras_model_disc.trainable = True
keras_model_adv.compile(optimizer=sgd_opt,
loss=['categorical_crossentropy', 'mse'],
loss_weights=[1, -lam])
keras_model_adv.fit(t[0], t[1], verbose=0)
# train regressor
keras_model_reg.trainable = True
keras_model_disc.trainable = False
keras_model_reg.compile(optimizer=sgd_opt, loss='mse')
keras_model_reg.fit(t[0], t[1][1],verbose=0)
keras_model_adv.save_weights('keras_model_adv_best.h5')
Test¶
# load testing file
test_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/test/ntuple_merged_0.root']
test_generator = DataGenerator(test_files, features, labels, spectators, batch_size=8192, n_dim=ntracks,
remove_mass_pt_window=True,
remove_unlabeled=True,
return_spectators=True,
max_entry=200000) # basically, no maximum
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
/tmp/ipykernel_8659/2932239055.py in <module>
1 # load testing file
2 test_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/test/ntuple_merged_0.root']
----> 3 test_generator = DataGenerator(test_files, features, labels, spectators, batch_size=8192, n_dim=ntracks,
4 remove_mass_pt_window=True,
5 remove_unlabeled=True,
~/work/capstone-particle-physics-domain/capstone-particle-physics-domain/weeks/DataGenerator.py in __init__(self, list_files, features, labels, spectators, batch_size, n_dim, remove_mass_pt_window, remove_unlabeled, return_spectators, max_entry, scale_mass_pt)
30 running_total = 0
31 for i, file_name in enumerate(self.list_files):
---> 32 with uproot.open(file_name, **get_file_handler(file_name)) as root_file:
33 self.open_files.append(root_file)
34 tree = root_file['deepntuplizer/tree']
/usr/share/miniconda/envs/ml-latest/lib/python3.9/site-packages/uproot/reading.py in open(path, object_cache, array_cache, custom_classes, decompression_executor, interpretation_executor, **options)
147 )
148
--> 149 file = ReadOnlyFile(
150 file_path,
151 object_cache=object_cache,
/usr/share/miniconda/envs/ml-latest/lib/python3.9/site-packages/uproot/reading.py in __init__(self, file_path, object_cache, array_cache, custom_classes, decompression_executor, interpretation_executor, **options)
588 file_path, self._options
589 )
--> 590 self._source = Source(
591 file_path, **self._options # NOTE: a comma after **options breaks Python 2
592 )
/usr/share/miniconda/envs/ml-latest/lib/python3.9/site-packages/uproot/source/xrootd.py in __init__(self, file_path, **options)
445 self._file_path = file_path
446 self._num_bytes = None
--> 447 self._open()
448
449 def _open(self):
/usr/share/miniconda/envs/ml-latest/lib/python3.9/site-packages/uproot/source/xrootd.py in _open(self)
449 def _open(self):
450 self._executor = uproot.source.futures.ResourceThreadPoolExecutor(
--> 451 [
452 XRootDResource(self._file_path, self._timeout)
453 for x in uproot._util.range(self._num_workers)
/usr/share/miniconda/envs/ml-latest/lib/python3.9/site-packages/uproot/source/xrootd.py in <listcomp>(.0)
450 self._executor = uproot.source.futures.ResourceThreadPoolExecutor(
451 [
--> 452 XRootDResource(self._file_path, self._timeout)
453 for x in uproot._util.range(self._num_workers)
454 ]
/usr/share/miniconda/envs/ml-latest/lib/python3.9/site-packages/uproot/source/xrootd.py in __init__(self, file_path, timeout)
80 self._file_path = file_path
81 self._timeout = timeout
---> 82 self._open()
83
84 def _open(self):
/usr/share/miniconda/envs/ml-latest/lib/python3.9/site-packages/uproot/source/xrootd.py in _open(self)
89 status, dummy = self._file.open(self._file_path, timeout=self._xrd_timeout())
90 if status.error:
---> 91 self._xrd_error(status)
92
93 def __getstate__(self):
/usr/share/miniconda/envs/ml-latest/lib/python3.9/site-packages/uproot/source/xrootd.py in _xrd_error(self, status)
115
116 else:
--> 117 raise OSError(
118 """XRootD error: {0}
119 in file {1}""".format(
OSError: XRootD error: [ERROR] Operation expired
in file root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/test/ntuple_merged_0.root
# run model inference on test data set
predict_array_adv = []
label_array_test = []
spec_array_test = []
for t in tqdm(test_generator, total=len(test_generator)):
label_array_test.append(t[1][0])
spec_array_test.append(t[1][1])
predict_array_adv.append(keras_model_adv.predict(t[0])[0])
predict_array_adv = np.concatenate(predict_array_adv, axis=0)
label_array_test = np.concatenate(label_array_test, axis=0)
spec_array_test = np.concatenate(spec_array_test, axis=0)
# create ROC curves
print(label_array_test.shape)
print(spec_array_test.shape)
print(predict_array_adv.shape)
fpr_adv, tpr_adv, threshold_adv = roc_curve(label_array_test[:,1], predict_array_adv[:,1])
# plot ROC curves
plt.figure()
plt.plot(tpr_adv, fpr_adv, lw=2.5, label="Adversarial, AUC = {:.1f}%".format(auc(fpr_adv,tpr_adv)*100))
plt.xlabel(r'True positive rate')
plt.ylabel(r'False positive rate')
plt.semilogy()
plt.ylim(0.001, 1)
plt.xlim(0, 1)
plt.grid(True)
plt.legend(loc='upper left')
plt.show()
from utils import find_nearest
plt.figure()
for wp in [1.0, 0.5, 0.3, 0.1, 0.05]:
idx, val = find_nearest(fpr_adv, wp)
plt.hist(spec_array_test[:,0], bins=np.linspace(40, 200, 21),
weights=label_array_test[:,0]*(predict_array_adv[:,1] > threshold_adv[idx]),
alpha=0.4, density=True, label='QCD, {}% FPR cut'.format(int(wp*100)),linestyle='-')
plt.legend()
plt.xlabel(r'$m_{SD}$')
plt.ylabel(r'Normalized probability')
plt.xlim(40, 200)
plt.figure()
for wp in [1.0, 0.5, 0.3, 0.1, 0.05]:
idx, val = find_nearest(fpr_adv, wp)
plt.hist(spec_array_test[:,0], bins=np.linspace(40, 200, 21),
weights=label_array_test[:,1]*(predict_array_adv[:,1] > threshold_adv[idx]),
alpha=0.4, density=True, label='H(bb), {}% FPR cut'.format(int(wp*100)),linestyle='-')
plt.legend()
plt.xlabel(r'$m_{SD}$')
plt.ylabel(r'Normalized probability')
plt.xlim(40, 200)
plt.show()
plt.figure()
plt.hist(predict_array_adv[:,1], bins = np.linspace(0, 1, 21),
weights=label_array_test[:,1]*0.1,
alpha=0.4, linestyle='-', label='H(bb)')
plt.hist(predict_array_adv[:,1], bins = np.linspace(0, 1, 21),
weights=label_array_test[:,0],
alpha=0.4, linestyle='-', label='QCD')
plt.legend()
plt.show()
plt.figure()
plt.hist(spec_array_test[:,0], bins = np.linspace(40, 200, 21),
weights = label_array_test[:,1]*0.1,
alpha=0.4, linestyle='-', label='H(bb)')
plt.hist(spec_array_test[:,0], bins = np.linspace(40, 200, 21),
weights = label_array_test[:,0],
alpha=0.4, linestyle='-', label='QCD')
plt.legend()
plt.show()