Week 7 Notebook: Optimizing Other Objectives

This week, we will look at optimizing multiple objectives simultaneously. In particular, we will study pivoting with adversarial neural networks [17, 18, 19].

We will borrow the implementation from: https://github.com/glouppe/paper-learning-to-pivot

import tensorflow.keras as keras
import numpy as np
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import uproot
from tqdm.notebook import tqdm
import yaml

with open('definitions.yml') as file:
    # The FullLoader parameter handles the conversion from YAML
    # scalar values to the Python dictionary format
    definitions = yaml.load(file, Loader=yaml.FullLoader)
    
features = definitions['features']
spectators = definitions['spectators']
labels = definitions['labels']

nfeatures = definitions['nfeatures']
nspectators = definitions['nspectators']
nlabels = definitions['nlabels']
ntracks = definitions['ntracks']

Define the discriminator, regressor, and combined adversarial models

The combined loss function is $L = L_\mathrm{class} - \lambda L_\mathrm{reg}$

  • $L_\mathrm{class}$ is the loss function for the classification part (categorical cross entropy)

  • $L_\mathrm{reg}$ is the loss function for the adversarial part (in this case a regression)

  • $\lambda$ is a hyperparameter that controls how important the adversarial part of the loss is compared to the classification part, which we nominally set to 1 (see the numeric sketch after this list)
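
To make the sign convention concrete, here is a minimal numeric sketch (with made-up toy values) of the combined loss for one batch. The minus sign means that a failing adversary (large $L_\mathrm{reg}$) lowers the combined loss, so the classifier is rewarded for producing outputs that carry no information about the jet mass and $p_\mathrm{T}$:

import numpy as np

lam = 1.0
y_true_class = np.array([[0., 1.], [1., 0.]])      # one-hot class labels (toy)
y_pred_class = np.array([[0.2, 0.8], [0.7, 0.3]])  # softmax outputs (toy)
y_true_reg = np.array([[1.1, 0.9], [0.8, 1.2]])    # scaled (mass, pT) targets (toy)
y_pred_reg = np.array([[1.0, 1.0], [1.0, 1.0]])    # adversary predictions (toy)

L_class = -np.mean(np.sum(y_true_class * np.log(y_pred_class), axis=1))  # categorical cross entropy
L_reg = np.mean((y_true_reg - y_pred_reg)**2)                            # mean squared error
print(L_class - lam * L_reg)  # combined loss L = L_class - lam * L_reg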

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Concatenate, GlobalAveragePooling1D
import tensorflow.keras.backend as K

# define a Deep Sets model with per-track Dense Keras layers
inputs = Input(shape=(ntracks, nfeatures), name='input')
x = BatchNormalization(name='bn_1')(inputs)
x = Dense(64, name='dense_1', activation='relu')(x)
x = Dense(32, name='dense_2', activation='relu')(x)
x = Dense(32, name='dense_3', activation='relu')(x)
# average over tracks (GlobalAveragePooling1D takes the mean over the track axis)
x = GlobalAveragePooling1D(name='pool_1')(x)
x = Dense(100, name='dense_4', activation='relu')(x)
output = Dense(nlabels, name='output', activation='softmax')(x)
    
keras_model_disc = Model(inputs=inputs, outputs=output)
keras_model_disc.compile(optimizer='adam',
                        loss='categorical_crossentropy')

# regressor: takes the classifier output as input and tries to predict
# the (scaled) jet mass and pT from it
x = Dense(100, name='dense_5', activation='relu')(keras_model_disc(inputs))
x = Dense(100, name='dense_6', activation='relu')(x)
output_reg = Dense(2, activation='linear', name='mass_pt_reg')(x)

sgd_opt = keras.optimizers.SGD(momentum=0)
keras_model_reg = Model(inputs=inputs, outputs=output_reg)
keras_model_reg.compile(optimizer=sgd_opt,
                        loss='mse')

# combined model: the negative weight on the regression loss implements
# L = L_class - lambda * L_reg
lam = 1
keras_model_adv = Model(inputs=inputs, outputs=[keras_model_disc(inputs), keras_model_reg(inputs)])
keras_model_adv.compile(optimizer=sgd_opt,
                        loss=['categorical_crossentropy', 'mse'],
                        loss_weights=[1, -lam])

print(keras_model_disc.summary())
print(keras_model_reg.summary())
print(keras_model_adv.summary())
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input (InputLayer)           [(None, 60, 48)]          0         
_________________________________________________________________
bn_1 (BatchNormalization)    (None, 60, 48)            192       
_________________________________________________________________
dense_1 (Dense)              (None, 60, 64)            3136      
_________________________________________________________________
dense_2 (Dense)              (None, 60, 32)            2080      
_________________________________________________________________
dense_3 (Dense)              (None, 60, 32)            1056      
_________________________________________________________________
pool_1 (GlobalAveragePooling (None, 32)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 100)               3300      
_________________________________________________________________
output (Dense)               (None, 2)                 202       
=================================================================
Total params: 9,966
Trainable params: 9,870
Non-trainable params: 96
_________________________________________________________________
None
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input (InputLayer)           [(None, 60, 48)]          0         
_________________________________________________________________
model (Functional)           (None, 2)                 9966      
_________________________________________________________________
dense_5 (Dense)              (None, 100)               300       
_________________________________________________________________
dense_6 (Dense)              (None, 100)               10100     
_________________________________________________________________
mass_pt_reg (Dense)          (None, 2)                 202       
=================================================================
Total params: 20,568
Trainable params: 20,472
Non-trainable params: 96
_________________________________________________________________
None
Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input (InputLayer)              [(None, 60, 48)]     0                                            
__________________________________________________________________________________________________
model (Functional)              (None, 2)            9966        input[0][0]                      
__________________________________________________________________________________________________
model_1 (Functional)            (None, 2)            20568       input[0][0]                      
==================================================================================================
Total params: 20,568
Trainable params: 20,472
Non-trainable params: 96
__________________________________________________________________________________________________
None
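
Because the Dense layers before the pooling act on each track independently and GlobalAveragePooling1D takes an unweighted mean over tracks, the classifier output should be invariant under any permutation of the tracks. A quick illustrative check with random inputs:

rng = np.random.default_rng(0)
x_rand = rng.normal(size=(1, ntracks, nfeatures)).astype('float32')
perm = rng.permutation(ntracks)
# shuffling the track axis should not change the prediction
p_orig = keras_model_disc.predict(x_rand)
p_perm = keras_model_disc.predict(x_rand[:, perm, :])
print(np.allclose(p_orig, p_perm, atol=1e-5))  # expect True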

Load data

from DataGenerator import DataGenerator
# load training and validation generators 
train_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/train/ntuple_merged_10.root']
val_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/train/ntuple_merged_11.root']


train_generator = DataGenerator(train_files, features, labels, spectators, batch_size=1024, n_dim=ntracks, 
                                remove_mass_pt_window=False, 
                                remove_unlabeled=True, max_entry=5000,
                                return_spectators=True, scale_mass_pt=[100., 10000.])

val_generator = DataGenerator(val_files, features, labels, spectators, batch_size=1024, n_dim=ntracks, 
                              remove_mass_pt_window=False, 
                              remove_unlabeled=True, max_entry=5000, 
                              return_spectators=True, scale_mass_pt=[100., 10000.])
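
As a quick sanity check, one can peek at a single batch. The structure (X, [y_class, y_mass_pt]) is inferred from how the generator is indexed in the training loops below; the exact shapes are assumptions based on the settings above:

X, (y_class, y_mass_pt) = train_generator[0]
print(X.shape)          # expected: (batch_size, ntracks, nfeatures)
print(y_class.shape)    # expected: (batch_size, nlabels)
print(y_mass_pt.shape)  # expected: (batch_size, 2) for scaled (mass, pT)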

Pretrain discriminator and regressor models

# pretrain discriminator
keras_model_disc.trainable = True
keras_model_disc.compile(optimizer='adam',
                         loss='categorical_crossentropy')
for n_epoch in tqdm(range(20)):
    for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch == 19)):
        keras_model_disc.fit(t[0], t[1][0], verbose=0)

# pretrain regressor with the discriminator weights frozen
# (in Keras, changes to `trainable` only take effect at compile time)
keras_model_reg.trainable = True
keras_model_disc.trainable = False
keras_model_reg.compile(optimizer=sgd_opt, loss='mse')
for n_epoch in tqdm(range(20)):
    for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch == 19)):
        keras_model_reg.fit(t[0], t[1][1], verbose=0)
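
Note that fit is called once per batch here; train_on_batch is a lighter-weight equivalent for such a manual loop. A sketch of the discriminator pretraining step written that way (same updates, assuming the same generator interface):

for n_epoch in tqdm(range(20)):
    for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch == 19)):
        keras_model_disc.train_on_batch(t[0], t[1][0])  # one gradient step per batch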

Main training loop

During the main training loop, we do two things:

  1. Train the discriminator model with the combined loss function $L = L_\mathrm{class} - \lambda L_\mathrm{reg}$ (with the regressor weights frozen)

  2. Train the regression model to learn the mass and $p_\mathrm{T}$ from the classifier output with the standard MSE loss function $L_\mathrm{reg}$ (with the discriminator weights frozen)

# alternate training of the discriminator and the regressor
for n_epoch in tqdm(range(40)):
    for t in tqdm(train_generator, total=len(train_generator), leave=bool(n_epoch == 39)):
        # train discriminator (recompile so the updated trainable flags take effect)
        keras_model_reg.trainable = False
        keras_model_disc.trainable = True
        keras_model_adv.compile(optimizer=sgd_opt,
                                loss=['categorical_crossentropy', 'mse'],
                                loss_weights=[1, -lam])
        keras_model_adv.fit(t[0], t[1], verbose=0)

        # train regressor against the (now frozen) discriminator
        keras_model_reg.trainable = True
        keras_model_disc.trainable = False
        keras_model_reg.compile(optimizer=sgd_opt, loss='mse')
        keras_model_reg.fit(t[0], t[1][1], verbose=0)
keras_model_adv.save_weights('keras_model_adv_best.h5')
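
Since keras_model_adv shares its layers with keras_model_disc and keras_model_reg, restoring its weights also restores the trained classifier evaluated in the test section below:

# restore the trained weights without retraining (path matches the save above)
keras_model_adv.load_weights('keras_model_adv_best.h5')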

Test

# load testing file
test_files = ['root://eospublic.cern.ch//eos/opendata/cms/datascience/HiggsToBBNtupleProducerTool/HiggsToBBNTuple_HiggsToBB_QCD_RunII_13TeV_MC/test/ntuple_merged_0.root']
test_generator = DataGenerator(test_files, features, labels, spectators, batch_size=8192, n_dim=ntracks, 
                               remove_mass_pt_window=True, 
                               remove_unlabeled=True,
                               return_spectators=True,
                               max_entry=200000) # basically, no maximum
(On this run, opening the remote test file over XRootD timed out with `OSError: XRootD error: [ERROR] Operation expired`; the remaining cells assume the file was read successfully.)
# run model inference on the test data set
predict_array_adv = []
label_array_test = []
spec_array_test = []

for t in tqdm(test_generator, total=len(test_generator)):
    label_array_test.append(t[1][0])
    spec_array_test.append(t[1][1])
    # keras_model_adv returns [classifier output, regressor output]; keep the classifier part
    predict_array_adv.append(keras_model_adv.predict(t[0])[0])
predict_array_adv = np.concatenate(predict_array_adv, axis=0)
label_array_test = np.concatenate(label_array_test, axis=0)
spec_array_test = np.concatenate(spec_array_test, axis=0)
# create ROC curves
print(label_array_test.shape)
print(spec_array_test.shape)
print(predict_array_adv.shape)
fpr_adv, tpr_adv, threshold_adv = roc_curve(label_array_test[:,1], predict_array_adv[:,1])

# plot ROC curve
plt.figure()
plt.plot(tpr_adv, fpr_adv, lw=2.5, label="Adversarial, AUC = {:.1f}%".format(auc(fpr_adv,tpr_adv)*100))
plt.xlabel(r'True positive rate')
plt.ylabel(r'False positive rate')
plt.semilogy()
plt.ylim(0.001, 1)
plt.xlim(0, 1)
plt.grid(True)
plt.legend(loc='upper left')
plt.show()
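
The next cells use find_nearest from the course's utils module. If it is unavailable, a minimal stand-in consistent with its usage below (returning the index and value of the array entry closest to the target; an assumption, not the module's actual code) might look like this:

import numpy as np

def find_nearest(array, value):
    # index and value of the entry in `array` closest to `value`
    idx = int(np.abs(np.asarray(array) - value).argmin())
    return idx, array[idx]
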
from utils import find_nearest
plt.figure()
for wp in [1.0, 0.5, 0.3, 0.1, 0.05]:
    idx, val = find_nearest(fpr_adv, wp)
    plt.hist(spec_array_test[:,0], bins=np.linspace(40, 200, 21), 
             weights=label_array_test[:,0]*(predict_array_adv[:,1] > threshold_adv[idx]),
             alpha=0.4, density=True, label='QCD, {}% FPR cut'.format(int(wp*100)),linestyle='-')
plt.legend()
plt.xlabel(r'$m_{SD}$')
plt.ylabel(r'Normalized probability')
plt.xlim(40, 200)

plt.figure()
for wp in [1.0, 0.5, 0.3, 0.1, 0.05]:
    idx, val = find_nearest(fpr_adv, wp)
    plt.hist(spec_array_test[:,0], bins=np.linspace(40, 200, 21), 
             weights=label_array_test[:,1]*(predict_array_adv[:,1] > threshold_adv[idx]),
             alpha=0.4, density=True, label='H(bb), {}% FPR cut'.format(int(wp*100)),linestyle='-')
plt.legend()
plt.xlabel(r'$m_{SD}$')
plt.ylabel(r'Normalized probability')
plt.xlim(40, 200)
plt.show()

# classifier output distributions for H(bb) and QCD
plt.figure()
plt.hist(predict_array_adv[:,1], bins=np.linspace(0, 1, 21),
         weights=label_array_test[:,1]*0.1,
         alpha=0.4, linestyle='-', label='H(bb)')
plt.hist(predict_array_adv[:,1], bins=np.linspace(0, 1, 21),
         weights=label_array_test[:,0],
         alpha=0.4, linestyle='-', label='QCD')
plt.legend()
plt.show()


# soft-drop mass distributions for H(bb) and QCD (no classifier cut)
plt.figure()
plt.hist(spec_array_test[:,0], bins=np.linspace(40, 200, 21),
         weights=label_array_test[:,1]*0.1,
         alpha=0.4, linestyle='-', label='H(bb)')
plt.hist(spec_array_test[:,0], bins=np.linspace(40, 200, 21),
         weights=label_array_test[:,0],
         alpha=0.4, linestyle='-', label='QCD')
plt.legend()
plt.show()