#!/usr/bin/env python #--- TrainEvaluateMVA.py ------------------------------------------- # # template code to use the TMVA package of ROOT # adapted by GQ from TMVA Tutorial and example code by Andre Holzner # # - generates signal and background samples # - sets up TMVA # - some plotting # - starts TMVAGui to display result plots # # Version History: # 03-Jun-15 GQ initial version # # # NOTE: INITIALIZE # source # # ------------------------------------------------------------------- import numpy as np import sys # ------ Helper Function to some generate Data def ranGaus2D(mux=0.,muy=0.,sigx=1.,sigy=1.,rho=0): # 2D-Gaussian normal distribution u1=ROOT.gRandom.Gaus(0.,1.) u2=ROOT.gRandom.Gaus(0.,1.) x=mux+sigx*u1 y=muy+sigy*( np.sqrt(1-rho*rho)*u2 + rho*u1) return x,y def SmearedCircle(x0=0., y0=0., r=1., sigr=1.,fseg=1.): theta=fseg*np.pi * ROOT.gRandom.Rndm() x=x0+r*np.cos(theta) + sigr* ROOT.gRandom.Gaus() y=y0+r*np.sin(theta) + sigr* ROOT.gRandom.Gaus() return x,y # ------ End Helper Functions for data Generation # ------------------------------------------------ if len(sys.argv) < 2: print "Give one argument \"Circle\"or \"Gaussian\"" sys.exit(2) if len(sys.argv) > 2: print "One argument only please" sys.exit(2) if sys.argv[1] != "Gaussian" and sys.argv[1] != 'Circle': print "Please enter Gaussian or Circle as argument" sys.exit(2) # ------ set up ROOT import ROOT # style options for plotting ROOT.gStyle.SetOptStat(0) # no statistics boxes # --------------------------------------------------- # generate test data (Gaussian or Circle) # --------------------------------------------------- ntuple = ROOT.TNtuple("ntuple","ntuple","x:y:z:signal") # create a TNtuple # 'signal' and 'background' distributions (3-dim) nevent=10000 for i in range(nevent): if sys.argv[1] == 'Gaussian': # ------------------------------------- # a) Gaussian (Signal) x0=0 y0=1. sigx=1. sigy=1. rho=0.75 x,y=ranGaus2D(x0,y0,sigx,sigy,rho) # z=ROOT.gRandom.Rndm() # a third variable with a flat distribution z=1. ntuple.Fill(x,y,z,1) # Gaussian (Background) x0=0 y0=-1. sigx=1. sigy=1. rho=0.75 x,y=ranGaus2D(x0,y0,sigx,sigy,rho) # z=np.sqrt(ROOT.gRandom.Rndm()) # triangular for third variable z=1. ntuple.Fill(x,y,z,0) else: # ------------------------------------- # b) Circle with Gaussian spread (Signal) x0=-1. y0=-1. r=2. # radius sigr=0.25 # sigma of Gaussian smearing fseg=2. # angular range in units of pi x,y=SmearedCircle(x0, y0, r, sigr, fseg) z=np.sqrt(ROOT.gRandom.Rndm()) # triangular for third variable ntuple.Fill(x,y,z,1) # Circle (Background) x0=1. y0=1. r=2. # radius sigr=0.25 # sigma of Gaussian smearing fseg=2. # angular range in units of pi x,y=SmearedCircle(x0, y0, r, sigr, fseg) z=np.sqrt(ROOT.gRandom.Rndm()) # triangular for third variable ntuple.Fill(x,y,z,0) # --------------------------------------------------- # visualize data # --------------------------------------------------- c1=ROOT.TCanvas("Data") # create a new TCanvas c1.cd() # draw an empty 2D histogram for the axes histo = ROOT.TH2F("histo","",1,-5,5,1,-5,5) histo.Draw() if nevent<300: ntuple.SetMarkerStyle(8) # draw the signal events in red ntuple.SetMarkerColor(ROOT.kRed) ntuple.Draw("y:x","signal > 0.5","same") # draw the background events in blue ntuple.SetMarkerColor(ROOT.kBlue) ntuple.Draw("y:x","signal <= 0.5","same") c1.Update() #raw_input('Press to continue -> ') # --------------------------------------------------- # set up TMVA "factory" # --------------------------------------------------- ROOT.TMVA.Tools.Instance() # note that it seems to be mandatory to have an # output file, just passing None to TMVA::Factory(..) # does not work. Make sure you don't overwrite an # existing file. fout = ROOT.TFile("MVAtest.root","RECREATE") factory = ROOT.TMVA.Factory("TMVAClassification", fout, ":".join([ "!V", "!Silent", "Color", "DrawProgressBar", "Transformations=I;D;P;G,D", "AnalysisType=Classification"] )) dataloader = ROOT.TMVA.DataLoader("dataset") # --------------------------------------------------- # add variables and define training samples # --------------------------------------------------- dataloader.AddVariable("x","F") dataloader.AddVariable("y","F") #dataloader.AddVariable("z","F") dataloader.AddSignalTree(ntuple) dataloader.AddBackgroundTree(ntuple) # cuts defining the signal and background sample sigCut = ROOT.TCut("signal > 0.5") bgCut = ROOT.TCut("signal <= 0.5") dataloader.PrepareTrainingAndTestTree(sigCut, # signal events bgCut, # background events ":".join([ "nTrain_Signal=0", "nTrain_Background=0", "SplitMode=Random", "NormMode=NumEvents", "!V" ])) # --------------------------------------------------- # book a selction of methods # options are taken from defaults in TMVAguide # --------------------------------------------------- factory.BookMethod(dataloader,ROOT.TMVA.Types.kLikelihood, "Likelihood", ":".join(["H", "!V", # "TransformOutput", "PDFInterpol=Spline2", "NSmoothSig[0]=20", "NSmoothBkg[0]=20", "NSmoothBkg[1]=10", "NSmooth=1", "NAvEvtPerBin=50", "VarTransform=Decorrelate" ])) factory.BookMethod(dataloader,ROOT.TMVA.Types.kKNN, "nearestNeighbour", ":".join(["H","!V", "nkNN=20", "ScaleFrac=0.8", "SigmaFact=1.0", "Kernel=Gaus:UseKernel=F", "UseWeight=T", "!Trim" ])) factory.BookMethod(dataloader,ROOT.TMVA.Types.kFisher, "Fisher", ":".join(["H","!V", "Fisher", "VarTransform=None", "CreateMVAPdfs", "PDFInterpolMVAPdf=Spline2", "NbinsMVAPdf=100", "NsmoothMVAPdf=10" ])) factory.BookMethod(dataloader,ROOT.TMVA.Types.kBDT, "BDT", ":".join(["H","!V", "NTrees=500", # try 1, 10, 100 or 1000 "nEventsMin=150", "MaxDepth=3", "BoostType=AdaBoost", "AdaBoostBeta=0.5", "SeparationType=GiniIndex", "nCuts=20", "PruneMethod=NoPruning" ])) factory.BookMethod(dataloader,ROOT.TMVA.Types.kMLP, "MLP", ":".join(["H","!V", "NeuronType=sigmoid", "VarTransform=N", "NCycles=600", "HiddenLayers=N+5", "TestRate=5", "!UseRegulator" ])) factory.BookMethod(dataloader,ROOT.TMVA.Types.kSVM, "SVM", ":".join(["H","!V", "Gamma=0.25", "Tol=0.001", ])) # ------------------------------------------------------------- # perform training for all booked methods and evaluate results # ------------------------------------------------------------- factory.TrainAllMethods() factory.TestAllMethods() factory.EvaluateAllMethods() # ------------------------------------------------------------- # at this stage, the output of this script is stored in # weights/TMVAClassification_BDT.weights.xml # end of training # ------------------------------------------------------------- # Analysis part - using TMVA Reader # ------------------------------------------------------------- def plotResponse(meth,hist): # function to plot classifier response for i in range(1,hist.GetNbinsX() + 1): for j in range(1,hist.GetNbinsY() + 1): varx[0] = hist.GetXaxis().GetBinCenter(i) vary[0] = hist.GetYaxis().GetBinCenter(j) # set the bin content equal to the classifier output hist.SetBinContent(i,j,reader.EvaluateMVA(meth)) # Book a reader reader = ROOT.TMVA.Reader() import array varx = array.array('f',[0]) ; reader.AddVariable("x",varx) vary = array.array('f',[0]) ; reader.AddVariable("y",vary) # plot response for methods in this tuple methods=("Likelihood","nearestNeighbour","Fisher","BDT","MLP","SVM") # create a new 2D histogram with fine binning c2=ROOT.TCanvas("Classifier Response") c2.Divide(3,2) h=[] for i, m in enumerate(methods): c2.cd(i+1) h.append(ROOT.TH2F("histo"+m,"",200,-5,5,200,-5,5)) reader.BookMVA(m,"dataset/weights/TMVAClassification_"+m+".weights.xml") plotResponse(m,h[i]) h[i].SetTitle(m) h[i].DrawCopy("cont1") # overlay the data - signal events in red ntuple.SetMarkerColor(ROOT.kRed) ntuple.Draw("y:x","signal > 0.5","same") # and background events in blue ntuple.SetMarkerColor(ROOT.kBlue) ntuple.Draw("y:x","signal <= 0.5","same") c2.Update() c2.Print("ClassifierResponse.pdf") raw_input('Press to continue -> ') # close output file fout.Close() # start TMVAGui to investigate results ROOT.TMVA.TMVAGui("MVAtest.root") raw_input('Press to continue -> ')