from __future__ import print_function
from PyRDF.Node import Node
import ROOT
from PyRDF.Proxy import TransformationProxy
import logging
# Module-level logger, named after this module through ``__name__``.
logger = logging.getLogger(__name__)
class RDataFrame(object):
    """
    Entry point users call to start a PyRDF computational graph.

    It mirrors the name of ROOT C++'s RDataFrame class, but instantiating it
    does not yield an instance of this class: a :obj:`HeadNode` is built from
    the arguments and a proxy wrapping that node is handed back instead.
    """

    def __new__(cls, *args):
        """
        Build the head node of the graph and return a proxy wrapping it.

        Args:
            *args (list): Arguments supplied by the user to construct the
                RDataFrame object; forwarded unchanged to :obj:`HeadNode`.

        Returns:
            TransformationProxy: Proxy around the newly created head node.
        """
        proxy_head = TransformationProxy(HeadNode(*args))

        logger.debug("Created RDataFrame head node and proxy")

        return proxy_head
class HeadNode(Node):
    """
    Head node of the computational graph: the Python counterpart of ROOT
    C++'s RDataFrame class.

    The constructor takes the same arguments as ROOT's RDataFrame constructor
    (see
    `RDataFrame <https://root.cern/doc/master/classROOT_1_1RDataFrame.html>`_).
    On top of that, a Python list may be passed wherever a C++ vector is
    expected; every list argument is converted to a ``std::vector<string>``
    before being handed to ROOT, example::

        PyRDF.RDataFrame("myTree", ["file1.root", "file2.root"])

    Attributes:
        args (list): The constructor arguments, with every Python list
            replaced by the ROOT vector built from it.

    Raises:
        RDataFrameException: When ROOT rejects the arguments with a
            :obj:`TypeError`.
    """

    def __init__(self, *args):
        """
        Validate the arguments against ROOT and store them on the node.

        Args:
            *args (list): Variable length argument list to construct the
                RDataFrame object.

        Raises:
            RDataFrameException: When ``ROOT.ROOT.RDataFrame`` raises a
                :obj:`TypeError` for the (converted) arguments.
        """
        super(HeadNode, self).__init__(None, None, *args)

        # Swap every Python list for a ROOT C++ vector; other arguments are
        # passed through untouched.
        converted = [
            self._get_vector_from_list(arg) if isinstance(arg, list) else arg
            for arg in args
        ]

        try:
            # Throwaway construction, done only to check the arguments.
            ROOT.ROOT.RDataFrame(*converted)
        except TypeError as err:
            failure = RDataFrameException(
                err, "Error creating the RDataFrame !")
            # Clearing the cause suppresses the traceback of the original
            # TypeError when the new exception is reported.
            failure.__cause__ = None
            raise failure

        self.args = converted

    def get_branches(self):
        """
        Return the default branches argument, if the user passed one.

        Returns:
            The default branches as stored in ``self.args``, or :obj:`None`
            when the arguments do not match a pattern that carries them.
        """
        args = self.args

        # ROOT Constructors:
        # RDataFrame(treeName, filenameglob, defaultBranches = {})
        # RDataFrame(treename, filenames, defaultBranches = {})
        # RDataFrame(treeName, dirPtr, defaultBranches = {})
        if len(args) == 3:
            return args[2]

        # ROOT Constructor:
        # RDataFrame(TTree& tree, defaultBranches = {})
        if len(args) == 2 and isinstance(args[0], ROOT.TTree):
            return args[1]

        return None

    def _get_vector_from_list(self, arg):
        """
        Copy the elements of a Python list into a ``std::vector<string>``.

        Args:
            arg (list): Elements to append, in order, to the new vector.

        Returns:
            The newly created ROOT vector.
        """
        vec = ROOT.std.vector('string')()
        for item in arg:
            vec.push_back(item)
        return vec

    def get_num_entries(self):
        """
        Compute the number of entries in the input dataset.

        Returns:
            int: The first argument itself when it is an integer, the entry
            count of the tree when it is a TTree or TChain, otherwise the
            entry count of a TChain built from the tree name and the file
            name(s) given as second argument.
        """
        first = self.args[0]

        if isinstance(first, int):
            # The integer is the number of entries.
            return first

        if isinstance(first, ROOT.TTree):
            # TTree or TChain: ask the tree directly.
            return first.GetEntries()

        # Otherwise the first argument names the tree and the second one
        # says which file(s) to read it from.
        second = self.args[1]
        chain = ROOT.TChain(first)

        if isinstance(second, str):
            # A single string is added as-is.
            filenames = [second]
        else:
            # A list or vector: add each element as a string.
            filenames = [str(fname) for fname in second]

        for fname in filenames:
            chain.Add(fname)

        return chain.GetEntries()

    def get_treename(self):
        """
        Return the name of the TTree behind this dataframe.

        Returns:
            (str, None): Name of the TTree, or :obj:`None` if there is no tree.
        """
        first = self.args[0]

        if isinstance(first, ROOT.TChain):
            # A TChain reports its own name.
            return first.GetName()

        if isinstance(first, ROOT.TTree):
            # NOTE(review): for a plain TTree the name is read from the first
            # entry of its user-info list rather than from the tree itself;
            # confirm that this list is always populated.
            return first.GetUserInfo().At(0).GetName()

        # A string first argument is the tree name itself; anything else
        # means the RDataFrame was created without a TTree or TChain.
        return first if isinstance(first, str) else None

    def get_tree(self):
        """
        Return the ROOT.TTree instance passed to PyRDF.RDataFrame(), if any.

        Returns:
            (ROOT.TTree, None): The first constructor argument when it is a
            ROOT.TTree, otherwise :obj:`None`. ROOT.TChain inherits from
            ROOT.TTree, so a TChain can be returned as well.
        """
        first = self.args[0]
        return first if isinstance(first, ROOT.TTree) else None
class RDataFrameException(Exception):
    """
    Exception raised when RDataFrame is given incorrect arguments.
    """

    def __init__(self, exception, msg):
        """
        Wrap the original exception and print the accompanying message.

        Args:
            exception: An exception of type :obj:`Exception` or any child
                class of :obj:`Exception`; stored as the only element of
                ``args``.
            msg (str): Message printed when the exception object is created.
        """
        super(RDataFrameException, self).__init__(exception)
        # The message is written to standard output, not stored on the
        # exception instance.
        print(msg)