from __future__ import print_function
from PyRDF.Operation import Operation
import logging
logger = logging.getLogger(__name__)
[docs]class Node(object):
"""
A Class that represents a node in RDataFrame operations graph. A Node
houses an operation and has references to children nodes.
For details on the types of operations supported, try :
Example::
import PyRDF
PyRDF.use(...) # Choose your backend
print(PyRDF.current_backend.supported_operations)
Attributes:
get_head (function): A lambda function that returns the head node of
the current graph.
operation: The operation that this Node represents.
This could be :obj:`None`.
children (list): A list of :obj:`PyRDF.Node` objects which represent
the children nodes connected to the current node.
_new_op_name (str): The name of the new incoming operation of the next
child, which is the last child node among the current node's
children.
value: The computed value after executing the operation in the current
node for a particular PyRDF graph. This is permanently :obj:`None`
for transformation nodes and the action nodes get a
:obj:`ROOT.RResultPtr` after event-loop execution.
pyroot_node: Reference to the PyROOT object that implements the
functionality of this node on the cpp side.
has_user_references (bool): A flag to check whether the node has
direct user references, that is if it is assigned to a variable.
Default value is :obj:`True`, turns to :obj:`False` if the proxy
that wraps the node gets garbage collected by Python.
"""
[docs] def __init__(self, get_head, operation, *args):
"""
Creates a new node based on the operation passed as argument.
Args:
get_head (function): A lambda function that returns the head node
of the current graph. This value could be `None`.
operation (PyRDF.Operation.Operation): The operation that this Node
represents. This could be :obj:`None`.
"""
if get_head is None:
# Function to get 'head' Node
self.get_head = lambda: self
else:
self.get_head = get_head
self.operation = operation
self.children = []
self._new_op_name = ""
self.value = None
self.pyroot_node = None
self.has_user_references = True
[docs] def __getstate__(self):
"""
Converts the state of the current node
to a Python dictionary.
Returns:
dictionary: A dictionary that stores all instance variables
that represent the current PyRDF node.
"""
state_dict = {'children': self.children}
if self.operation:
state_dict['operation_name'] = self.operation.name
state_dict['operation_args'] = self.operation.args
state_dict['operation_kwargs'] = self.operation.kwargs
return state_dict
[docs] def __setstate__(self, state):
"""
Retrieves the state dictionary of the current
node and sets the instance variables.
Args:
state (dict): This is the state dictionary that needs to be
converted to a `Node` object.
"""
self.children = state['children']
if state.get('operation_name'):
self.operation = Operation(state['operation_name'],
*state['operation_args'],
**state["operation_kwargs"])
else:
self.operation = None
[docs] def is_prunable(self):
"""
Checks whether the current node can be pruned from the computational
graph.
Returns:
bool: True if the node has no children and no user references or
its value has already been computed, False otherwise.
"""
if not self.children:
# Every pruning condition is written on a separate line
if not self.has_user_references or \
(self.operation and self.operation.is_action() and self.value):
# ***** Condition 1 *****
# If the node is wrapped by a proxy which is not directly
# assigned to a variable, then it will be flagged for pruning
# ***** Condition 2 *****
# If the current node's value was already
# computed, it should get pruned only if it's
# an Action node.
# Logger debug statements
logger.debug("{} node can be pruned".format(
self.operation.name
))
return True
# Logger debug statements
if self.operation: # Node has an operation
logger.debug("{} node shouldn't be pruned".format(
self.operation.name
))
else: # Node is the RDataFrame
logger.debug("Graph pruning completed")
return False
[docs] def graph_prune(self):
"""
Prunes nodes from the current PyRDF graph under certain conditions.
The current node will be pruned if it has no children and the user
application does not hold any reference to it. The children of the
current node will get recursively pruned.
Returns:
bool: True if the current node has to be pruned, False otherwise.
"""
children = []
# Logger debug statements
if self.operation:
logger.debug("Checking {} node for pruning".format(
self.operation.name
))
else:
logger.debug("Starting computational graph pruning")
for n in self.children:
# Logger debug statement
# Select children based on pruning condition
if not n.graph_prune():
children.append(n)
self.children = children
return self.is_prunable()