about summary refs log tree commit diff
diff options
context:
space:
mode:
author Cathy Yeh <cathy@driver.xyz> 2017-12-13 18:47:32 -0800
committer Cathy Yeh <cathy@driver.xyz> 2017-12-14 15:39:52 -0800
commit 7053fefc6f9e43b1e252d1f551401a7a70b52e93 (patch)
tree 2b232454fbc71f4acc877cbf03f61565bae88b98
parent 10f5c49ea6767f54d59f88eb4064bb4959d14c6b (diff)
download beliefs-7053fefc6f9e43b1e252d1f551401a7a70b52e93.tar.gz
beliefs-7053fefc6f9e43b1e252d1f551401a7a70b52e93.tar.bz2
beliefs-7053fefc6f9e43b1e252d1f551401a7a70b52e93.zip
cleanup print statements, stale comments, minor TODOs
-rw-r--r-- beliefs/factors/bernoulli_and_cpd.py 7
-rw-r--r-- beliefs/factors/bernoulli_or_cpd.py 7
-rw-r--r-- beliefs/factors/cpd.py 29
-rw-r--r-- beliefs/factors/discrete_factor.py 32
-rw-r--r-- beliefs/inference/belief_propagation.py 77
-rw-r--r-- beliefs/models/base_models.py 90
-rw-r--r-- beliefs/models/belief_update_node_model.py 238
-rw-r--r-- beliefs/utils/math_helper.py 14
-rw-r--r-- beliefs/utils/random_variables.py 17
9 files changed, 306 insertions, 205 deletions
diff --git a/beliefs/factors/bernoulli_and_cpd.py b/beliefs/factors/bernoulli_and_cpd.py
index 15802c2..291398f 100644
--- a/beliefs/factors/bernoulli_and_cpd.py
+++ b/beliefs/factors/bernoulli_and_cpd.py
@@ -12,9 +12,10 @@ class BernoulliAndCPD(TabularCPD):
"""
def __init__(self, variable, parents=[]):
"""
- Args:
- variable: int or string
- parents: optional, list of int and/or strings
+ Args
+ variable: int or string
+ parents: list,
+ (optional) list of int and/or strings
"""
super().__init__(variable=variable,
variable_card=2,
diff --git a/beliefs/factors/bernoulli_or_cpd.py b/beliefs/factors/bernoulli_or_cpd.py
index 5b661a1..b5e6ae5 100644
--- a/beliefs/factors/bernoulli_or_cpd.py
+++ b/beliefs/factors/bernoulli_or_cpd.py
@@ -12,9 +12,10 @@ class BernoulliOrCPD(TabularCPD):
"""
def __init__(self, variable, parents=[]):
"""
- Args:
- variable: int or string
- parents: optional, list of int and/or strings
+ Args
+ variable: int or string
+ parents: list,
+ (optional) list of int and/or strings
"""
super().__init__(variable=variable,
variable_card=2,
diff --git a/beliefs/factors/cpd.py b/beliefs/factors/cpd.py
index 9e7191f..c7883c9 100644
--- a/beliefs/factors/cpd.py
+++ b/beliefs/factors/cpd.py
@@ -1,3 +1,4 @@
+import copy
import numpy as np
from beliefs.factors.discrete_factor import DiscreteFactor
@@ -7,16 +8,18 @@ class TabularCPD(DiscreteFactor):
Defines the conditional probability table for a discrete variable
whose parents are also discrete.
"""
- def __init__(self, variable, variable_card,
- parents=[], parents_card=[], values=[], state_names=None):
+ def __init__(self, variable, variable_card, parents=[], parents_card=[],
+ values=[], state_names=None):
"""
- Args:
- variable: int or string
- variable_card: int
- parents: optional, list of int and/or strings
- parents_card: optional, list of int
- values: optional, 2d list or array
- state_names: dictionary (optional),
+ Args
+ variable: int or string
+ variable_card: int
+ parents: list,
+ (optional) list of int and/or strings
+ parents_card: list,
+ (optional) list of int
+ values: 2-d list or array (optional)
+ state_names: dictionary (optional),
mapping variables to their states, of format {label_name: ['state1', 'state2']}
"""
super().__init__(variables=[variable] + parents,
@@ -24,7 +27,7 @@ class TabularCPD(DiscreteFactor):
values=values,
state_names=state_names)
self.variable = variable
- self.parents = parents
+ self.parents = list(parents)
def get_values(self):
"""
@@ -36,8 +39,4 @@ class TabularCPD(DiscreteFactor):
return self.values.reshape(self.cardinality[0], np.prod(self.cardinality[1:]))
def copy(self):
- return self.__class__(self.variable,
- self.cardinality[0],
- self.parents,
- self.cardinality[1:],
- self._values)
+ return copy.deepcopy(self)
diff --git a/beliefs/factors/discrete_factor.py b/beliefs/factors/discrete_factor.py
index b75da28..708f00c 100644
--- a/beliefs/factors/discrete_factor.py
+++ b/beliefs/factors/discrete_factor.py
@@ -18,7 +18,7 @@ class DiscreteFactor:
mapping variables to their states, of format {label_name: ['state1', 'state2']}
"""
self.variables = list(variables)
- self.cardinality = cardinality
+ self.cardinality = list(cardinality)
if values is None:
self._values = None
else:
@@ -28,6 +28,13 @@ class DiscreteFactor:
def __mul__(self, other):
return self.product(other)
+ def copy(self):
+ """Return a copy of the factor"""
+ return self.__class__(self.variables,
+ self.cardinality,
+ self._values,
+ copy.deepcopy(self.state_names))
+
@property
def values(self):
return self._values
@@ -56,7 +63,7 @@ class DiscreteFactor:
return self.values[tuple(state_coordinates)]
def add_new_variables_from_other_factor(self, other):
- """Add new variables to the factor."""
+ """Add new variables from `other` factor to the factor."""
extra_vars = set(other.variables) - set(self.variables)
# if all of these variables already exist there is nothing to do
if len(extra_vars) == 0:
@@ -69,33 +76,24 @@ class DiscreteFactor:
new_card_var = other.get_cardinality(extra_vars)
self.cardinality.extend([new_card_var[var] for var in extra_vars])
- return
def get_cardinality(self, variables):
return {var: self.cardinality[self.variables.index(var)] for var in variables}
def product(self, other):
- left = copy.deepcopy(self)
+ left = self.copy()
if isinstance(other, (int, float)):
- # TODO: handle case of multiplication by constant
- pass
+ return self.values * other
else:
- # assert right is a class or subclass of DiscreteFactor
- # that has attributes: variables, values; method: get_cardinality
- right = copy.deepcopy(other)
+ assert isinstance(other, DiscreteFactor), \
+ "__mul__ is only defined between subclasses of DiscreteFactor"
+ right = other.copy()
left.add_new_variables_from_other_factor(right)
right.add_new_variables_from_other_factor(left)
- print('var', left.variables)
- print(left.cardinality)
- print(left.values)
- print('var', right.variables)
- print(right.cardinality)
- print(right.values)
# reorder variables in right factor to match order in left
source_axes = list(range(right.values.ndim))
- print('source_axes', source_axes)
destination_axes = [right.variables.index(var) for var in left.variables]
right.variables = [right.variables[idx] for idx in destination_axes]
@@ -110,7 +108,7 @@ class DiscreteFactor:
vars: list,
variables over which to marginalize the factor
Returns
- DiscreteFactor
+ DiscreteFactor, whose scope is set(self.variables) - set(vars)
"""
phi = copy.deepcopy(self)
diff --git a/beliefs/inference/belief_propagation.py b/beliefs/inference/belief_propagation.py
index 128f645..acd93d4 100644
--- a/beliefs/inference/belief_propagation.py
+++ b/beliefs/inference/belief_propagation.py
@@ -28,10 +28,10 @@ class ConflictingEvidenceError(Exception):
class BeliefPropagation:
def __init__(self, model, inplace=True):
"""
- Input:
- model: an instance of BeliefUpdateNodeModel
- inplace: bool
- modify in-place the nodes in the model during belief propagation
+ Args
+ model: an instance of BeliefUpdateNodeModel
+ inplace: bool,
+ modify in-place the nodes in the model during belief propagation
"""
if not isinstance(model, BeliefUpdateNodeModel):
raise TypeError("Model must be an instance of BeliefUpdateNodeModel")
@@ -43,21 +43,20 @@ class BeliefPropagation:
def _belief_propagation(self, nodes_to_update, evidence):
"""
Implementation of Pearl's belief propagation algorithm for polytrees.
-
ref: "Fusion, Propagation, and Structuring in Belief Networks"
Artificial Intelligence 29 (1986) 241-288
- Input:
- nodes_to_update: list
- list of MsgPasser namedtuples.
- evidence: dict,
- a dict key, value pair as {var: state_of_var observed}
+ Args
+ nodes_to_update: list,
+ list of MsgPasser namedtuples.
+ evidence: dict,
+ a dict key, value pair as {var: state_of_var observed}
"""
if len(nodes_to_update) == 0:
return
node_to_update_label_id, msg_sender_label_id = nodes_to_update.pop()
- logging.info("Node: %s", node_to_update_label_id)
+ logging.debug("Node: %s", node_to_update_label_id)
node = self.model.nodes_dict[node_to_update_label_id]
@@ -65,8 +64,8 @@ class BeliefPropagation:
# outgoing msg from the node to update
parent_ids = set(node.parents) - set([msg_sender_label_id])
child_ids = set(node.children) - set([msg_sender_label_id])
- logging.info("parent_ids: %s", str(parent_ids))
- logging.info("child_ids: %s", str(child_ids))
+ logging.debug("parent_ids: %s", str(parent_ids))
+ logging.debug("child_ids: %s", str(child_ids))
if msg_sender_label_id is not None:
# update triggered by receiving a message, not pinning to evidence
@@ -74,9 +73,9 @@ class BeliefPropagation:
if node_to_update_label_id not in evidence:
node.compute_pi_agg()
- logging.info("belief propagation pi_agg: %s", np.array2string(node.pi_agg.values))
+ logging.debug("belief propagation pi_agg: %s", np.array2string(node.pi_agg.values))
node.compute_lambda_agg()
- logging.info("belief propagation lambda_agg: %s", np.array2string(node.lambda_agg.values))
+ logging.debug("belief propagation lambda_agg: %s", np.array2string(node.lambda_agg.values))
for parent_id in parent_ids:
try:
@@ -101,14 +100,14 @@ class BeliefPropagation:
def initialize_model(self):
"""
- Apply boundary conditions:
+ 1. Apply boundary conditions:
- Set pi_agg equal to prior probabilities for root nodes.
- Set lambda_agg equal to vector of ones for leaf nodes.
- - Set lambda_agg, lambda_received_msgs to vectors of ones (same effect as
- actually passing lambda messages up from leaf nodes to root nodes).
- - Calculate pi_agg and pi_received_msgs for all nodes without evidence.
- (Without evidence, belief equals pi_agg.)
+ 2. Set lambda_agg, lambda_received_msgs to vectors of ones (same effect as
+ actually passing lambda messages up from leaf nodes to root nodes).
+ 3. Calculate pi_agg and pi_received_msgs for all nodes without evidence.
+ (Without evidence, belief equals pi_agg.)
"""
self.model.set_boundary_conditions()
@@ -119,13 +118,13 @@ class BeliefPropagation:
for child in node.lambda_received_msgs.keys():
node.update_lambda_msg_from_child(child=child,
new_value=ones_vector)
- logging.info("Finished initializing Lambda(x) and lambda_received_msgs per node.")
+ logging.debug("Finished initializing Lambda(x) and lambda_received_msgs per node.")
- logging.info("Start downward sweep from nodes. Sending Pi messages only.")
+ logging.debug("Start downward sweep from nodes. Sending Pi messages only.")
topdown_order = self.model.get_topologically_sorted_nodes(reverse=False)
for node_id in topdown_order:
- logging.info('label in iteration through top-down order: %s', str(node_id))
+ logging.debug('label in iteration through top-down order: %s', str(node_id))
node_sending_msg = self.model.nodes_dict[node_id]
child_ids = node_sending_msg.children
@@ -134,9 +133,9 @@ class BeliefPropagation:
node_sending_msg.compute_pi_agg()
for child_id in child_ids:
- logging.info("child: %s", str(child_id))
+ logging.debug("child: %s", str(child_id))
new_pi_msg = node_sending_msg.compute_pi_msg_to_child(child_k=child_id)
- logging.info("new_pi_msg: %s", np.array2string(new_pi_msg))
+ logging.debug("new_pi_msg: %s", np.array2string(new_pi_msg))
child_node = self.model.nodes_dict[child_id]
child_node.update_pi_msg_from_parent(parent=node_id,
@@ -144,9 +143,12 @@ class BeliefPropagation:
def _run_belief_propagation(self, evidence):
"""
- Input:
- evidence: dict
- a dict key, value pair as {var: state_of_var observed}
+ Sequentially perturb nodes with observed values, running belief propagation
+ after each perturbation.
+
+ Args
+ evidence: dict,
+ a dict key, value pair as {var: state_of_var observed}
"""
for evidence_id, observed_value in evidence.items():
if evidence_id not in self.model.nodes_dict.keys():
@@ -162,21 +164,20 @@ class BeliefPropagation:
self.model.nodes_dict[evidence_id].lambda_agg.values * observed_value
)
nodes_to_update = [MsgPassers(msg_receiver=evidence_id, msg_sender=None)]
- self._belief_propagation(nodes_to_update=set(nodes_to_update),
- evidence=evidence)
+ self._belief_propagation(nodes_to_update=set(nodes_to_update), evidence=evidence)
def query(self, evidence={}):
"""
- Run belief propagation given evidence.
+ Run belief propagation given 0 or more pieces of evidence.
- Input:
- evidence: dict
- a dict key, value pair as {var: state_of_var observed},
- e.g. {'3': np.array([0,1])} if label '3' is True.
+ Args
+ evidence: dict,
+ a dict key, value pair as {var: state_of_var observed},
+ e.g. {'3': np.array([0,1])} if label '3' is True.
- Returns:
- beliefs: dict
- a dict key, value pair as {var: belief}
+ Returns
+ a dict key, value pair as {var: belief}, where belief is an np.array of the
+ marginal probability of each state of the variable given the evidence.
Example
-------
diff --git a/beliefs/models/base_models.py b/beliefs/models/base_models.py
index cb91566..71af0cb 100644
--- a/beliefs/models/base_models.py
+++ b/beliefs/models/base_models.py
@@ -9,9 +9,11 @@ class DirectedGraph(nx.DiGraph):
"""
def __init__(self, edges=None, node_labels=None):
"""
- Input:
- edges: an edge list, e.g. [(parent1, child1), (parent1, child2)]
- node_labels: a list of strings of node labels
+ Args
+ edges: list,
+ a list of edge tuples, e.g. [(parent1, child1), (parent1, child2)]
+ node_labels: list,
+ a list of strings or integers representing node label ids
"""
super().__init__()
if edges is not None:
@@ -20,18 +22,15 @@ class DirectedGraph(nx.DiGraph):
self.add_nodes_from(node_labels)
def get_leaves(self):
- """
- Returns a list of leaves of the graph.
- """
+ """Return a list of leaves of the graph"""
return [node for node, out_degree in self.out_degree() if out_degree == 0]
def get_roots(self):
- """
- Returns a list of roots of the graph.
- """
+ """Return a list of roots of the graph"""
return [node for node, in_degree in self.in_degree() if in_degree == 0]
def get_topologically_sorted_nodes(self, reverse=False):
+ """Return a list of nodes in topological sort order"""
if reverse:
return list(reversed(list(nx.topological_sort(self))))
else:
@@ -47,12 +46,12 @@ class BayesianModel(DirectedGraph):
"""
Base class for Bayesian model.
- Input:
- edges: (optional) list of edges,
+ Args
+ edges: (optional) list of edges,
tuples of form ('parent', 'child')
- variables: (optional) list of str or int
+ variables: (optional) list of str or int
labels for variables
- cpds: (optional) list of CPDs
+ cpds: (optional) list of CPDs
TabularCPD class or subclass
"""
super().__init__()
@@ -61,20 +60,17 @@ class BayesianModel(DirectedGraph):
self.cpds = cpds
def copy(self):
- """
- Returns a copy of the model.
- """
- copy_model = self.__class__(edges=list(self.edges()).copy(),
- variables=list(self.nodes()).copy(),
- cpds=[cpd.copy() for cpd in self.cpds])
- return copy_model
+ """Return a copy of the model"""
+ return self.__class__(edges=list(self.edges()).copy(),
+ variables=list(self.nodes()).copy(),
+ cpds=[cpd.copy() for cpd in self.cpds])
def get_variables_in_definite_state(self):
"""
- Returns a set of labels of all nodes in a definite state, i.e. with
- label values that are kronecker deltas.
+ Get labels of all nodes in a definite state, i.e. with label values
+ that are kronecker deltas.
- RETURNS
+ Returns
set of strings (labels)
"""
return {label for label, node in self.nodes_dict.items() if is_kronecker_delta(node.belief)}
@@ -84,14 +80,14 @@ class BayesianModel(DirectedGraph):
Returns a set of labels that are inferred to be in definite state, given
list of labels that were directly observed (e.g. YES/NOs, but not MAYBEs).
- INPUT
- observed: set of strings, directly observed labels
- RETURNS
- set of strings, labels inferred to be in a definite state
+ Args
+ observed: set,
+ set of strings, directly observed labels
+ Returns
+ set of strings, the labels inferred to be in a definite state
"""
-
- # Assert that beliefs of directly observed vars are kronecker deltas
for label in observed:
+ # beliefs of directly observed vars should be kronecker deltas
assert is_kronecker_delta(self.nodes_dict[label].belief), \
("Observed label has belief {} but should be kronecker delta"
.format(self.nodes_dict[label].belief))
@@ -101,28 +97,40 @@ class BayesianModel(DirectedGraph):
"Expected set of observed labels to be a subset of labels in definite state."
return vars_in_definite_state - observed
- def _get_ancestors_of(self, observed):
- """Return list of ancestors of observed labels"""
+ def _get_ancestors_of(self, labels):
+ """
+ Get set of ancestors of an iterable of labels.
+
+ Args
+ labels: iterable,
+ label ids for which ancestors should be retrieved
+
+ Returns
+ ancestors: set,
+ set of label ids of ancestors of the input labels
+ """
ancestors = set()
- for label in observed:
+ for label in labels:
ancestors.update(nx.ancestors(self, label))
return ancestors
def reachable_observed_variables(self, source, observed=set()):
"""
- Returns list of observed labels (labels with direct evidence to be in a definite
+ Get list of directly observed labels (labels with evidence in a definite
state) that are reachable from the source.
- INPUT
- source: string, label of node for which to evaluate reachable observed labels
- observed: set of strings, directly observed labels
- RETURNS
- reachable_observed_vars: set of strings, observed labels (variables with direct
- evidence) that are reachable from the source label.
+ Args
+ source: string,
+ label of node for which to evaluate reachable observed labels
+ observed: set,
+ set of strings, directly observed labels
+ Returns
+ reachable_observed_vars: set,
+ set of strings, observed labels (variables with direct evidence)
+ that are reachable from the source label
"""
- # ancestors of observed labels, including observed labels
ancestors_of_observed = self._get_ancestors_of(observed)
- ancestors_of_observed.update(observed)
+ ancestors_of_observed.update(observed) # include observed labels
visit_list = set()
visit_list.add((source, 'up'))
diff --git a/beliefs/models/belief_update_node_model.py b/beliefs/models/belief_update_node_model.py
index 17e98fa..1a9ab19 100644
--- a/beliefs/models/belief_update_node_model.py
+++ b/beliefs/models/belief_update_node_model.py
@@ -33,9 +33,9 @@ class BeliefUpdateNodeModel(BayesianModel):
"""
def __init__(self, nodes_dict):
"""
- Input:
- nodes_dict: dict
- a dict key, value pair as {label_id: instance_of_node_class_or_subclass}
+ Args
+ nodes_dict: dict
+ a dict key, value pair as {label_id: instance_of_node_class_or_subclass}
"""
super().__init__(edges=self._get_edges_from_nodes(nodes_dict.values()),
variables=list(nodes_dict.keys()),
@@ -45,12 +45,15 @@ class BeliefUpdateNodeModel(BayesianModel):
@classmethod
def init_from_edges(cls, edges, node_class):
- """Create nodes from the same node class.
+ """
+ Create model from edges where all nodes are from the same node class.
- Input:
- edges: list of edge tuples of form ('parent', 'child')
- node_class: the Node class or subclass from which to
- create all the nodes from edges.
+ Args
+ edges: list,
+ list of edge tuples of form [('parent', 'child')]
+ node_class: Node class or subclass,
+ class from which to create all the nodes automatically from edges,
+ e.g. BernoulliAndNode or BernoulliOrNode
"""
nodes = set()
g = nx.DiGraph(edges)
@@ -68,10 +71,12 @@ class BeliefUpdateNodeModel(BayesianModel):
"""
Return list of all directed edges in nodes.
- Args:
- nodes: an iterable of objects of the Node class or subclass
- Returns:
- edges: list of edge tuples
+ Args
+ nodes: iterable,
+ iterable of objects of the Node class or subclass
+ Returns
+ edges: list,
+ list of edge tuples
"""
edges = set()
for node in nodes:
@@ -82,11 +87,13 @@ class BeliefUpdateNodeModel(BayesianModel):
def set_boundary_conditions(self):
"""
- 1. Root nodes: if x is a node with no parents, set Pi(x) = prior
- probability of x.
+ Set boundary conditions for nodes in the model.
+
+ 1. Root nodes: if x is a node with no parents, set Pi(x) = prior
+ probability of x.
- 2. Leaf nodes: if x is a node with no children, set Lambda(x)
- to an (unnormalized) unit vector, of length the cardinality of x.
+ 2. Leaf nodes: if x is a node with no children, set Lambda(x)
+ to an (unnormalized) unit vector, of length the cardinality of x.
"""
for root in self.get_roots():
self.nodes_dict[root].update_pi_agg(self.nodes_dict[root].cpd.values)
@@ -97,8 +104,11 @@ class BeliefUpdateNodeModel(BayesianModel):
@property
def all_nodes_are_fully_initialized(self):
"""
- Returns True if, for all nodes in the model, all lambda and pi
- messages and lambda_agg and pi_agg are not None, else False.
+ Check if all nodes in the model are initialized, i.e. lambda and pi messages and
+ lambda_agg and pi_agg are not None for every node.
+
+ Returns
+ bool, True if all nodes in the model are initialized, else False.
"""
for node in self.nodes_dict.values():
if not node.is_fully_initialized:
@@ -106,27 +116,27 @@ class BeliefUpdateNodeModel(BayesianModel):
return True
def copy(self):
- """
- Returns a copy of the model.
- """
+ """Return a copy of the model."""
copy_nodes = copy.deepcopy(self.nodes_dict)
copy_model = self.__class__(nodes_dict=copy_nodes)
return copy_model
class Node:
- """A node in a DAG with methods to compute the belief (marginal probability
- of the node given evidence) and compute pi/lambda messages to/from its neighbors
+ """
+ A node in a DAG with methods to compute the belief (marginal probability of
+ the node given evidence) and compute pi/lambda messages to/from its neighbors
to update its belief.
- Implemented from Pearl's belief propagation algorithm.
+ Implemented from Pearl's belief propagation algorithm for polytrees.
"""
def __init__(self, children, cpd):
"""
Args
- children: list of strings
- cpd: an instance of a conditional probability distribution,
- e.g. BernoulliOrCPD or TabularCPD
+ children: list,
+ list of strings
+ cpd: an instance of TabularCPD or one of its subclasses,
+ e.g. BernoulliOrCPD or BernoulliAndCPD
"""
self.label_id = cpd.variable
self.children = children
@@ -134,15 +144,20 @@ class Node:
self.cardinality = cpd.cardinality[0]
self.cpd = cpd
- # instances of DiscreteFactor with `values` an np.array of dimensions [1, cardinality]
- self.pi_agg = self._init_aggregate_values()
- self.lambda_agg = self._init_aggregate_values()
+ self.pi_agg = self._init_factor_for_variable()
+ self.lambda_agg = self._init_factor_for_variable()
self.pi_received_msgs = self._init_pi_received_msgs(self.parents)
- self.lambda_received_msgs = {child: self._init_aggregate_values() for child in children}
+ self.lambda_received_msgs = {child: self._init_factor_for_variable() for child in children}
@property
def belief(self):
+ """
+ Calculate the marginal probability of the variable from its aggregate values.
+
+ Returns
+ belief, an np.array of ndim 1 and shape (self.cardinality,)
+ """
if any(self.pi_agg.values) and any(self.lambda_agg.values):
belief = (self.lambda_agg * self.pi_agg).values
return self._normalize(belief)
@@ -152,29 +167,50 @@ class Node:
def _normalize(self, value):
return value/value.sum()
- def _init_aggregate_values(self):
+ def _init_factor_for_variable(self):
+ """
+ Returns
+ instance of a DiscreteFactor, where DiscreteFactor.values is an np.array of
+ ndim 1 and shape (self.cardinality,)
+ """
return DiscreteFactor(variables=[self.cpd.variable],
cardinality=[self.cardinality],
values=None,
state_names=None)
def _init_pi_received_msgs(self, parents):
+ """
+ Args
+ parents: list,
+ list of strings, parent ids of the node
+ Returns
+ msgs: dict,
+ a dict with key, value pair as {parent_id: instance of a DiscreteFactor},
+ where DiscreteFactor.values is an np.array of ndim 1 and
+ shape (cardinality of parent_id,)
+ """
msgs = {}
for k in parents:
+ if self.cpd.state_names is not None:
+ state_names = {k: self.cpd.state_names[k]}
+ else:
+ state_names = None
+
kth_cardinality = self.cpd.cardinality[self.cpd.variables.index(k)]
msgs[k] = DiscreteFactor(variables=[k],
cardinality=[kth_cardinality],
values=None,
- state_names=None)
+ state_names=state_names)
return msgs
def _return_msgs_received_for_msg_type(self, message_type):
"""
- Input:
- message_type: MessageType enum
-
- Returns:
- msg_values: list of DiscreteFactors containing message values (np.arrays)
+ Args
+ message_type: MessageType enum
+ Returns
+ msg_values: list,
+ list of DiscreteFactors with property `values` containing
+ the values of the messages (np.arrays)
"""
if message_type == MessageType.LAMBDA:
msgs = [msg for msg in self.lambda_received_msgs.values()]
@@ -188,11 +224,12 @@ class Node:
Raise error if all messages have not been received. Called
before calculating lambda_agg (pi_agg).
- Input:
- message_type: MessageType enum
-
- Returns:
- msgs: list of DiscreteFactors containing message values (np.array)
+ Args
+ message_type: MessageType enum
+ Returns
+ msgs: list,
+ list of DiscreteFactors with property `values` containing
+ the values of the messages (np.arrays)
"""
msgs = self._return_msgs_received_for_msg_type(message_type)
@@ -205,6 +242,10 @@ class Node:
return msgs
def compute_pi_agg(self):
+ """
+ Compute and update pi_agg, the prior probability, given the current state
+ of messages received from parents.
+ """
if len(self.parents) == 0:
self.update_pi_agg(self.cpd.values)
else:
@@ -217,6 +258,10 @@ class Node:
pi_msgs = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI)
def compute_lambda_agg(self):
+ """
+ Compute and update lambda_agg, the likelihood, given the current state
+ of messages received from children.
+ """
if len(self.children) != 0:
lambda_msg_values = [
msg.values for msg in
@@ -245,9 +290,8 @@ class Node:
expected_shape = (self.cpd.cardinality[self.cpd.variables.index(key)],)
if new_value.shape != expected_shape:
- raise ValueError("Expected new value to be of dimensions ({},) but got {} instead"
- .format(expected_shape, new_value.shape))
- # received_msg_dict[key]._values = new_value
+ raise ValueError("Expected new value to be of dimensions ({},) but got {} instead"
+ .format(expected_shape, new_value.shape))
received_msg_dict[key].update_values(new_value)
def update_pi_msg_from_parent(self, parent, new_value):
@@ -263,6 +307,15 @@ class Node:
message_type=MessageType.LAMBDA)
def compute_pi_msg_to_child(self, child_k):
+ """
+ Compute pi_msg to child.
+
+ Args
+ child_k: string or int,
+ the label_id of the child receiving the pi_msg
+ Returns
+ np.array of ndim 1 and shape (self.cardinality,)
+ """
lambda_msg_from_child = self.lambda_received_msgs[child_k].values
if lambda_msg_from_child is not None:
with np.errstate(divide='ignore', invalid='ignore'):
@@ -273,6 +326,15 @@ class Node:
raise ValueError("Can't compute pi message to child_{} without having received a lambda message from that child.")
def compute_lambda_msg_to_parent(self, parent_k):
+ """
+ Compute lambda_msg to parent.
+
+ Args
+ parent_k: string or int,
+ the label_id of the parent receiving the lambda_msg
+ Returns
+ np.array of ndim 1 and shape (cardinality of parent_k,)
+ """
if np.array_equal(self.lambda_agg.values, np.ones([self.cardinality])):
return np.ones([self.cardinality])
else:
@@ -306,30 +368,31 @@ class Node:
class BernoulliOrNode(Node):
- def __init__(self,
- label_id,
- children,
- parents):
+ """
+ A node in a DAG associated with a Bernoulli random variable with state_names ['False', 'True']
+ and conditional probability distribution described by 'Or' logic.
+ """
+ def __init__(self, label_id, children, parents):
super().__init__(children=children, cpd=BernoulliOrCPD(label_id, parents))
- def _init_aggregate_values(self):
+ def _init_factor_for_variable(self):
+ """
+ Returns
+ instance of a DiscreteFactor, where DiscreteFactor.values is an np.array of
+ ndim 1 and shape (self.cardinality,)
+ """
variable = self.cpd.variable
return DiscreteFactor(variables=[self.cpd.variable],
cardinality=[self.cardinality],
values=None,
state_names={variable: self.cpd.state_names[variable]})
- def _init_pi_received_msgs(self, parents):
- msgs = {}
- for k in parents:
- kth_cardinality = self.cpd.cardinality[self.cpd.variables.index(k)]
- msgs[k] = DiscreteFactor(variables=[k],
- cardinality=[kth_cardinality],
- values=None,
- state_names={k: self.cpd.state_names[k]})
- return msgs
-
def compute_pi_agg(self):
+ """
+ Compute and update pi_agg, the prior probability, given the current state
+ of messages received from parents. Sidestep explicit factor product and
+ marginalization.
+ """
if len(self.parents) == 0:
self.update_pi_agg(self.cpd.values)
else:
@@ -339,9 +402,18 @@ class BernoulliOrNode(Node):
p_0 = reduce(lambda x, y: x*y, parents_p0)
p_1 = 1 - p_0
self.update_pi_agg(np.array([p_0, p_1]))
- return self.pi_agg
def compute_lambda_msg_to_parent(self, parent_k):
+ """
+ Compute lambda_msg to parent. Sidestep explicit factor product and
+ marginalization.
+
+ Args
+ parent_k: string or int,
+ the label_id of the parent receiving the lambda_msg
+ Returns
+ np.array of ndim 1 and shape (cardinality of parent_k,)
+ """
if np.array_equal(self.lambda_agg.values, np.ones([self.cardinality])):
return np.ones([self.cardinality])
else:
@@ -362,30 +434,31 @@ class BernoulliOrNode(Node):
class BernoulliAndNode(Node):
- def __init__(self,
- label_id,
- children,
- parents):
+ """
+ A node in a DAG associated with a Bernoulli random variable with state_names ['False', 'True']
+ and conditional probability distribution described by 'And' logic.
+ """
+ def __init__(self, label_id, children, parents):
super().__init__(children=children, cpd=BernoulliAndCPD(label_id, parents))
- def _init_aggregate_values(self):
+ def _init_factor_for_variable(self):
+ """
+ Returns
+ instance of a DiscreteFactor, where DiscreteFactor.values is an np.array of
+ ndim 1 and shape (self.cardinality,)
+ """
variable = self.cpd.variable
return DiscreteFactor(variables=[self.cpd.variable],
cardinality=[self.cardinality],
values=None,
state_names={variable: self.cpd.state_names[variable]})
- def _init_pi_received_msgs(self, parents):
- msgs = {}
- for k in parents:
- kth_cardinality = self.cpd.cardinality[self.cpd.variables.index(k)]
- msgs[k] = DiscreteFactor(variables=[k],
- cardinality=[kth_cardinality],
- values=None,
- state_names={k: self.cpd.state_names[k]})
- return msgs
-
def compute_pi_agg(self):
+ """
+ Compute and update pi_agg, the prior probability, given the current state
+ of messages received from parents. Sidestep explicit factor product and
+ marginalization.
+ """
if len(self.parents) == 0:
self.update_pi_agg(self.cpd.values)
else:
@@ -395,9 +468,18 @@ class BernoulliAndNode(Node):
p_1 = reduce(lambda x, y: x*y, parents_p1)
p_0 = 1 - p_1
self.update_pi_agg(np.array([p_0, p_1]))
- return self.pi_agg
def compute_lambda_msg_to_parent(self, parent_k):
+ """
+ Compute lambda_msg to parent. Sidestep explicit factor product and
+ marginalization.
+
+ Args
+ parent_k: string or int,
+ the label_id of the parent receiving the lambda_msg
+ Returns
+ np.array of ndim 1 and shape (cardinality of parent_k,)
+ """
if np.array_equal(self.lambda_agg.values, np.ones([self.cardinality])):
return np.ones([self.cardinality])
else:
diff --git a/beliefs/utils/math_helper.py b/beliefs/utils/math_helper.py
index a25ea68..12325e1 100644
--- a/beliefs/utils/math_helper.py
+++ b/beliefs/utils/math_helper.py
@@ -1,10 +1,16 @@
-"""Random math utils."""
+"""Math utils"""
def is_kronecker_delta(vector):
- """Returns True if vector is a kronecker delta vector, False otherwise.
- Specific evidence ('YES' or 'NO') is a kronecker delta vector, whereas
- virtual evidence ('MAYBE') is not.
+ """
+ Check if vector is a kronecker delta.
+
+ Args:
+ vector: iterable of numbers
+ Returns:
+ bool, True if vector is a kronecker delta vector, False otherwise.
+ In belief propagation, specific evidence (variable is directly observed)
+ is a kronecker delta vector, but virtual evidence is not.
"""
count = 0
for x in vector:
diff --git a/beliefs/utils/random_variables.py b/beliefs/utils/random_variables.py
index 1a0b0f7..cad07aa 100644
--- a/beliefs/utils/random_variables.py
+++ b/beliefs/utils/random_variables.py
@@ -1,3 +1,4 @@
+"""Utilities for working with models and random variables."""
def get_reachable_observed_variables_for_inferred_variables(model, observed=set()):
@@ -6,12 +7,16 @@ def get_reachable_observed_variables_for_inferred_variables(model, observed=set(
("reachable observed variables") that influenced the beliefs of variables inferred
to be in a definite state.
- INPUT
- model: instance of BayesianModel class or subclass
- observed: set of labels (strings) corresponding to vars pinned to definite
- state during inference.
- RETURNS
- dict, of form key - source label (a string), value - a list of strings
+ Args
+ model: instance of BayesianModel class or subclass
+ observed: set,
+ set of labels (strings) corresponding to variables pinned to a definite
+ state during inference.
+ Returns
+ dict,
+ key, value pairs {source_label_id: reachable_observed_vars}, where
+ source_label_id is an int or string, and reachable_observed_vars is a list
+ of label_ids
"""
if not observed:
return {}