From 77d8b323d4f6e05ca97d9cbef43ac85fd8040d61 Mon Sep 17 00:00:00 2001 From: Cathy Yeh Date: Mon, 13 Nov 2017 14:42:52 -0800 Subject: copy scripts from lgs branch --- beliefs/factors/BernoulliOrFactor.py | 42 +++++ beliefs/factors/__init__.py | 0 beliefs/inference/__init__.py | 0 beliefs/inference/belief_propagation.py | 192 +++++++++++++++++++ beliefs/models/BayesianModel.py | 165 ++++++++++++++++ beliefs/models/BernoulliOrModel.py | 17 ++ beliefs/models/DirectedGraph.py | 2 +- beliefs/types/BernoulliOrNode.py | 47 +++++ beliefs/types/Node.py | 179 ++++++++++++++++++ beliefs/types/__init__.py | 0 beliefs/utils/__init__.py | 0 beliefs/utils/edges_helper.py | 136 +++++++++++++ beliefs/utils/math_helper.py | 19 ++ beliefs/utils/random_variables.py | 21 +++ tests/test_belief_propagation.py | 252 +++++++++++++++++++++++++ tests/test_get_reachable_observed_variables.py | 129 +++++++++++++ 16 files changed, 1200 insertions(+), 1 deletion(-) create mode 100644 beliefs/factors/BernoulliOrFactor.py create mode 100644 beliefs/factors/__init__.py create mode 100644 beliefs/inference/__init__.py create mode 100644 beliefs/inference/belief_propagation.py create mode 100644 beliefs/models/BayesianModel.py create mode 100644 beliefs/models/BernoulliOrModel.py create mode 100644 beliefs/types/BernoulliOrNode.py create mode 100644 beliefs/types/Node.py create mode 100644 beliefs/types/__init__.py create mode 100644 beliefs/utils/__init__.py create mode 100644 beliefs/utils/edges_helper.py create mode 100644 beliefs/utils/math_helper.py create mode 100644 beliefs/utils/random_variables.py create mode 100644 tests/test_belief_propagation.py create mode 100644 tests/test_get_reachable_observed_variables.py diff --git a/beliefs/factors/BernoulliOrFactor.py b/beliefs/factors/BernoulliOrFactor.py new file mode 100644 index 0000000..4f973ae --- /dev/null +++ b/beliefs/factors/BernoulliOrFactor.py @@ -0,0 +1,42 @@ +import numpy as np + + +class BernoulliOrFactor: + """CPD class for a Bernoulli random variable whose relationship to its + parents is described by OR logic. + + If at least one of a child's parents is True, then the child is True, and + False otherwise.""" + def __init__(self, child, parents=set()): + self.child = child + self.parents = set(parents) + self.variables = set([child] + list(parents)) + self.cardinality = [2]*len(self.variables) + self._values = None + + @property + def values(self): + if self._values is None: + self._values = self._build_kwise_values_array(len(self.variables)) + self._values = self._values.reshape(self.cardinality) + return self._values + + def get_values(self): + """ + Returns the tabular cpd form of the values. + """ + if len(self.cardinality) == 1: + return self.values.reshape(1, np.prod(self.cardinality)) + else: + return self.values.reshape(self.cardinality[0], np.prod(self.cardinality[1:])) + + @staticmethod + def _build_kwise_values_array(k): + # special case a completely independent factor, and + # return the uniform prior + if k == 1: + return np.array([0.5, 0.5]) + + return np.array( + [1.,] + [0.]*(2**(k-1)-1) + [0.,] + [1.]*(2**(k-1)-1) + ) diff --git a/beliefs/factors/__init__.py b/beliefs/factors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beliefs/inference/__init__.py b/beliefs/inference/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beliefs/inference/belief_propagation.py b/beliefs/inference/belief_propagation.py new file mode 100644 index 0000000..972fd5d --- /dev/null +++ b/beliefs/inference/belief_propagation.py @@ -0,0 +1,192 @@ +import numpy as np +from collections import namedtuple + +from beliefs.types.Node import InvalidLambdaMsgToParent +from beliefs.utils.math_helper import is_kronecker_delta + + +MsgPassers = namedtuple('MsgPassers', ['msg_receiver', 'msg_sender']) + + +class ConflictingEvidenceError(Exception): + """Failed to run belief propagation on label graph because of conflicting evidence.""" + def __init__(self, evidence): + message = ( + "Can't run belief propagation with conflicting evidence: {}" + .format(evidence) + ) + super().__init__(message) + + +class BeliefPropagation: + def __init__(self, model, inplace=True): + """ + Input: + model: an instance of BayesianModel class or subclass + inplace: bool + modify in-place the nodes in the model during belief propagation + """ + if inplace is False: + self.model = model.copy() + else: + self.model = model + + def _belief_propagation(self, nodes_to_update, evidence): + """ + Implementation of Pearl's belief propagation algorithm for polytrees. + + ref: "Fusion, Propagation, and Structuring in Belief Networks" + Artificial Intelligence 29 (1986) 241-288 + + Input: + nodes_to_update: list + list of MsgPasser namedtuples. + evidence: dict, + a dict key, value pair as {var: state_of_var observed} + """ + if len(nodes_to_update) == 0: + return + + node_to_update_label_id, msg_sender_label_id = nodes_to_update.pop() + print("Node", node_to_update_label_id) + + node = self.model.nodes[node_to_update_label_id] + + # exclude the message sender (either a parent or child) from getting an + # outgoing msg from the node to update + parent_ids = node.parents - set([msg_sender_label_id]) + child_ids = node.children - set([msg_sender_label_id]) + print("parent_ids:", parent_ids) + print("child_ids:", child_ids) + + if msg_sender_label_id is not None: + # update triggered by receiving a message, not pinning to evidence + assert len(node.parents) + len(node.children) - 1 == len(parent_ids) + len(child_ids) + + if node_to_update_label_id not in evidence: + node.compute_pi_agg() + print("belief propagation pi_agg", node.pi_agg) + node.compute_lambda_agg() + print("belief propagation lambda_agg", node.lambda_agg) + + for parent_id in parent_ids: + try: + new_lambda_msg = node.compute_lambda_msg_to_parent(parent_k=parent_id) + except InvalidLambdaMsgToParent: + raise ConflictingEvidenceError(evidence=evidence) + + parent_node = self.model.nodes[parent_id] + parent_node.update_lambda_msg_from_child(child=node_to_update_label_id, + new_value=new_lambda_msg) + nodes_to_update.add(MsgPassers(msg_receiver=parent_id, + msg_sender=node_to_update_label_id)) + + for child_id in child_ids: + new_pi_msg = node.compute_pi_msg_to_child(child_k=child_id) + child_node = self.model.nodes[child_id] + child_node.update_pi_msg_from_parent(parent=node_to_update_label_id, + new_value=new_pi_msg) + nodes_to_update.add(MsgPassers(msg_receiver=child_id, + msg_sender=node_to_update_label_id)) + + self._belief_propagation(nodes_to_update, evidence) + + def initialize_model(self): + """ + Apply boundary conditions: + - Set pi_agg equal to prior probabilities for root nodes. + - Set lambda_agg equal to vector of ones for leaf nodes. + + - Set lambda_agg, lambda_received_msgs to vectors of ones (same effect as + actually passing lambda messages up from leaf nodes to root nodes). + - Calculate pi_agg and pi_received_msgs for all nodes without evidence. + (Without evidence, belief equals pi_agg.) + """ + self.model.set_boundary_conditions() + + for node in self.model.nodes.values(): + ones_vector = np.ones([node.cardinality]) + + node.lambda_agg = ones_vector + for child in node.lambda_received_msgs.keys(): + node.update_lambda_msg_from_child(child=child, + new_value=ones_vector) + print("Finished initializing Lambda(x) and lambda_received_msgs per node.") + + print("Start downward sweep from nodes. Sending Pi messages only.") + topdown_order = self.model.get_topologically_sorted_nodes(reverse=False) + + for node_id in topdown_order: + print('label in iteration through top-down order:', node_id) + + node_sending_msg = self.model.nodes[node_id] + child_ids = node_sending_msg.children + + if node_sending_msg.pi_agg is None: + node_sending_msg.compute_pi_agg() + + for child_id in child_ids: + print("child", child_id) + new_pi_msg = node_sending_msg.compute_pi_msg_to_child(child_k=child_id) + print(new_pi_msg) + + child_node = self.model.nodes[child_id] + child_node.update_pi_msg_from_parent(parent=node_id, + new_value=new_pi_msg) + + def _run_belief_propagation(self, evidence): + """ + Input: + evidence: dict + a dict key, value pair as {var: state_of_var observed} + """ + for evidence_id, observed_value in evidence.items(): + nodes_to_update = set() + + if evidence_id not in self.model.nodes.keys(): + raise KeyError("Evidence supplied for non-existent label_id: {}" + .format(evidence_id)) + + if is_kronecker_delta(observed_value): + # specific evidence + self.model.nodes[evidence_id].lambda_agg = observed_value + else: + # virtual evidence + self.model.nodes[evidence_id].lambda_agg = \ + self.model.nodes[evidence_id].lambda_agg * observed_value + + nodes_to_update.add(MsgPassers(msg_receiver=evidence_id, + msg_sender=None)) + + self._belief_propagation(nodes_to_update=nodes_to_update, + evidence=evidence) + + def query(self, evidence={}): + """ + Run belief propagation given evidence. + + Input: + evidence: dict + a dict key, value pair as {var: state_of_var observed}, + e.g. {'3': np.array([0,1])} if label '3' is True. + + Returns: + beliefs: dict + a dict key, value pair as {var: belief} + + Example + ------- + >> from label_graph_service.pgm.inference.belief_propagation import BeliefPropagation + >> from label_graph_service.pgm.models.BernoulliOrModel import BernoulliOrModel + >> edges = [('1', '3'), ('2', '3'), ('3', '5')] + >> model = BernoulliOrModel(edges) + >> infer = BeliefPropagation(model) + >> result = infer.query({'2': np.array([0, 1])}) + """ + if not self.model.all_nodes_are_fully_initialized: + self.initialize_model() + + if evidence: + self._run_belief_propagation(evidence) + + return {label_id: node.belief for label_id, node in self.model.nodes.items()} diff --git a/beliefs/models/BayesianModel.py b/beliefs/models/BayesianModel.py new file mode 100644 index 0000000..bdfd037 --- /dev/null +++ b/beliefs/models/BayesianModel.py @@ -0,0 +1,165 @@ +import copy +import numpy as np +import networkx as nx + +from beliefs.models.DirectedGraph import DirectedGraph +from beliefs.utils.edges_helper import EdgesHelper +from beliefs.utils.math_helper import is_kronecker_delta + + +class BayesianModel(DirectedGraph): + """ + Bayesian model stores nodes and edges described by conditional probability + distributions. + """ + def __init__(self, edges, nodes=None): + """ + Input: + edges: list of edge tuples of form ('parent', 'child') + nodes: (optional) dict + a dict key, value pair as {label_id: instance_of_node_class_or_subclass} + """ + if nodes is not None: + super().__init__(edges, nodes.keys()) + else: + super().__init__(edges) + self.nodes = nodes + + @classmethod + def from_node_class(cls, edges, node_class): + """Automatically create all nodes from the same node class + + Input: + edges: list of edge tuples of form ('parent', 'child') + node_class: (optional) the Node class or subclass from which to + create all the nodes from edges. + """ + nodes = cls.create_nodes(edges, node_class) + return cls.__init__(edges=edges, nodes=nodes) + + @staticmethod + def create_nodes(edges, node_class): + """Returns list of Node instances created from edges using + the default node_class""" + edges_helper = EdgesHelper(edges) + nodes = edges_helper.create_nodes_from_edges(node_class=node_class) + label_to_node = dict() + for node in nodes: + label_to_node[node.label_id] = node + return label_to_node + + def set_boundary_conditions(self): + """ + 1. Root nodes: if x is a node with no parents, set Pi(x) = prior + probability of x. + + 2. Leaf nodes: if x is a node with no children, set Lambda(x) + to an (unnormalized) unit vector, of length the cardinality of x. + """ + for root in self.get_roots(): + self.nodes[root].pi_agg = self.nodes[root].cpd.values + + for leaf in self.get_leaves(): + self.nodes[leaf].lambda_agg = np.ones([self.nodes[leaf].cardinality]) + + @property + def all_nodes_are_fully_initialized(self): + """ + Returns True if, for all nodes in the model, all lambda and pi + messages and lambda_agg and pi_agg are not None, else False. + """ + for node in self.nodes.values(): + if not node.is_fully_initialized: + return False + return True + + def copy(self): + """ + Returns a copy of the model. + """ + copy_edges = self.edges().copy() + copy_nodes = copy.deepcopy(self.nodes) + copy_model = self.__class__(edges=copy_edges, nodes=copy_nodes) + return copy_model + + def get_variables_in_definite_state(self): + """ + Returns a set of labels of all nodes in a definite state, i.e. with + label values that are kronecker deltas. + + RETURNS + set of strings (labels) + """ + return {label for label, node in self.nodes.items() if is_kronecker_delta(node.belief)} + + def get_unobserved_variables_in_definite_state(self, observed=set()): + """ + Returns a set of labels that are inferred to be in definite state, given + list of labels that were directly observed (e.g. YES/NOs, but not MAYBEs). + + INPUT + observed: set of strings, directly observed labels + RETURNS + set of strings, labels inferred to be in a definite state + """ + + # Assert that beliefs of directly observed vars are kronecker deltas + for label in observed: + assert is_kronecker_delta(self.nodes[label].belief), \ + ("Observed label has belief {} but should be kronecker delta" + .format(self.nodes[label].belief)) + + vars_in_definite_state = self.get_variables_in_definite_state() + assert observed <= vars_in_definite_state, \ + "Expected set of observed labels to be a subset of labels in definite state." + return vars_in_definite_state - observed + + def _get_ancestors_of(self, observed): + """Return list of ancestors of observed labels, including the observed labels themselves.""" + ancestors = observed.copy() + for label in observed: + ancestors.update(nx.ancestors(self, label)) + return ancestors + + def reachable_observed_variables(self, source, observed=set()): + """ + Returns list of observed labels (labels with direct evidence to be in a definite + state) that are reachable from the source. + + INPUT + source: string, label of node for which to evaluate reachable observed labels + observed: set of strings, directly observed labels + RETURNS + reachable_observed_vars: set of strings, observed labels (variables with direct + evidence) that are reachable from the source label. + """ + ancestors_of_observed = self._get_ancestors_of(observed) + + visit_list = set() + visit_list.add((source, 'up')) + traversed_list = set() + reachable_observed_vars = set() + + while visit_list: + node, direction = visit_list.pop() + if (node, direction) not in traversed_list: + if node in observed: + reachable_observed_vars.add(node) + traversed_list.add((node, direction)) + if direction == 'up' and node not in observed: + for parent in self.predecessors(node): + # causal flow + visit_list.add((parent, 'up')) + for child in self.successors(node): + # common cause flow + visit_list.add((child, 'down')) + elif direction == 'down': + if node not in observed: + # evidential flow + for child in self.successors(node): + visit_list.add((child, 'down')) + if node in ancestors_of_observed: + # common effect flow (activated v-structure) + for parent in self.predecessors(node): + visit_list.add((parent, 'up')) + return reachable_observed_vars diff --git a/beliefs/models/BernoulliOrModel.py b/beliefs/models/BernoulliOrModel.py new file mode 100644 index 0000000..da18fb6 --- /dev/null +++ b/beliefs/models/BernoulliOrModel.py @@ -0,0 +1,17 @@ +from beliefs.models.BayesianModel import BayesianModel +from beliefs.types.BernoulliOrNode import BernoulliOrNode + + +class BernoulliOrModel(BayesianModel): + """ + BernoulliOrModel stores node instances of BernoulliOrNodes (Bernoulli + variables associated with an OR conditional probability distribution). + """ + def __init__(self, edges, nodes=None): + """ + Input: + edges: an edge list, e.g. [(parent1, child1), (parent1, child2)] + """ + if nodes is None: + nodes = self.create_nodes(edges, node_class=BernoulliOrNode) + super().__init__(edges, nodes=nodes) diff --git a/beliefs/models/DirectedGraph.py b/beliefs/models/DirectedGraph.py index 8dfb9bd..8fce894 100644 --- a/beliefs/models/DirectedGraph.py +++ b/beliefs/models/DirectedGraph.py @@ -5,7 +5,7 @@ class DirectedGraph(nx.DiGraph): """ Base class for all directed graphical models. """ - def __init__(self, edges, node_labels): + def __init__(self, edges=None, node_labels=None): """ Input: edges: an edge list, e.g. [(parent1, child1), (parent1, child2)] diff --git a/beliefs/types/BernoulliOrNode.py b/beliefs/types/BernoulliOrNode.py new file mode 100644 index 0000000..27da85a --- /dev/null +++ b/beliefs/types/BernoulliOrNode.py @@ -0,0 +1,47 @@ +import numpy as np +from functools import reduce + +from beliefs.types.Node import ( + Node, + MessageType, + InvalidLambdaMsgToParent +) +from beliefs.factors.BernoulliOrFactor import BernoulliOrFactor + + +class BernoulliOrNode(Node): + def __init__(self, + label_id, + children, + parents): + super().__init__(label_id=label_id, + children=children, + parents=parents, + cardinality=2, + cpd=BernoulliOrFactor(label_id, parents)) + + def compute_pi_agg(self): + if not self.parents: + self.pi_agg = self.cpd.values + else: + pi_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) + parents_p0 = [p[0] for p in pi_msg_values] + p_0 = reduce(lambda x, y: x*y, parents_p0) + p_1 = 1 - p_0 + self.pi_agg = np.array([p_0, p_1]) + return self.pi_agg + + def compute_lambda_msg_to_parent(self, parent_k): + if np.array_equal(self.lambda_agg, np.ones([self.cardinality])): + return np.ones([self.cardinality]) + else: + # TODO: cleanup this validation + _ = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) + p0_excluding_k = [msg[0] for par_id, msg in self.pi_received_msgs.items() if par_id != parent_k] + p0_product = reduce(lambda x, y: x*y, p0_excluding_k, 1) + lambda_0 = self.lambda_agg[1] + (self.lambda_agg[0] - self.lambda_agg[1])*p0_product + lambda_1 = self.lambda_agg[1] + lambda_msg = np.array([lambda_0, lambda_1]) + if not any(lambda_msg): + raise InvalidLambdaMsgToParent + return self._normalize(lambda_msg) diff --git a/beliefs/types/Node.py b/beliefs/types/Node.py new file mode 100644 index 0000000..a8dca7c --- /dev/null +++ b/beliefs/types/Node.py @@ -0,0 +1,179 @@ +import numpy as np +from functools import reduce +from enum import Enum + + +class InvalidLambdaMsgToParent(Exception): + """Computed invalid lambda msg to send to parent.""" + pass + + +class MessageType(Enum): + LAMBDA = 'lambda' + PI = 'pi' + + +class Node: + """A node in a DAG with methods to compute the belief (marginal probability + of the node given evidence) and compute pi/lambda messages to/from its neighbors + to update its belief. + + Implemented from Pearl's belief propagation algorithm. + """ + def __init__(self, + label_id, + children, + parents, + cardinality, + cpd): + """ + Input: + label_id: str + children: str + parents: set of strings + cardinality: int, cardinality of the random variable the node represents + cpd: an instance of a conditional probability distribution, + e.g. BernoulliOrFactor or pgmpy's TabularCPD + """ + self.label_id = label_id + self.children = children + self.parents = parents + self.cardinality = cardinality + self.cpd = cpd + + self.pi_agg = None # np.array dimensions [1, cardinality] + self.lambda_agg = None # np.array dimensions [1, cardinality] + + self.pi_received_msgs = self._init_received_msgs(parents) + self.lambda_received_msgs = self._init_received_msgs(children) + + @classmethod + def from_cpd_class(cls, + label_id, + children, + parents, + cardinality, + cpd_class): + cpd = cpd_class(label_id, parents) + return cls(label_id, children, parents, cardinality, cpd) + + @property + def belief(self): + if self.pi_agg.any() and self.lambda_agg.any(): + belief = np.multiply(self.pi_agg, self.lambda_agg) + return self._normalize(belief) + else: + return None + + def _normalize(self, value): + return value/value.sum() + + @staticmethod + def _init_received_msgs(keys): + return {k: None for k in keys} + + def _return_msgs_received_for_msg_type(self, message_type): + """ + Input: + message_type: MessageType enum + + Returns: + msg_values: list of message values (each an np.array) + """ + if message_type == MessageType.LAMBDA: + msg_values = [msg for msg in self.lambda_received_msgs.values()] + elif message_type == MessageType.PI: + msg_values = [msg for msg in self.pi_received_msgs.values()] + return msg_values + + def validate_and_return_msgs_received_for_msg_type(self, message_type): + """ + Check that all messages have been received from children (parents). + Raise error if all messages have not been received. Called + before calculating lambda_agg (pi_agg). + + Input: + message_type: MessageType enum + + Returns: + msg_values: list of message values (each an np.array) + """ + msg_values = self._return_msgs_received_for_msg_type(message_type) + + if any(msg is None for msg in msg_values): + raise ValueError( + "Missing value for {msg_type} msg from child: can't compute {msg_type}_agg.". + format(msg_type=message_type.value) + ) + else: + return msg_values + + def compute_pi_agg(self): + # TODO: implement explict factor product operation + raise NotImplementedError + + def compute_lambda_agg(self): + if not self.children: + return self.lambda_agg + else: + lambda_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.LAMBDA) + self.lambda_agg = reduce(np.multiply, lambda_msg_values) + return self.lambda_agg + + def _update_received_msg_by_key(self, received_msg_dict, key, new_value): + if key not in received_msg_dict.keys(): + raise ValueError("Label id '{}' to update message isn't in allowed set of keys: {}". + format(key, received_msg_dict.keys())) + + if not isinstance(new_value, np.ndarray): + raise TypeError("Expected a new value of type numpy.ndarray, but got type {}". + format(type(new_value))) + + if new_value.shape != (self.cardinality,): + raise ValueError("Expected new value to be of dimensions ({},) but got {} instead". + format(self.cardinality, new_value.shape)) + received_msg_dict[key] = new_value + + def update_pi_msg_from_parent(self, parent, new_value): + self._update_received_msg_by_key(received_msg_dict=self.pi_received_msgs, + key=parent, + new_value=new_value) + + def update_lambda_msg_from_child(self, child, new_value): + self._update_received_msg_by_key(received_msg_dict=self.lambda_received_msgs, + key=child, + new_value=new_value) + + def compute_pi_msg_to_child(self, child_k): + lambda_msg_from_child = self.lambda_received_msgs[child_k] + if lambda_msg_from_child is not None: + with np.errstate(divide='ignore', invalid='ignore'): + # 0/0 := 0 + return self._normalize( + np.nan_to_num(np.divide(self.belief, lambda_msg_from_child))) + else: + raise ValueError("Can't compute pi message to child_{} without having received" \ + "a lambda message from that child.") + + def compute_lambda_msg_to_parent(self, parent_k): + # TODO: implement explict factor product operation + raise NotImplementedError + + @property + def is_fully_initialized(self): + """ + Returns True if all lambda and pi messages and lambda_agg and + pi_agg are not None, else False. + """ + lambda_msgs = self._return_msgs_received_for_msg_type(MessageType.LAMBDA) + if any(msg is None for msg in lambda_msgs): + return False + + pi_msgs = self._return_msgs_received_for_msg_type(MessageType.PI) + if any(msg is None for msg in pi_msgs): + return False + + if (self.pi_agg is None) or (self.lambda_agg is None): + return False + + return True diff --git a/beliefs/types/__init__.py b/beliefs/types/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beliefs/utils/__init__.py b/beliefs/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beliefs/utils/edges_helper.py b/beliefs/utils/edges_helper.py new file mode 100644 index 0000000..7ac783c --- /dev/null +++ b/beliefs/utils/edges_helper.py @@ -0,0 +1,136 @@ +from collections import defaultdict + +from beliefs.types.Node import Node +from beliefs.factors.BernoulliOrFactor import BernoulliOrFactor + + +class EdgesHelper: + """Class with convenience methods for working with edges.""" + def __init__(self, edges): + self.edges = edges + + def get_label_to_children_dict(self): + """returns dictionary keyed on label, with value a set of children""" + label_to_children_dict = defaultdict(set) + for parent, child in self.edges: + label_to_children_dict[parent].add(child) + return label_to_children_dict + + def get_label_to_parents_dict(self): + """returns dictionary keyed on label, with value a set of parents + Only used to help create dummy factors from edges (not for algo). + """ + label_to_parents_dict = defaultdict(set) + + for parent, child in self.edges: + label_to_parents_dict[child].add(parent) + return label_to_parents_dict + + def get_labels_from_edges(self): + """Return the set of labels that comprise the vertices of a list of edge tuples.""" + all_labels = set() + for parent, child in self.edges: + all_labels.update({parent, child}) + return all_labels + + def create_cpds_from_edges(self, CPD=BernoulliOrFactor): + """ + Create factors from list of edges. + + Input: + cpd: a factor class, assumed initialization takes in a label_id, the label_id of + the child (should = label_id of the node), and set of label_ids of parents. + + Returns: + factors: a set of (unique) factors of the graph + """ + labels = self.get_labels_from_edges() + label_to_parents = self.get_label_to_parents_dict() + + factors = set() + + for label in labels: + parents = label_to_parents[label] + cpd = CPD(label, parents) + factors.add(cpd) + return factors + + def get_label_to_factor_dict(self, CPD=BernoulliOrFactor): + """Create a dictionary mapping each label_id to the CPD/factor where + that label_id is a child. + + Returns: + label_to_factor: dict mapping each label to the cpd that + has that label as a child. + """ + factors = self.create_cpds_from_edges(CPD=CPD) + + label_to_factor = dict() + for factor in factors: + label_to_factor[factor.child] = factor + return label_to_factor + + def get_label_to_node_dict(self, CPD=BernoulliOrFactor): + """Create a dictionary mapping each label_id to a Node instance. + + Returns: + label_to_node: dict mapping each label to the node that has that + label as a label_id. + """ + nodes = self.create_nodes_from_edges() + + label_to_node = dict() + for node in nodes: + label_to_node[node.label_id] = node + return label_to_node + + def get_label_to_node_dict_for_manual_cpds(self, cpds_list): + """Create a dictionary mapping each label_id to a node that is + instantiated with a manually defined pgmpy factor instance. + + Input: + cpds_list - list of instances of pgmpy factors, e.g. TabularCPD + + Returns: + label_to_node: dict mapping each label to the node that has that + label as a label_id. + """ + label_to_children = self.get_label_to_children_dict() + label_to_parents = self.get_label_to_parents_dict() + + label_to_node = dict() + for cpd in cpds_list: + label_id = cpd.variable + + node = Node(label_id=label_id, + children=label_to_children[label_id], + parents=label_to_parents[label_id], + cardinality=2, + cpd=cpd) + label_to_node[label_id] = node + + return label_to_node + + def create_nodes_from_edges(self, node_class): + """ + Create instances of the node_class. Assumes the node class is + initialized by label_id, children, and parents. + + Returns: + nodes: a set of (unique) nodes of the graph + """ + labels = self.get_labels_from_edges() + labels_to_parents = self.get_label_to_parents_dict() + labels_to_children = self.get_label_to_children_dict() + + nodes = set() + + for label in labels: + parents = labels_to_parents[label] + children = labels_to_children[label] + + node = node_class(label_id=label, + children=children, + parents=parents) + nodes.add(node) + return nodes diff --git a/beliefs/utils/math_helper.py b/beliefs/utils/math_helper.py new file mode 100644 index 0000000..a25ea68 --- /dev/null +++ b/beliefs/utils/math_helper.py @@ -0,0 +1,19 @@ +"""Random math utils.""" + + +def is_kronecker_delta(vector): + """Returns True if vector is a kronecker delta vector, False otherwise. + Specific evidence ('YES' or 'NO') is a kronecker delta vector, whereas + virtual evidence ('MAYBE') is not. + """ + count = 0 + for x in vector: + if x == 1: + count += 1 + elif x != 0: + return False + + if count == 1: + return True + else: + return False diff --git a/beliefs/utils/random_variables.py b/beliefs/utils/random_variables.py new file mode 100644 index 0000000..1a0b0f7 --- /dev/null +++ b/beliefs/utils/random_variables.py @@ -0,0 +1,21 @@ + + +def get_reachable_observed_variables_for_inferred_variables(model, observed=set()): + """ + After performing inference on a BayesianModel, get the labels of observed variables + ("reachable observed variables") that influenced the beliefs of variables inferred + to be in a definite state. + + INPUT + model: instance of BayesianModel class or subclass + observed: set of labels (strings) corresponding to vars pinned to definite + state during inference. + RETURNS + dict, of form key - source label (a string), value - a list of strings + """ + if not observed: + return {} + + source_vars = model.get_unobserved_variables_in_definite_state(observed) + + return {var: model.reachable_observed_variables(var, observed) for var in source_vars} diff --git a/tests/test_belief_propagation.py b/tests/test_belief_propagation.py new file mode 100644 index 0000000..223ba78 --- /dev/null +++ b/tests/test_belief_propagation.py @@ -0,0 +1,252 @@ +import numpy as np +import pytest +from pytest import approx + +from beliefs.inference.belief_propagation import BeliefPropagation, ConflictingEvidenceError +from beliefs.models.BernoulliOrModel import BernoulliOrModel +from beliefs.types.BernoulliOrNode import BernoulliOrNode + + +@pytest.fixture(scope='module') +def edges_four_nodes(): + """Edges define a polytree with 4 nodes (connected in an X-shape with the + node, 'x', at the center of the X.""" + edges = [('u', 'x'), ('v', 'x'), ('x', 'y'), ('x', 'z')] + return edges + + +@pytest.fixture(scope='module') +def simple_edges(): + """Edges define a polytree with 15 nodes.""" + edges = [('1', '3'), ('2', '3'), ('3', '5'), ('4', '5'), ('5', '10'), + ('5', '9'), ('6', '8'), ('7', '8'), ('8', '9'), ('9', '11'), + ('9', 'x'), ('14', 'x'), ('x', '12'), ('x', '13')] + return edges + + +@pytest.fixture(scope='module') +def many_parents_edges(): + """Node 62 has 18 parents and no children.""" + edges = [('96', '62'), ('80', '62'), ('98', '62'), + ('100', '62'), ('86', '62'), ('102', '62'), ('104', '62'), + ('64', '62'), ('106', '62'), ('108', '62'), ('110', '62'), + ('112', '62'), ('114', '62'), ('116', '62'), ('118', '62'), + ('122', '62'), ('70', '62'), ('94', '62')] + return edges + + +@pytest.fixture(scope='function') +def four_node_model(edges_four_nodes): + return BernoulliOrModel(edges_four_nodes) + + +@pytest.fixture(scope='function') +def simple_model(simple_edges): + return BernoulliOrModel(simple_edges) + + +@pytest.fixture(scope='function') +def many_parents_model(many_parents_edges): + return BernoulliOrModel(many_parents_edges) + + +@pytest.fixture(scope='function') +def one_node_model(): + a_node = BernoulliOrNode(label_id='x', children=set(), parents=set()) + return BernoulliOrModel(edges=None, nodes={'x': a_node}) + + +def get_label_mapped_to_positive_belief(query_result): + """Return a dictionary mapping each label_id to the probability of + the label being True.""" + return {label_id: belief[1] for label_id, belief in query_result.items()} + + +def compare_dictionaries(expected, observed): + for key, expected_value in expected.items(): + observed_value = observed.get(key) + if observed_value is None: + raise KeyError("Expected key {} not in observed.") + assert observed_value == approx(expected_value), \ + "Expected {} but got {}".format(expected_value, observed_value) + + +#============================================================================================== +# Tests of single Bernoulli node model + +def test_no_evidence_one_node_model(one_node_model): + expected = {'x': 0.5} + infer = BeliefPropagation(one_node_model) + query_result = infer.query(evidence={}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_virtual_evidence_one_node_model(one_node_model): + """Curator thinks YES is 10x more likely than NO based on virtual evidence.""" + expected = {'x': 5/(0.5+5)} + infer = BeliefPropagation(one_node_model) + query_result = infer.query(evidence={'x': np.array([1, 10])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_MAYBE_default_evidence_one_node_model(one_node_model): + expected = {'x': 0.5} + infer = BeliefPropagation(one_node_model) + query_result = infer.query(evidence={'x': np.array([0.5, 0.5])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_YES_evidence_one_node_model(one_node_model): + expected = {'x': 1} + infer = BeliefPropagation(one_node_model) + query_result = infer.query(evidence={'x': np.array([0, 1])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_NO_evidence_one_node_model(one_node_model): + expected = {'x': 0} + infer = BeliefPropagation(one_node_model) + query_result = infer.query(evidence={'x': np.array([1, 0])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +#============================================================================================== +# Tests of 4-node, 4-edge model + +def test_no_evidence_four_node_model(four_node_model): + expected = {'x': 1-0.5**2} + infer = BeliefPropagation(four_node_model) + query_result = infer.query(evidence={}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_virtual_evidence_for_node_x_four_node_model(four_node_model): + """Virtual evidence for node x.""" + expected = {'x': 0.967741935483871, 'y': 0.967741935483871, 'z': 0.967741935483871, + 'u': 0.6451612903225806, 'v': 0.6451612903225806} + infer = BeliefPropagation(four_node_model) + query_result = infer.query(evidence={'x': np.array([1, 10])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +#============================================================================================== +# Tests of simple BernoulliOr polytree model + +def test_no_evidence_simple_model(simple_model): + expected = {'x': 0.984375, '14': 0.5, '7': 0.5, '2': 0.5, '3': + 0.75, '13': 0.984375, '6': 0.5, '4': 0.5, '8': 0.75, '10': 0.875, + '1': 0.5, '9': 0.96875, '12': 0.984375, '5': 0.875, '11': 0.96875} + infer = BeliefPropagation(simple_model) + query_result = infer.query(evidence={}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_belief_propagation_no_modify_model_inplace(simple_model): + assert not simple_model.all_nodes_are_fully_initialized + infer = BeliefPropagation(simple_model, inplace=False) + _ = infer.query(evidence={}) + # after belief propagation, model node values should be unchanged + assert not simple_model.all_nodes_are_fully_initialized + + +def test_belief_propagation_modify_model_inplace(simple_model): + assert not simple_model.all_nodes_are_fully_initialized + expected = {'x': 0.984375, '14': 0.5, '7': 0.5, '2': 0.5, '3': + 0.75, '13': 0.984375, '6': 0.5, '4': 0.5, '8': 0.75, '10': 0.875, + '1': 0.5, '9': 0.96875, '12': 0.984375, '5': 0.875, '11': 0.96875} + infer = BeliefPropagation(simple_model, inplace=True) + _ = infer.query(evidence={}) + + assert simple_model.all_nodes_are_fully_initialized + beliefs_from_model = {node_id: node.belief[1] for node_id, node in simple_model.nodes.items()} + compare_dictionaries(expected, beliefs_from_model) + + +def test_positive_evidence_node_13(simple_model): + expected = {'6': 0.50793650793650791, '3': 0.76190476190476186, + '9': 0.98412698412698407, '8': 0.76190476190476186, + 'x': 1.0, '4': 0.50793650793650791, '11': 0.98412698412698407, + '1': 0.50793650793650791, '5': 0.88888888888888884, + '2': 0.50793650793650791, '12': 1.0, + '14': 0.50793650793650791, '13': 1, + '10': 0.88888888888888884, '7': 0.50793650793650791} + infer = BeliefPropagation(simple_model) + query_result = infer.query(evidence={'13': np.array([0, 1])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_positive_evidence_node_5(simple_model): + expected = {'1': 0.5714285714285714, '5': 1, '3': + 0.8571428571428571, '10': 1.0, '8': 0.75, '2': 0.5714285714285714, + '4': 0.5714285714285714, '6': 0.5, '7': 0.5, '14': 0.5, '12': 1.0, + '13': 1.0, '11': 1.0, '9': 1.0, 'x': 1.0} + infer = BeliefPropagation(simple_model) + query_result = infer.query(evidence={'5': np.array([0, 1])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_positive_evidence_node_5_negative_evidence_node_14(simple_model): + expected = {'6': 0.5, '7': 0.5, '9': 1.0, '3': 0.8571428571428571, + '1': 0.57142857142857151, '12': 1.0, 'x': 1.0, '11': 1.0, '14': + 0.0, '2': 0.57142857142857151, '4': 0.5714285714285714, '5': 1.0, + '10': 1.0, '13': 1.0, '8': 0.75} + infer = BeliefPropagation(simple_model) + query_result = infer.query(evidence={'5': np.array([0, 1]), '14': np.array([1, 0])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_conflicting_evidence(simple_model): + infer = BeliefPropagation(simple_model) + with pytest.raises(ConflictingEvidenceError) as err: + query_result = infer.query(evidence={'x': np.array([1, 0]), '5': np.array([0, 1])}) + assert "Can't run belief propagation with conflicting evidence" in str(err) + + +#============================================================================================== +# Tests of model with 18 parents sharing a single child + +def test_no_evidence_many_parents_model(many_parents_model): + expected = {'64': 0.5, '86': 0.5, '62': 0.99999618530273438, + '116': 0.5, '100': 0.5, '108': 0.5, '122': 0.5, '114': 0.5, '98': + 0.5, '106': 0.5, '94': 0.5, '80': 0.5, '102': 0.5, '70': 0.5, + '118': 0.5, '96': 0.5, '104': 0.5, '110': 0.5, '112': 0.5} + infer = BeliefPropagation(many_parents_model) + query_result = infer.query(evidence={}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_positive_evidence_node_112(many_parents_model): + """If a single parent (112) is True, then (62) has to be True.""" + expected = {'64': 0.5, '86': 0.5, '62': 1.0, '116': 0.5, '100': + 0.5, '108': 0.5, '122': 0.5, '114': 0.5, '98': 0.5, + '106': 0.5, '94': 0.5, '80': 0.5, '102': 0.5, '70': + 0.5, '118': 0.5, '96': 0.5, '104': 0.5, '110': 0.5, + '112': 1.0} + infer = BeliefPropagation(many_parents_model) + query_result = infer.query(evidence={'112': np.array([0, 1])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) + + +def test_negative_evidence_node_62(many_parents_model): + """If node 62 is False, then all of its parents must be False.""" + expected = {'64': 0, '86': 0, '62': 0, '116': 0, '100': 0, '108': + 0, '122': 0, '114': 0, '98': 0, '106': 0, '94': 0, + '80': 0, '102': 0, '70': 0, '118': 0, '96': 0, '104': + 0, '110': 0, '112': 0} + infer = BeliefPropagation(many_parents_model) + query_result = infer.query(evidence={'62': np.array([1, 0])}) + result = get_label_mapped_to_positive_belief(query_result) + compare_dictionaries(expected, result) diff --git a/tests/test_get_reachable_observed_variables.py b/tests/test_get_reachable_observed_variables.py new file mode 100644 index 0000000..d6590ad --- /dev/null +++ b/tests/test_get_reachable_observed_variables.py @@ -0,0 +1,129 @@ +import numpy as np + +from test_belief_propagation import simple_model, simple_edges + +from beliefs.inference.belief_propagation import BeliefPropagation +from beliefs.utils.random_variables import ( + get_reachable_observed_variables_for_inferred_variables +) + + +def test_reachable_observed_vars_direct_common_effect(simple_model): + observed_vars = {'14': np.array([1,0]), 'x': np.array([1,0])} + infer = BeliefPropagation(simple_model) + infer.query(evidence=observed_vars) + + expected = {'x', '14'} + observed = simple_model.reachable_observed_variables( + source='9', + observed=set(observed_vars.keys()) + ) + assert expected == observed + + +def test_reachable_observed_vars_indirect_common_effect(simple_model): + observed_vars = {'12': np.array([1,0]), '14': np.array([1,0])} + infer = BeliefPropagation(simple_model) + infer.query(evidence=observed_vars) + + expected = {'12', '14'} + observed = simple_model.reachable_observed_variables( + source='9', + observed=set(observed_vars.keys()) + ) + assert expected == observed + + +def test_reachable_observed_vars_common_cause(simple_model): + observed_vars = {'10': np.array([0,1])} + infer = BeliefPropagation(simple_model) + infer.query(evidence=observed_vars) + + expected = {'10'} + observed = simple_model.reachable_observed_variables( + source='9', + observed=set(observed_vars.keys()) + ) + assert expected == observed + + +def test_reachable_observed_vars_blocked_common_cause(simple_model): + observed_vars = {'10': np.array([0,1]), '5': np.array([0,1])} + infer = BeliefPropagation(simple_model) + infer.query(evidence=observed_vars) + + expected = {'5'} + observed = simple_model.reachable_observed_variables( + source='9', + observed=set(observed_vars.keys()) + ) + assert expected == observed + + +def test_reachable_observed_vars_indirect_causal(simple_model): + observed_vars = {'1': np.array([0,1]), '2': np.array([1,0])} + infer = BeliefPropagation(simple_model) + infer.query(evidence=observed_vars) + + expected = {'1', '2'} + observed = simple_model.reachable_observed_variables( + source='9', + observed=set(observed_vars.keys()) + ) + assert expected == observed + + +def test_reachable_observed_vars_blocked_causal(simple_model): + observed_vars = {'1': np.array([0,1]), '2': np.array([1,0]), '3': np.array([0,1])} + infer = BeliefPropagation(simple_model) + infer.query(evidence=observed_vars) + + expected = {'3'} + observed = simple_model.reachable_observed_variables( + source='9', + observed=set(observed_vars.keys()) + ) + assert expected == observed + + +def test_reachable_observed_vars_indirect_evidential(simple_model): + observed_vars = {'13': np.array([1,0])} + infer = BeliefPropagation(simple_model) + infer.query(evidence=observed_vars) + + expected = {'13'} + observed = simple_model.reachable_observed_variables( + source='9', + observed=set(observed_vars.keys()) + ) + assert expected == observed + + +def test_reachable_observed_vars_blocked_evidential(simple_model): + observed_vars = {'x': np.array([1,0]), '13': np.array([1,0])} + infer = BeliefPropagation(simple_model) + infer.query(evidence=observed_vars) + + expected = {'x'} + observed = simple_model.reachable_observed_variables( + source='9', + observed=set(observed_vars.keys()) + ) + assert expected == observed + + +def test_get_reachable_obs_vars_for_inferred(simple_model): + observed_vars = {'6': np.array([1,0]), '7': np.array([1,0]), '10': np.array([1,0])} + infer = BeliefPropagation(simple_model) + infer.query(evidence=observed_vars) + + print(set(simple_model.get_unobserved_variables_in_definite_state(observed_vars.keys()))) + print(simple_model._get_ancestors_of(set(observed_vars.keys()))) + expected = {'4': {'10'}, '1': {'10'}, '11': {'7', '6', '10'}, '2': {'10'}, + '8': {'7', '6'}, '5': {'10'}, '3': {'10'}, '9': {'7', '6', '10'}} + + observed = get_reachable_observed_variables_for_inferred_variables( + model=simple_model, + observed=set(observed_vars.keys()) + ) + assert expected == observed -- cgit v1.2.3 From b16e990b7e4d00e427d4445ba38eef0fb967963a Mon Sep 17 00:00:00 2001 From: Cathy Yeh Date: Mon, 13 Nov 2017 15:34:33 -0800 Subject: changes to work with bump from networkx 1.11 to 2.0 some nx functions now return iterators --- beliefs/inference/belief_propagation.py | 22 +++++++++++----------- beliefs/models/BayesianModel.py | 24 ++++++++++++------------ beliefs/models/BernoulliOrModel.py | 2 +- beliefs/models/DirectedGraph.py | 11 ++++++----- tests/test_belief_propagation.py | 3 ++- 5 files changed, 32 insertions(+), 30 deletions(-) diff --git a/beliefs/inference/belief_propagation.py b/beliefs/inference/belief_propagation.py index 972fd5d..ecd5e9c 100644 --- a/beliefs/inference/belief_propagation.py +++ b/beliefs/inference/belief_propagation.py @@ -50,7 +50,7 @@ class BeliefPropagation: node_to_update_label_id, msg_sender_label_id = nodes_to_update.pop() print("Node", node_to_update_label_id) - node = self.model.nodes[node_to_update_label_id] + node = self.model.nodes_dict[node_to_update_label_id] # exclude the message sender (either a parent or child) from getting an # outgoing msg from the node to update @@ -75,7 +75,7 @@ class BeliefPropagation: except InvalidLambdaMsgToParent: raise ConflictingEvidenceError(evidence=evidence) - parent_node = self.model.nodes[parent_id] + parent_node = self.model.nodes_dict[parent_id] parent_node.update_lambda_msg_from_child(child=node_to_update_label_id, new_value=new_lambda_msg) nodes_to_update.add(MsgPassers(msg_receiver=parent_id, @@ -83,7 +83,7 @@ class BeliefPropagation: for child_id in child_ids: new_pi_msg = node.compute_pi_msg_to_child(child_k=child_id) - child_node = self.model.nodes[child_id] + child_node = self.model.nodes_dict[child_id] child_node.update_pi_msg_from_parent(parent=node_to_update_label_id, new_value=new_pi_msg) nodes_to_update.add(MsgPassers(msg_receiver=child_id, @@ -104,7 +104,7 @@ class BeliefPropagation: """ self.model.set_boundary_conditions() - for node in self.model.nodes.values(): + for node in self.model.nodes_dict.values(): ones_vector = np.ones([node.cardinality]) node.lambda_agg = ones_vector @@ -119,7 +119,7 @@ class BeliefPropagation: for node_id in topdown_order: print('label in iteration through top-down order:', node_id) - node_sending_msg = self.model.nodes[node_id] + node_sending_msg = self.model.nodes_dict[node_id] child_ids = node_sending_msg.children if node_sending_msg.pi_agg is None: @@ -130,7 +130,7 @@ class BeliefPropagation: new_pi_msg = node_sending_msg.compute_pi_msg_to_child(child_k=child_id) print(new_pi_msg) - child_node = self.model.nodes[child_id] + child_node = self.model.nodes_dict[child_id] child_node.update_pi_msg_from_parent(parent=node_id, new_value=new_pi_msg) @@ -143,17 +143,17 @@ class BeliefPropagation: for evidence_id, observed_value in evidence.items(): nodes_to_update = set() - if evidence_id not in self.model.nodes.keys(): + if evidence_id not in self.model.nodes_dict.keys(): raise KeyError("Evidence supplied for non-existent label_id: {}" .format(evidence_id)) if is_kronecker_delta(observed_value): # specific evidence - self.model.nodes[evidence_id].lambda_agg = observed_value + self.model.nodes_dict[evidence_id].lambda_agg = observed_value else: # virtual evidence - self.model.nodes[evidence_id].lambda_agg = \ - self.model.nodes[evidence_id].lambda_agg * observed_value + self.model.nodes_dict[evidence_id].lambda_agg = \ + self.model.nodes_dict[evidence_id].lambda_agg * observed_value nodes_to_update.add(MsgPassers(msg_receiver=evidence_id, msg_sender=None)) @@ -189,4 +189,4 @@ class BeliefPropagation: if evidence: self._run_belief_propagation(evidence) - return {label_id: node.belief for label_id, node in self.model.nodes.items()} + return {label_id: node.belief for label_id, node in self.model.nodes_dict.items()} diff --git a/beliefs/models/BayesianModel.py b/beliefs/models/BayesianModel.py index bdfd037..6257a57 100644 --- a/beliefs/models/BayesianModel.py +++ b/beliefs/models/BayesianModel.py @@ -12,18 +12,18 @@ class BayesianModel(DirectedGraph): Bayesian model stores nodes and edges described by conditional probability distributions. """ - def __init__(self, edges, nodes=None): + def __init__(self, edges, nodes_dict=None): """ Input: edges: list of edge tuples of form ('parent', 'child') nodes: (optional) dict a dict key, value pair as {label_id: instance_of_node_class_or_subclass} """ - if nodes is not None: - super().__init__(edges, nodes.keys()) + if nodes_dict is not None: + super().__init__(edges, nodes_dict.keys()) else: super().__init__(edges) - self.nodes = nodes + self.nodes_dict = nodes_dict @classmethod def from_node_class(cls, edges, node_class): @@ -57,10 +57,10 @@ class BayesianModel(DirectedGraph): to an (unnormalized) unit vector, of length the cardinality of x. """ for root in self.get_roots(): - self.nodes[root].pi_agg = self.nodes[root].cpd.values + self.nodes_dict[root].pi_agg = self.nodes_dict[root].cpd.values for leaf in self.get_leaves(): - self.nodes[leaf].lambda_agg = np.ones([self.nodes[leaf].cardinality]) + self.nodes_dict[leaf].lambda_agg = np.ones([self.nodes_dict[leaf].cardinality]) @property def all_nodes_are_fully_initialized(self): @@ -68,7 +68,7 @@ class BayesianModel(DirectedGraph): Returns True if, for all nodes in the model, all lambda and pi messages and lambda_agg and pi_agg are not None, else False. """ - for node in self.nodes.values(): + for node in self.nodes_dict.values(): if not node.is_fully_initialized: return False return True @@ -77,8 +77,8 @@ class BayesianModel(DirectedGraph): """ Returns a copy of the model. """ - copy_edges = self.edges().copy() - copy_nodes = copy.deepcopy(self.nodes) + copy_edges = list(self.edges()).copy() + copy_nodes = copy.deepcopy(self.nodes_dict) copy_model = self.__class__(edges=copy_edges, nodes=copy_nodes) return copy_model @@ -90,7 +90,7 @@ class BayesianModel(DirectedGraph): RETURNS set of strings (labels) """ - return {label for label, node in self.nodes.items() if is_kronecker_delta(node.belief)} + return {label for label, node in self.nodes_dict.items() if is_kronecker_delta(node.belief)} def get_unobserved_variables_in_definite_state(self, observed=set()): """ @@ -105,9 +105,9 @@ class BayesianModel(DirectedGraph): # Assert that beliefs of directly observed vars are kronecker deltas for label in observed: - assert is_kronecker_delta(self.nodes[label].belief), \ + assert is_kronecker_delta(self.nodes_dict[label].belief), \ ("Observed label has belief {} but should be kronecker delta" - .format(self.nodes[label].belief)) + .format(self.nodes_dict[label].belief)) vars_in_definite_state = self.get_variables_in_definite_state() assert observed <= vars_in_definite_state, \ diff --git a/beliefs/models/BernoulliOrModel.py b/beliefs/models/BernoulliOrModel.py index da18fb6..bf2b44c 100644 --- a/beliefs/models/BernoulliOrModel.py +++ b/beliefs/models/BernoulliOrModel.py @@ -14,4 +14,4 @@ class BernoulliOrModel(BayesianModel): """ if nodes is None: nodes = self.create_nodes(edges, node_class=BernoulliOrNode) - super().__init__(edges, nodes=nodes) + super().__init__(edges, nodes_dict=nodes) diff --git a/beliefs/models/DirectedGraph.py b/beliefs/models/DirectedGraph.py index 8fce894..84b3a02 100644 --- a/beliefs/models/DirectedGraph.py +++ b/beliefs/models/DirectedGraph.py @@ -21,15 +21,16 @@ class DirectedGraph(nx.DiGraph): """ Returns a list of leaves of the graph. """ - return [node for node, out_degree in self.out_degree_iter() if - out_degree == 0] + return [node for node, out_degree in self.out_degree() if out_degree == 0] def get_roots(self): """ Returns a list of roots of the graph. """ - return [node for node, in_degree in self.in_degree().items() if - in_degree == 0] + return [node for node, in_degree in self.in_degree() if in_degree == 0] def get_topologically_sorted_nodes(self, reverse=False): - return nx.topological_sort(self, reverse=reverse) + if reverse: + return list(reversed(list(nx.topological_sort(self)))) + else: + return nx.topological_sort(self) diff --git a/tests/test_belief_propagation.py b/tests/test_belief_propagation.py index 223ba78..ef7ffb0 100644 --- a/tests/test_belief_propagation.py +++ b/tests/test_belief_propagation.py @@ -166,7 +166,8 @@ def test_belief_propagation_modify_model_inplace(simple_model): _ = infer.query(evidence={}) assert simple_model.all_nodes_are_fully_initialized - beliefs_from_model = {node_id: node.belief[1] for node_id, node in simple_model.nodes.items()} + beliefs_from_model = {node_id: node.belief[1] for + node_id, node in simple_model.nodes_dict.items()} compare_dictionaries(expected, beliefs_from_model) -- cgit v1.2.3 From 71e384a741e52f94882b14062a3dc10e5f391533 Mon Sep 17 00:00:00 2001 From: Cathy Yeh Date: Mon, 20 Nov 2017 11:40:02 -0800 Subject: BernoulliOrCPD inherits from TabularCPD --- beliefs/factors/BernoulliOrCPD.py | 37 +++++++++++++++++++++++++++++ beliefs/factors/BernoulliOrFactor.py | 42 --------------------------------- beliefs/factors/CPD.py | 36 ++++++++++++++++++++++++++++ beliefs/inference/belief_propagation.py | 4 ++-- beliefs/types/BernoulliOrNode.py | 4 ++-- beliefs/types/Node.py | 2 +- beliefs/utils/edges_helper.py | 12 +++++----- tests/test_belief_propagation.py | 3 ++- 8 files changed, 86 insertions(+), 54 deletions(-) create mode 100644 beliefs/factors/BernoulliOrCPD.py delete mode 100644 beliefs/factors/BernoulliOrFactor.py create mode 100644 beliefs/factors/CPD.py diff --git a/beliefs/factors/BernoulliOrCPD.py b/beliefs/factors/BernoulliOrCPD.py new file mode 100644 index 0000000..e4fcbf1 --- /dev/null +++ b/beliefs/factors/BernoulliOrCPD.py @@ -0,0 +1,37 @@ +import numpy as np + +from beliefs.factors.CPD import TabularCPD + + +class BernoulliOrCPD(TabularCPD): + """CPD class for a Bernoulli random variable whose relationship to its + parents (also Bernoulli random variables) is described by OR logic. + + If at least one of the variable's parents is True, then the variable + is True, and False otherwise. + """ + def __init__(self, variable, parents=set()): + super().__init__(variable=variable, + variable_card=2, + parents=parents, + parents_card=[2]*len(parents), + values=None) + self._values = None + + @property + def values(self): + if self._values is None: + self._values = self._build_kwise_values_array(len(self.variables)) + self._values = self._values.reshape(self.cardinality) + return self._values + + @staticmethod + def _build_kwise_values_array(k): + # special case a completely independent factor, and + # return the uniform prior + if k == 1: + return np.array([0.5, 0.5]) + + return np.array( + [1.,] + [0.]*(2**(k-1)-1) + [0.,] + [1.]*(2**(k-1)-1) + ) diff --git a/beliefs/factors/BernoulliOrFactor.py b/beliefs/factors/BernoulliOrFactor.py deleted file mode 100644 index 4f973ae..0000000 --- a/beliefs/factors/BernoulliOrFactor.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np - - -class BernoulliOrFactor: - """CPD class for a Bernoulli random variable whose relationship to its - parents is described by OR logic. - - If at least one of a child's parents is True, then the child is True, and - False otherwise.""" - def __init__(self, child, parents=set()): - self.child = child - self.parents = set(parents) - self.variables = set([child] + list(parents)) - self.cardinality = [2]*len(self.variables) - self._values = None - - @property - def values(self): - if self._values is None: - self._values = self._build_kwise_values_array(len(self.variables)) - self._values = self._values.reshape(self.cardinality) - return self._values - - def get_values(self): - """ - Returns the tabular cpd form of the values. - """ - if len(self.cardinality) == 1: - return self.values.reshape(1, np.prod(self.cardinality)) - else: - return self.values.reshape(self.cardinality[0], np.prod(self.cardinality[1:])) - - @staticmethod - def _build_kwise_values_array(k): - # special case a completely independent factor, and - # return the uniform prior - if k == 1: - return np.array([0.5, 0.5]) - - return np.array( - [1.,] + [0.]*(2**(k-1)-1) + [0.,] + [1.]*(2**(k-1)-1) - ) diff --git a/beliefs/factors/CPD.py b/beliefs/factors/CPD.py new file mode 100644 index 0000000..8de47b3 --- /dev/null +++ b/beliefs/factors/CPD.py @@ -0,0 +1,36 @@ +import numpy as np + + +class TabularCPD: + """ + Defines the conditional probability table for a discrete variable + whose parents are also discrete. + + TODO: have this inherit from DiscreteFactor + """ + def __init__(self, variable, variable_card, + parents=[], parents_card=[], values=[]): + """ + Args: + variable: int or string + variable_card: int + parents: optional, list of int and/or strings + parents_card: optional, list of int + values: optional, 2d list or array + """ + self.variable = variable + self.parents = parents + self.variables = [variable] + parents + self.cardinality = [variable_card] + parents_card + + if values: + self.values = np.array(values) + + def get_values(self): + """ + Returns the tabular cpd form of the values. + """ + if len(self.cardinality) == 1: + return self.values.reshape(1, np.prod(self.cardinality)) + else: + return self.values.reshape(self.cardinality[0], np.prod(self.cardinality[1:])) diff --git a/beliefs/inference/belief_propagation.py b/beliefs/inference/belief_propagation.py index ecd5e9c..37aa437 100644 --- a/beliefs/inference/belief_propagation.py +++ b/beliefs/inference/belief_propagation.py @@ -54,8 +54,8 @@ class BeliefPropagation: # exclude the message sender (either a parent or child) from getting an # outgoing msg from the node to update - parent_ids = node.parents - set([msg_sender_label_id]) - child_ids = node.children - set([msg_sender_label_id]) + parent_ids = set(node.parents) - set([msg_sender_label_id]) + child_ids = set(node.children) - set([msg_sender_label_id]) print("parent_ids:", parent_ids) print("child_ids:", child_ids) diff --git a/beliefs/types/BernoulliOrNode.py b/beliefs/types/BernoulliOrNode.py index 27da85a..ce497b9 100644 --- a/beliefs/types/BernoulliOrNode.py +++ b/beliefs/types/BernoulliOrNode.py @@ -6,7 +6,7 @@ from beliefs.types.Node import ( MessageType, InvalidLambdaMsgToParent ) -from beliefs.factors.BernoulliOrFactor import BernoulliOrFactor +from beliefs.factors.BernoulliOrCPD import BernoulliOrCPD class BernoulliOrNode(Node): @@ -18,7 +18,7 @@ class BernoulliOrNode(Node): children=children, parents=parents, cardinality=2, - cpd=BernoulliOrFactor(label_id, parents)) + cpd=BernoulliOrCPD(label_id, parents)) def compute_pi_agg(self): if not self.parents: diff --git a/beliefs/types/Node.py b/beliefs/types/Node.py index a8dca7c..a496acf 100644 --- a/beliefs/types/Node.py +++ b/beliefs/types/Node.py @@ -33,7 +33,7 @@ class Node: parents: set of strings cardinality: int, cardinality of the random variable the node represents cpd: an instance of a conditional probability distribution, - e.g. BernoulliOrFactor or pgmpy's TabularCPD + e.g. BernoulliOrCPD or TabularCPD """ self.label_id = label_id self.children = children diff --git a/beliefs/utils/edges_helper.py b/beliefs/utils/edges_helper.py index 7ac783c..c959a3b 100644 --- a/beliefs/utils/edges_helper.py +++ b/beliefs/utils/edges_helper.py @@ -1,7 +1,7 @@ from collections import defaultdict from beliefs.types.Node import Node -from beliefs.factors.BernoulliOrFactor import BernoulliOrFactor +from beliefs.factors.BernoulliOrCPD import BernoulliOrCPD class EdgesHelper: @@ -33,7 +33,7 @@ class EdgesHelper: all_labels.update({parent, child}) return all_labels - def create_cpds_from_edges(self, CPD=BernoulliOrFactor): + def create_cpds_from_edges(self, CPD=BernoulliOrCPD): """ Create factors from list of edges. @@ -55,7 +55,7 @@ class EdgesHelper: factors.add(cpd) return factors - def get_label_to_factor_dict(self, CPD=BernoulliOrFactor): + def get_label_to_factor_dict(self, CPD=BernoulliOrCPD): """Create a dictionary mapping each label_id to the CPD/factor where that label_id is a child. @@ -70,7 +70,7 @@ class EdgesHelper: label_to_factor[factor.child] = factor return label_to_factor - def get_label_to_node_dict(self, CPD=BernoulliOrFactor): + def get_label_to_node_dict(self, CPD=BernoulliOrCPD): """Create a dictionary mapping each label_id to a Node instance. Returns: @@ -126,8 +126,8 @@ class EdgesHelper: nodes = set() for label in labels: - parents = labels_to_parents[label] - children = labels_to_children[label] + parents = list(labels_to_parents[label]) + children = list(labels_to_children[label]) node = node_class(label_id=label, children=children, diff --git a/tests/test_belief_propagation.py b/tests/test_belief_propagation.py index ef7ffb0..24ee94b 100644 --- a/tests/test_belief_propagation.py +++ b/tests/test_belief_propagation.py @@ -52,7 +52,8 @@ def many_parents_model(many_parents_edges): @pytest.fixture(scope='function') def one_node_model(): - a_node = BernoulliOrNode(label_id='x', children=set(), parents=set()) + a_node = BernoulliOrNode(label_id='x', children=[], parents=[]) + # a_node = BernoulliOrNode(label_id='x', children=set(), parents=set()) return BernoulliOrModel(edges=None, nodes={'x': a_node}) -- cgit v1.2.3 From d166e36eaf5803af035e444628c67701322b0eb6 Mon Sep 17 00:00:00 2001 From: Cathy Yeh Date: Mon, 20 Nov 2017 17:05:37 -0800 Subject: refactor msg passing methods to BeliefUpdateNodeModel from BayesianModel --- beliefs/factors/BernoulliOrCPD.py | 13 +- beliefs/factors/CPD.py | 15 +- beliefs/inference/belief_propagation.py | 7 +- beliefs/models/BayesianModel.py | 76 ++------- beliefs/models/BernoulliOrModel.py | 17 -- .../models/beliefupdate/BeliefUpdateNodeModel.py | 91 +++++++++++ beliefs/models/beliefupdate/BernoulliOrNode.py | 47 ++++++ beliefs/models/beliefupdate/Node.py | 179 +++++++++++++++++++++ beliefs/types/BernoulliOrNode.py | 47 ------ beliefs/types/Node.py | 179 --------------------- beliefs/types/__init__.py | 0 beliefs/utils/edges_helper.py | 2 +- tests/test_belief_propagation.py | 13 +- 13 files changed, 365 insertions(+), 321 deletions(-) delete mode 100644 beliefs/models/BernoulliOrModel.py create mode 100644 beliefs/models/beliefupdate/BeliefUpdateNodeModel.py create mode 100644 beliefs/models/beliefupdate/BernoulliOrNode.py create mode 100644 beliefs/models/beliefupdate/Node.py delete mode 100644 beliefs/types/BernoulliOrNode.py delete mode 100644 beliefs/types/Node.py delete mode 100644 beliefs/types/__init__.py diff --git a/beliefs/factors/BernoulliOrCPD.py b/beliefs/factors/BernoulliOrCPD.py index e4fcbf1..2c6a31e 100644 --- a/beliefs/factors/BernoulliOrCPD.py +++ b/beliefs/factors/BernoulliOrCPD.py @@ -10,17 +10,22 @@ class BernoulliOrCPD(TabularCPD): If at least one of the variable's parents is True, then the variable is True, and False otherwise. """ - def __init__(self, variable, parents=set()): + def __init__(self, variable, parents=[]): + """ + Args: + variable: int or string + parents: optional, list of int and/or strings + """ super().__init__(variable=variable, variable_card=2, parents=parents, parents_card=[2]*len(parents), - values=None) - self._values = None + values=[]) + self._values = [] @property def values(self): - if self._values is None: + if not any(self._values): self._values = self._build_kwise_values_array(len(self.variables)) self._values = self._values.reshape(self.cardinality) return self._values diff --git a/beliefs/factors/CPD.py b/beliefs/factors/CPD.py index 8de47b3..a286aaa 100644 --- a/beliefs/factors/CPD.py +++ b/beliefs/factors/CPD.py @@ -6,7 +6,7 @@ class TabularCPD: Defines the conditional probability table for a discrete variable whose parents are also discrete. - TODO: have this inherit from DiscreteFactor + TODO: have this inherit from DiscreteFactor implementing explicit factor methods """ def __init__(self, variable, variable_card, parents=[], parents_card=[], values=[]): @@ -22,9 +22,11 @@ class TabularCPD: self.parents = parents self.variables = [variable] + parents self.cardinality = [variable_card] + parents_card + self._values = np.array(values) - if values: - self.values = np.array(values) + @property + def values(self): + return self._values def get_values(self): """ @@ -34,3 +36,10 @@ class TabularCPD: return self.values.reshape(1, np.prod(self.cardinality)) else: return self.values.reshape(self.cardinality[0], np.prod(self.cardinality[1:])) + + def copy(self): + return self.__class__(self.variable, + self.cardinality[0], + self.parents, + self.cardinality[1:], + self._values) diff --git a/beliefs/inference/belief_propagation.py b/beliefs/inference/belief_propagation.py index 37aa437..02f5595 100644 --- a/beliefs/inference/belief_propagation.py +++ b/beliefs/inference/belief_propagation.py @@ -1,7 +1,8 @@ import numpy as np from collections import namedtuple -from beliefs.types.Node import InvalidLambdaMsgToParent +from beliefs.models.beliefupdate.Node import InvalidLambdaMsgToParent +from beliefs.models.beliefupdate.BeliefUpdateNodeModel import BeliefUpdateNodeModel from beliefs.utils.math_helper import is_kronecker_delta @@ -22,10 +23,12 @@ class BeliefPropagation: def __init__(self, model, inplace=True): """ Input: - model: an instance of BayesianModel class or subclass + model: an instance of BeliefUpdateNodeModel inplace: bool modify in-place the nodes in the model during belief propagation """ + if not isinstance(model, BeliefUpdateNodeModel): + raise TypeError("Model must be an instance of BeliefUpdateNodeModel") if inplace is False: self.model = model.copy() else: diff --git a/beliefs/models/BayesianModel.py b/beliefs/models/BayesianModel.py index 6257a57..b57f968 100644 --- a/beliefs/models/BayesianModel.py +++ b/beliefs/models/BayesianModel.py @@ -1,9 +1,7 @@ import copy -import numpy as np import networkx as nx from beliefs.models.DirectedGraph import DirectedGraph -from beliefs.utils.edges_helper import EdgesHelper from beliefs.utils.math_helper import is_kronecker_delta @@ -12,74 +10,30 @@ class BayesianModel(DirectedGraph): Bayesian model stores nodes and edges described by conditional probability distributions. """ - def __init__(self, edges, nodes_dict=None): + def __init__(self, edges=[], variables=[], cpds=[]): """ - Input: - edges: list of edge tuples of form ('parent', 'child') - nodes: (optional) dict - a dict key, value pair as {label_id: instance_of_node_class_or_subclass} - """ - if nodes_dict is not None: - super().__init__(edges, nodes_dict.keys()) - else: - super().__init__(edges) - self.nodes_dict = nodes_dict - - @classmethod - def from_node_class(cls, edges, node_class): - """Automatically create all nodes from the same node class + Base class for Bayesian model. Input: - edges: list of edge tuples of form ('parent', 'child') - node_class: (optional) the Node class or subclass from which to - create all the nodes from edges. - """ - nodes = cls.create_nodes(edges, node_class) - return cls.__init__(edges=edges, nodes=nodes) - - @staticmethod - def create_nodes(edges, node_class): - """Returns list of Node instances created from edges using - the default node_class""" - edges_helper = EdgesHelper(edges) - nodes = edges_helper.create_nodes_from_edges(node_class=node_class) - label_to_node = dict() - for node in nodes: - label_to_node[node.label_id] = node - return label_to_node - - def set_boundary_conditions(self): - """ - 1. Root nodes: if x is a node with no parents, set Pi(x) = prior - probability of x. - - 2. Leaf nodes: if x is a node with no children, set Lambda(x) - to an (unnormalized) unit vector, of length the cardinality of x. - """ - for root in self.get_roots(): - self.nodes_dict[root].pi_agg = self.nodes_dict[root].cpd.values - - for leaf in self.get_leaves(): - self.nodes_dict[leaf].lambda_agg = np.ones([self.nodes_dict[leaf].cardinality]) - - @property - def all_nodes_are_fully_initialized(self): - """ - Returns True if, for all nodes in the model, all lambda and pi - messages and lambda_agg and pi_agg are not None, else False. + edges: (optional) list of edges, + tuples of form ('parent', 'child') + variables: (optional) list of str or int + labels for variables + cpds: (optional) list of CPDs + TabularCPD class or subclass """ - for node in self.nodes_dict.values(): - if not node.is_fully_initialized: - return False - return True + super().__init__() + super().add_edges_from(edges) + super().add_nodes_from(variables) + self.cpds = cpds def copy(self): """ Returns a copy of the model. """ - copy_edges = list(self.edges()).copy() - copy_nodes = copy.deepcopy(self.nodes_dict) - copy_model = self.__class__(edges=copy_edges, nodes=copy_nodes) + copy_model = self.__class__(edges=list(self.edges()).copy(), + variables=list(self.nodes()).copy(), + cpds=[cpd.copy() for cpd in self.cpds]) return copy_model def get_variables_in_definite_state(self): diff --git a/beliefs/models/BernoulliOrModel.py b/beliefs/models/BernoulliOrModel.py deleted file mode 100644 index bf2b44c..0000000 --- a/beliefs/models/BernoulliOrModel.py +++ /dev/null @@ -1,17 +0,0 @@ -from beliefs.models.BayesianModel import BayesianModel -from beliefs.types.BernoulliOrNode import BernoulliOrNode - - -class BernoulliOrModel(BayesianModel): - """ - BernoulliOrModel stores node instances of BernoulliOrNodes (Bernoulli - variables associated with an OR conditional probability distribution). - """ - def __init__(self, edges, nodes=None): - """ - Input: - edges: an edge list, e.g. [(parent1, child1), (parent1, child2)] - """ - if nodes is None: - nodes = self.create_nodes(edges, node_class=BernoulliOrNode) - super().__init__(edges, nodes_dict=nodes) diff --git a/beliefs/models/beliefupdate/BeliefUpdateNodeModel.py b/beliefs/models/beliefupdate/BeliefUpdateNodeModel.py new file mode 100644 index 0000000..d74eaa7 --- /dev/null +++ b/beliefs/models/beliefupdate/BeliefUpdateNodeModel.py @@ -0,0 +1,91 @@ +import copy +import numpy as np + +from beliefs.models.BayesianModel import BayesianModel +from beliefs.utils.edges_helper import EdgesHelper + + +class BeliefUpdateNodeModel(BayesianModel): + """ + A Bayesian model storing nodes (e.g. Node or BernoulliOrNode) implementing properties + and methods for Pearl's belief update algorithm. + + ref: "Fusion, Propagation, and Structuring in Belief Networks" + Artificial Intelligence 29 (1986) 241-288 + + """ + def __init__(self, nodes_dict): + """ + Input: + nodes_dict: dict + a dict key, value pair as {label_id: instance_of_node_class_or_subclass} + """ + super().__init__(edges=self._get_edges_from_nodes(nodes_dict.values()), + variables=list(nodes_dict.keys()), + cpds=[node.cpd for node in nodes_dict.values()]) + + self.nodes_dict = nodes_dict + + @classmethod + def from_edges(cls, edges, node_class): + """Create nodes from the same node class. + + Input: + edges: list of edge tuples of form ('parent', 'child') + node_class: the Node class or subclass from which to + create all the nodes from edges. + """ + edges_helper = EdgesHelper(edges) + nodes = edges_helper.create_nodes_from_edges(node_class) + nodes_dict = {node.label_id: node for node in nodes} + return cls(nodes_dict) + + @staticmethod + def _get_edges_from_nodes(nodes): + """ + Return list of all directed edges in nodes. + + Args: + nodes: an iterable of objects of the Node class or subclass + Returns: + edges: list of edge tuples + """ + edges = set() + for node in nodes: + if node.parents: + edge_tuples = zip(node.parents, [node.label_id]*len(node.parents)) + edges.update(edge_tuples) + return list(edges) + + def set_boundary_conditions(self): + """ + 1. Root nodes: if x is a node with no parents, set Pi(x) = prior + probability of x. + + 2. Leaf nodes: if x is a node with no children, set Lambda(x) + to an (unnormalized) unit vector, of length the cardinality of x. + """ + for root in self.get_roots(): + self.nodes_dict[root].pi_agg = self.nodes_dict[root].cpd.values + + for leaf in self.get_leaves(): + self.nodes_dict[leaf].lambda_agg = np.ones([self.nodes_dict[leaf].cardinality]) + + @property + def all_nodes_are_fully_initialized(self): + """ + Returns True if, for all nodes in the model, all lambda and pi + messages and lambda_agg and pi_agg are not None, else False. + """ + for node in self.nodes_dict.values(): + if not node.is_fully_initialized: + return False + return True + + def copy(self): + """ + Returns a copy of the model. + """ + copy_nodes = copy.deepcopy(self.nodes_dict) + copy_model = self.__class__(nodes_dict=copy_nodes) + return copy_model diff --git a/beliefs/models/beliefupdate/BernoulliOrNode.py b/beliefs/models/beliefupdate/BernoulliOrNode.py new file mode 100644 index 0000000..3386275 --- /dev/null +++ b/beliefs/models/beliefupdate/BernoulliOrNode.py @@ -0,0 +1,47 @@ +import numpy as np +from functools import reduce + +from beliefs.models.beliefupdate.Node import ( + Node, + MessageType, + InvalidLambdaMsgToParent +) +from beliefs.factors.BernoulliOrCPD import BernoulliOrCPD + + +class BernoulliOrNode(Node): + def __init__(self, + label_id, + children, + parents): + super().__init__(label_id=label_id, + children=children, + parents=parents, + cardinality=2, + cpd=BernoulliOrCPD(label_id, parents)) + + def compute_pi_agg(self): + if not self.parents: + self.pi_agg = self.cpd.values + else: + pi_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) + parents_p0 = [p[0] for p in pi_msg_values] + p_0 = reduce(lambda x, y: x*y, parents_p0) + p_1 = 1 - p_0 + self.pi_agg = np.array([p_0, p_1]) + return self.pi_agg + + def compute_lambda_msg_to_parent(self, parent_k): + if np.array_equal(self.lambda_agg, np.ones([self.cardinality])): + return np.ones([self.cardinality]) + else: + # TODO: cleanup this validation + _ = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) + p0_excluding_k = [msg[0] for par_id, msg in self.pi_received_msgs.items() if par_id != parent_k] + p0_product = reduce(lambda x, y: x*y, p0_excluding_k, 1) + lambda_0 = self.lambda_agg[1] + (self.lambda_agg[0] - self.lambda_agg[1])*p0_product + lambda_1 = self.lambda_agg[1] + lambda_msg = np.array([lambda_0, lambda_1]) + if not any(lambda_msg): + raise InvalidLambdaMsgToParent + return self._normalize(lambda_msg) diff --git a/beliefs/models/beliefupdate/Node.py b/beliefs/models/beliefupdate/Node.py new file mode 100644 index 0000000..daa2f14 --- /dev/null +++ b/beliefs/models/beliefupdate/Node.py @@ -0,0 +1,179 @@ +import numpy as np +from functools import reduce +from enum import Enum + + +class InvalidLambdaMsgToParent(Exception): + """Computed invalid lambda msg to send to parent.""" + pass + + +class MessageType(Enum): + LAMBDA = 'lambda' + PI = 'pi' + + +class Node: + """A node in a DAG with methods to compute the belief (marginal probability + of the node given evidence) and compute pi/lambda messages to/from its neighbors + to update its belief. + + Implemented from Pearl's belief propagation algorithm. + """ + def __init__(self, + label_id, + children, + parents, + cardinality, + cpd): + """ + Args + label_id: str + children: set of strings + parents: set of strings + cardinality: int, cardinality of the random variable the node represents + cpd: an instance of a conditional probability distribution, + e.g. BernoulliOrCPD or TabularCPD + """ + self.label_id = label_id + self.children = children + self.parents = parents + self.cardinality = cardinality + self.cpd = cpd + + self.pi_agg = None # np.array dimensions [1, cardinality] + self.lambda_agg = None # np.array dimensions [1, cardinality] + + self.pi_received_msgs = self._init_received_msgs(parents) + self.lambda_received_msgs = self._init_received_msgs(children) + + @classmethod + def from_cpd_class(cls, + label_id, + children, + parents, + cardinality, + cpd_class): + cpd = cpd_class(label_id, parents) + return cls(label_id, children, parents, cardinality, cpd) + + @property + def belief(self): + if self.pi_agg.any() and self.lambda_agg.any(): + belief = np.multiply(self.pi_agg, self.lambda_agg) + return self._normalize(belief) + else: + return None + + def _normalize(self, value): + return value/value.sum() + + @staticmethod + def _init_received_msgs(keys): + return {k: None for k in keys} + + def _return_msgs_received_for_msg_type(self, message_type): + """ + Input: + message_type: MessageType enum + + Returns: + msg_values: list of message values (each an np.array) + """ + if message_type == MessageType.LAMBDA: + msg_values = [msg for msg in self.lambda_received_msgs.values()] + elif message_type == MessageType.PI: + msg_values = [msg for msg in self.pi_received_msgs.values()] + return msg_values + + def validate_and_return_msgs_received_for_msg_type(self, message_type): + """ + Check that all messages have been received from children (parents). + Raise error if all messages have not been received. Called + before calculating lambda_agg (pi_agg). + + Input: + message_type: MessageType enum + + Returns: + msg_values: list of message values (each an np.array) + """ + msg_values = self._return_msgs_received_for_msg_type(message_type) + + if any(msg is None for msg in msg_values): + raise ValueError( + "Missing value for {msg_type} msg from child: can't compute {msg_type}_agg.". + format(msg_type=message_type.value) + ) + else: + return msg_values + + def compute_pi_agg(self): + # TODO: implement explict factor product operation + raise NotImplementedError + + def compute_lambda_agg(self): + if not self.children: + return self.lambda_agg + else: + lambda_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.LAMBDA) + self.lambda_agg = reduce(np.multiply, lambda_msg_values) + return self.lambda_agg + + def _update_received_msg_by_key(self, received_msg_dict, key, new_value): + if key not in received_msg_dict.keys(): + raise ValueError("Label id '{}' to update message isn't in allowed set of keys: {}". + format(key, received_msg_dict.keys())) + + if not isinstance(new_value, np.ndarray): + raise TypeError("Expected a new value of type numpy.ndarray, but got type {}". + format(type(new_value))) + + if new_value.shape != (self.cardinality,): + raise ValueError("Expected new value to be of dimensions ({},) but got {} instead". + format(self.cardinality, new_value.shape)) + received_msg_dict[key] = new_value + + def update_pi_msg_from_parent(self, parent, new_value): + self._update_received_msg_by_key(received_msg_dict=self.pi_received_msgs, + key=parent, + new_value=new_value) + + def update_lambda_msg_from_child(self, child, new_value): + self._update_received_msg_by_key(received_msg_dict=self.lambda_received_msgs, + key=child, + new_value=new_value) + + def compute_pi_msg_to_child(self, child_k): + lambda_msg_from_child = self.lambda_received_msgs[child_k] + if lambda_msg_from_child is not None: + with np.errstate(divide='ignore', invalid='ignore'): + # 0/0 := 0 + return self._normalize( + np.nan_to_num(np.divide(self.belief, lambda_msg_from_child))) + else: + raise ValueError("Can't compute pi message to child_{} without having received" \ + "a lambda message from that child.") + + def compute_lambda_msg_to_parent(self, parent_k): + # TODO: implement explict factor product operation + raise NotImplementedError + + @property + def is_fully_initialized(self): + """ + Returns True if all lambda and pi messages and lambda_agg and + pi_agg are not None, else False. + """ + lambda_msgs = self._return_msgs_received_for_msg_type(MessageType.LAMBDA) + if any(msg is None for msg in lambda_msgs): + return False + + pi_msgs = self._return_msgs_received_for_msg_type(MessageType.PI) + if any(msg is None for msg in pi_msgs): + return False + + if (self.pi_agg is None) or (self.lambda_agg is None): + return False + + return True diff --git a/beliefs/types/BernoulliOrNode.py b/beliefs/types/BernoulliOrNode.py deleted file mode 100644 index ce497b9..0000000 --- a/beliefs/types/BernoulliOrNode.py +++ /dev/null @@ -1,47 +0,0 @@ -import numpy as np -from functools import reduce - -from beliefs.types.Node import ( - Node, - MessageType, - InvalidLambdaMsgToParent -) -from beliefs.factors.BernoulliOrCPD import BernoulliOrCPD - - -class BernoulliOrNode(Node): - def __init__(self, - label_id, - children, - parents): - super().__init__(label_id=label_id, - children=children, - parents=parents, - cardinality=2, - cpd=BernoulliOrCPD(label_id, parents)) - - def compute_pi_agg(self): - if not self.parents: - self.pi_agg = self.cpd.values - else: - pi_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) - parents_p0 = [p[0] for p in pi_msg_values] - p_0 = reduce(lambda x, y: x*y, parents_p0) - p_1 = 1 - p_0 - self.pi_agg = np.array([p_0, p_1]) - return self.pi_agg - - def compute_lambda_msg_to_parent(self, parent_k): - if np.array_equal(self.lambda_agg, np.ones([self.cardinality])): - return np.ones([self.cardinality]) - else: - # TODO: cleanup this validation - _ = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) - p0_excluding_k = [msg[0] for par_id, msg in self.pi_received_msgs.items() if par_id != parent_k] - p0_product = reduce(lambda x, y: x*y, p0_excluding_k, 1) - lambda_0 = self.lambda_agg[1] + (self.lambda_agg[0] - self.lambda_agg[1])*p0_product - lambda_1 = self.lambda_agg[1] - lambda_msg = np.array([lambda_0, lambda_1]) - if not any(lambda_msg): - raise InvalidLambdaMsgToParent - return self._normalize(lambda_msg) diff --git a/beliefs/types/Node.py b/beliefs/types/Node.py deleted file mode 100644 index a496acf..0000000 --- a/beliefs/types/Node.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np -from functools import reduce -from enum import Enum - - -class InvalidLambdaMsgToParent(Exception): - """Computed invalid lambda msg to send to parent.""" - pass - - -class MessageType(Enum): - LAMBDA = 'lambda' - PI = 'pi' - - -class Node: - """A node in a DAG with methods to compute the belief (marginal probability - of the node given evidence) and compute pi/lambda messages to/from its neighbors - to update its belief. - - Implemented from Pearl's belief propagation algorithm. - """ - def __init__(self, - label_id, - children, - parents, - cardinality, - cpd): - """ - Input: - label_id: str - children: str - parents: set of strings - cardinality: int, cardinality of the random variable the node represents - cpd: an instance of a conditional probability distribution, - e.g. BernoulliOrCPD or TabularCPD - """ - self.label_id = label_id - self.children = children - self.parents = parents - self.cardinality = cardinality - self.cpd = cpd - - self.pi_agg = None # np.array dimensions [1, cardinality] - self.lambda_agg = None # np.array dimensions [1, cardinality] - - self.pi_received_msgs = self._init_received_msgs(parents) - self.lambda_received_msgs = self._init_received_msgs(children) - - @classmethod - def from_cpd_class(cls, - label_id, - children, - parents, - cardinality, - cpd_class): - cpd = cpd_class(label_id, parents) - return cls(label_id, children, parents, cardinality, cpd) - - @property - def belief(self): - if self.pi_agg.any() and self.lambda_agg.any(): - belief = np.multiply(self.pi_agg, self.lambda_agg) - return self._normalize(belief) - else: - return None - - def _normalize(self, value): - return value/value.sum() - - @staticmethod - def _init_received_msgs(keys): - return {k: None for k in keys} - - def _return_msgs_received_for_msg_type(self, message_type): - """ - Input: - message_type: MessageType enum - - Returns: - msg_values: list of message values (each an np.array) - """ - if message_type == MessageType.LAMBDA: - msg_values = [msg for msg in self.lambda_received_msgs.values()] - elif message_type == MessageType.PI: - msg_values = [msg for msg in self.pi_received_msgs.values()] - return msg_values - - def validate_and_return_msgs_received_for_msg_type(self, message_type): - """ - Check that all messages have been received from children (parents). - Raise error if all messages have not been received. Called - before calculating lambda_agg (pi_agg). - - Input: - message_type: MessageType enum - - Returns: - msg_values: list of message values (each an np.array) - """ - msg_values = self._return_msgs_received_for_msg_type(message_type) - - if any(msg is None for msg in msg_values): - raise ValueError( - "Missing value for {msg_type} msg from child: can't compute {msg_type}_agg.". - format(msg_type=message_type.value) - ) - else: - return msg_values - - def compute_pi_agg(self): - # TODO: implement explict factor product operation - raise NotImplementedError - - def compute_lambda_agg(self): - if not self.children: - return self.lambda_agg - else: - lambda_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.LAMBDA) - self.lambda_agg = reduce(np.multiply, lambda_msg_values) - return self.lambda_agg - - def _update_received_msg_by_key(self, received_msg_dict, key, new_value): - if key not in received_msg_dict.keys(): - raise ValueError("Label id '{}' to update message isn't in allowed set of keys: {}". - format(key, received_msg_dict.keys())) - - if not isinstance(new_value, np.ndarray): - raise TypeError("Expected a new value of type numpy.ndarray, but got type {}". - format(type(new_value))) - - if new_value.shape != (self.cardinality,): - raise ValueError("Expected new value to be of dimensions ({},) but got {} instead". - format(self.cardinality, new_value.shape)) - received_msg_dict[key] = new_value - - def update_pi_msg_from_parent(self, parent, new_value): - self._update_received_msg_by_key(received_msg_dict=self.pi_received_msgs, - key=parent, - new_value=new_value) - - def update_lambda_msg_from_child(self, child, new_value): - self._update_received_msg_by_key(received_msg_dict=self.lambda_received_msgs, - key=child, - new_value=new_value) - - def compute_pi_msg_to_child(self, child_k): - lambda_msg_from_child = self.lambda_received_msgs[child_k] - if lambda_msg_from_child is not None: - with np.errstate(divide='ignore', invalid='ignore'): - # 0/0 := 0 - return self._normalize( - np.nan_to_num(np.divide(self.belief, lambda_msg_from_child))) - else: - raise ValueError("Can't compute pi message to child_{} without having received" \ - "a lambda message from that child.") - - def compute_lambda_msg_to_parent(self, parent_k): - # TODO: implement explict factor product operation - raise NotImplementedError - - @property - def is_fully_initialized(self): - """ - Returns True if all lambda and pi messages and lambda_agg and - pi_agg are not None, else False. - """ - lambda_msgs = self._return_msgs_received_for_msg_type(MessageType.LAMBDA) - if any(msg is None for msg in lambda_msgs): - return False - - pi_msgs = self._return_msgs_received_for_msg_type(MessageType.PI) - if any(msg is None for msg in pi_msgs): - return False - - if (self.pi_agg is None) or (self.lambda_agg is None): - return False - - return True diff --git a/beliefs/types/__init__.py b/beliefs/types/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/beliefs/utils/edges_helper.py b/beliefs/utils/edges_helper.py index c959a3b..130686c 100644 --- a/beliefs/utils/edges_helper.py +++ b/beliefs/utils/edges_helper.py @@ -1,6 +1,6 @@ from collections import defaultdict -from beliefs.types.Node import Node +from beliefs.models.beliefupdate.Node import Node from beliefs.factors.BernoulliOrCPD import BernoulliOrCPD diff --git a/tests/test_belief_propagation.py b/tests/test_belief_propagation.py index 24ee94b..264ddae 100644 --- a/tests/test_belief_propagation.py +++ b/tests/test_belief_propagation.py @@ -3,8 +3,8 @@ import pytest from pytest import approx from beliefs.inference.belief_propagation import BeliefPropagation, ConflictingEvidenceError -from beliefs.models.BernoulliOrModel import BernoulliOrModel -from beliefs.types.BernoulliOrNode import BernoulliOrNode +from beliefs.models.beliefupdate.BeliefUpdateNodeModel import BeliefUpdateNodeModel +from beliefs.models.beliefupdate.BernoulliOrNode import BernoulliOrNode @pytest.fixture(scope='module') @@ -37,24 +37,23 @@ def many_parents_edges(): @pytest.fixture(scope='function') def four_node_model(edges_four_nodes): - return BernoulliOrModel(edges_four_nodes) + return BeliefUpdateNodeModel.from_edges(edges_four_nodes, BernoulliOrNode) @pytest.fixture(scope='function') def simple_model(simple_edges): - return BernoulliOrModel(simple_edges) + return BeliefUpdateNodeModel.from_edges(simple_edges, BernoulliOrNode) @pytest.fixture(scope='function') def many_parents_model(many_parents_edges): - return BernoulliOrModel(many_parents_edges) + return BeliefUpdateNodeModel.from_edges(many_parents_edges, BernoulliOrNode) @pytest.fixture(scope='function') def one_node_model(): a_node = BernoulliOrNode(label_id='x', children=[], parents=[]) - # a_node = BernoulliOrNode(label_id='x', children=set(), parents=set()) - return BernoulliOrModel(edges=None, nodes={'x': a_node}) + return BeliefUpdateNodeModel(nodes_dict={'x': a_node}) def get_label_mapped_to_positive_belief(query_result): -- cgit v1.2.3 From e5937060658f7e8ac484e5489f2b35b4ecb96d35 Mon Sep 17 00:00:00 2001 From: Cathy Yeh Date: Sun, 3 Dec 2017 17:38:42 -0800 Subject: fix Makefile tests --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 805f519..1f33fc4 100644 --- a/Makefile +++ b/Makefile @@ -104,7 +104,7 @@ test-in-clean-env: verify-conda-build-installed # run tests in the current environment test-in-current-env: git lfs fetch - echo TEST + pytest tests -vv #################################################################################################### # helper commands -- cgit v1.2.3 From 8dc7ae89677fca16ee974a30cff8c4df53c955ce Mon Sep 17 00:00:00 2001 From: Cathy Yeh Date: Sun, 3 Dec 2017 19:16:32 -0800 Subject: PR comments --- beliefs/factors/BernoulliOrCPD.py | 42 --- beliefs/factors/CPD.py | 45 --- beliefs/factors/bernoulli_or_cpd.py | 42 +++ beliefs/factors/cpd.py | 45 +++ beliefs/inference/belief_propagation.py | 44 +-- beliefs/models/BayesianModel.py | 119 -------- beliefs/models/DirectedGraph.py | 36 --- beliefs/models/base_models.py | 154 ++++++++++ beliefs/models/belief_update_node_model.py | 315 +++++++++++++++++++++ .../models/beliefupdate/BeliefUpdateNodeModel.py | 91 ------ beliefs/models/beliefupdate/BernoulliOrNode.py | 47 --- beliefs/models/beliefupdate/Node.py | 179 ------------ beliefs/utils/edges_helper.py | 136 --------- tests/test_belief_propagation.py | 12 +- 14 files changed, 588 insertions(+), 719 deletions(-) delete mode 100644 beliefs/factors/BernoulliOrCPD.py delete mode 100644 beliefs/factors/CPD.py create mode 100644 beliefs/factors/bernoulli_or_cpd.py create mode 100644 beliefs/factors/cpd.py delete mode 100644 beliefs/models/BayesianModel.py delete mode 100644 beliefs/models/DirectedGraph.py create mode 100644 beliefs/models/base_models.py create mode 100644 beliefs/models/belief_update_node_model.py delete mode 100644 beliefs/models/beliefupdate/BeliefUpdateNodeModel.py delete mode 100644 beliefs/models/beliefupdate/BernoulliOrNode.py delete mode 100644 beliefs/models/beliefupdate/Node.py delete mode 100644 beliefs/utils/edges_helper.py diff --git a/beliefs/factors/BernoulliOrCPD.py b/beliefs/factors/BernoulliOrCPD.py deleted file mode 100644 index 2c6a31e..0000000 --- a/beliefs/factors/BernoulliOrCPD.py +++ /dev/null @@ -1,42 +0,0 @@ -import numpy as np - -from beliefs.factors.CPD import TabularCPD - - -class BernoulliOrCPD(TabularCPD): - """CPD class for a Bernoulli random variable whose relationship to its - parents (also Bernoulli random variables) is described by OR logic. - - If at least one of the variable's parents is True, then the variable - is True, and False otherwise. - """ - def __init__(self, variable, parents=[]): - """ - Args: - variable: int or string - parents: optional, list of int and/or strings - """ - super().__init__(variable=variable, - variable_card=2, - parents=parents, - parents_card=[2]*len(parents), - values=[]) - self._values = [] - - @property - def values(self): - if not any(self._values): - self._values = self._build_kwise_values_array(len(self.variables)) - self._values = self._values.reshape(self.cardinality) - return self._values - - @staticmethod - def _build_kwise_values_array(k): - # special case a completely independent factor, and - # return the uniform prior - if k == 1: - return np.array([0.5, 0.5]) - - return np.array( - [1.,] + [0.]*(2**(k-1)-1) + [0.,] + [1.]*(2**(k-1)-1) - ) diff --git a/beliefs/factors/CPD.py b/beliefs/factors/CPD.py deleted file mode 100644 index a286aaa..0000000 --- a/beliefs/factors/CPD.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np - - -class TabularCPD: - """ - Defines the conditional probability table for a discrete variable - whose parents are also discrete. - - TODO: have this inherit from DiscreteFactor implementing explicit factor methods - """ - def __init__(self, variable, variable_card, - parents=[], parents_card=[], values=[]): - """ - Args: - variable: int or string - variable_card: int - parents: optional, list of int and/or strings - parents_card: optional, list of int - values: optional, 2d list or array - """ - self.variable = variable - self.parents = parents - self.variables = [variable] + parents - self.cardinality = [variable_card] + parents_card - self._values = np.array(values) - - @property - def values(self): - return self._values - - def get_values(self): - """ - Returns the tabular cpd form of the values. - """ - if len(self.cardinality) == 1: - return self.values.reshape(1, np.prod(self.cardinality)) - else: - return self.values.reshape(self.cardinality[0], np.prod(self.cardinality[1:])) - - def copy(self): - return self.__class__(self.variable, - self.cardinality[0], - self.parents, - self.cardinality[1:], - self._values) diff --git a/beliefs/factors/bernoulli_or_cpd.py b/beliefs/factors/bernoulli_or_cpd.py new file mode 100644 index 0000000..bfb3a95 --- /dev/null +++ b/beliefs/factors/bernoulli_or_cpd.py @@ -0,0 +1,42 @@ +import numpy as np + +from beliefs.factors.cpd import TabularCPD + + +class BernoulliOrCPD(TabularCPD): + """CPD class for a Bernoulli random variable whose relationship to its + parents (also Bernoulli random variables) is described by OR logic. + + If at least one of the variable's parents is True, then the variable + is True, and False otherwise. + """ + def __init__(self, variable, parents=[]): + """ + Args: + variable: int or string + parents: optional, list of int and/or strings + """ + super().__init__(variable=variable, + variable_card=2, + parents=parents, + parents_card=[2]*len(parents), + values=[]) + self._values = [] + + @property + def values(self): + if not any(self._values): + self._values = self._build_kwise_values_array(len(self.variables)) + self._values = self._values.reshape(self.cardinality) + return self._values + + @staticmethod + def _build_kwise_values_array(k): + # special case a completely independent factor, and + # return the uniform prior + if k == 1: + return np.array([0.5, 0.5]) + + return np.array( + [1.,] + [0.]*(2**(k-1)-1) + [0.,] + [1.]*(2**(k-1)-1) + ) diff --git a/beliefs/factors/cpd.py b/beliefs/factors/cpd.py new file mode 100644 index 0000000..a286aaa --- /dev/null +++ b/beliefs/factors/cpd.py @@ -0,0 +1,45 @@ +import numpy as np + + +class TabularCPD: + """ + Defines the conditional probability table for a discrete variable + whose parents are also discrete. + + TODO: have this inherit from DiscreteFactor implementing explicit factor methods + """ + def __init__(self, variable, variable_card, + parents=[], parents_card=[], values=[]): + """ + Args: + variable: int or string + variable_card: int + parents: optional, list of int and/or strings + parents_card: optional, list of int + values: optional, 2d list or array + """ + self.variable = variable + self.parents = parents + self.variables = [variable] + parents + self.cardinality = [variable_card] + parents_card + self._values = np.array(values) + + @property + def values(self): + return self._values + + def get_values(self): + """ + Returns the tabular cpd form of the values. + """ + if len(self.cardinality) == 1: + return self.values.reshape(1, np.prod(self.cardinality)) + else: + return self.values.reshape(self.cardinality[0], np.prod(self.cardinality[1:])) + + def copy(self): + return self.__class__(self.variable, + self.cardinality[0], + self.parents, + self.cardinality[1:], + self._values) diff --git a/beliefs/inference/belief_propagation.py b/beliefs/inference/belief_propagation.py index 02f5595..7ec648d 100644 --- a/beliefs/inference/belief_propagation.py +++ b/beliefs/inference/belief_propagation.py @@ -1,11 +1,17 @@ import numpy as np from collections import namedtuple +import logging -from beliefs.models.beliefupdate.Node import InvalidLambdaMsgToParent -from beliefs.models.beliefupdate.BeliefUpdateNodeModel import BeliefUpdateNodeModel +from beliefs.models.belief_update_node_model import ( + InvalidLambdaMsgToParent, + BeliefUpdateNodeModel +) from beliefs.utils.math_helper import is_kronecker_delta +logger = logging.getLogger(__name__) + + MsgPassers = namedtuple('MsgPassers', ['msg_receiver', 'msg_sender']) @@ -51,7 +57,7 @@ class BeliefPropagation: return node_to_update_label_id, msg_sender_label_id = nodes_to_update.pop() - print("Node", node_to_update_label_id) + logging.info("Node: %s", node_to_update_label_id) node = self.model.nodes_dict[node_to_update_label_id] @@ -59,8 +65,8 @@ class BeliefPropagation: # outgoing msg from the node to update parent_ids = set(node.parents) - set([msg_sender_label_id]) child_ids = set(node.children) - set([msg_sender_label_id]) - print("parent_ids:", parent_ids) - print("child_ids:", child_ids) + logging.info("parent_ids: %s", str(parent_ids)) + logging.info("child_ids: %s", str(child_ids)) if msg_sender_label_id is not None: # update triggered by receiving a message, not pinning to evidence @@ -68,9 +74,9 @@ class BeliefPropagation: if node_to_update_label_id not in evidence: node.compute_pi_agg() - print("belief propagation pi_agg", node.pi_agg) + logging.info("belief propagation pi_agg: %s", np.array2string(node.pi_agg)) node.compute_lambda_agg() - print("belief propagation lambda_agg", node.lambda_agg) + logging.info("belief propagation lambda_agg: %s", np.array2string(node.lambda_agg)) for parent_id in parent_ids: try: @@ -114,13 +120,13 @@ class BeliefPropagation: for child in node.lambda_received_msgs.keys(): node.update_lambda_msg_from_child(child=child, new_value=ones_vector) - print("Finished initializing Lambda(x) and lambda_received_msgs per node.") + logging.info("Finished initializing Lambda(x) and lambda_received_msgs per node.") - print("Start downward sweep from nodes. Sending Pi messages only.") + logging.info("Start downward sweep from nodes. Sending Pi messages only.") topdown_order = self.model.get_topologically_sorted_nodes(reverse=False) for node_id in topdown_order: - print('label in iteration through top-down order:', node_id) + logging.info('label in iteration through top-down order: %s', str(node_id)) node_sending_msg = self.model.nodes_dict[node_id] child_ids = node_sending_msg.children @@ -129,9 +135,9 @@ class BeliefPropagation: node_sending_msg.compute_pi_agg() for child_id in child_ids: - print("child", child_id) + logging.info("child: %s", str(child_id)) new_pi_msg = node_sending_msg.compute_pi_msg_to_child(child_k=child_id) - print(new_pi_msg) + logging.info("new_pi_msg: %s", np.array2string(new_pi_msg)) child_node = self.model.nodes_dict[child_id] child_node.update_pi_msg_from_parent(parent=node_id, @@ -158,10 +164,9 @@ class BeliefPropagation: self.model.nodes_dict[evidence_id].lambda_agg = \ self.model.nodes_dict[evidence_id].lambda_agg * observed_value - nodes_to_update.add(MsgPassers(msg_receiver=evidence_id, - msg_sender=None)) + nodes_to_update = [MsgPassers(msg_receiver=evidence_id, msg_sender=None)] - self._belief_propagation(nodes_to_update=nodes_to_update, + self._belief_propagation(nodes_to_update=set(nodes_to_update), evidence=evidence) def query(self, evidence={}): @@ -179,12 +184,13 @@ class BeliefPropagation: Example ------- - >> from label_graph_service.pgm.inference.belief_propagation import BeliefPropagation - >> from label_graph_service.pgm.models.BernoulliOrModel import BernoulliOrModel + >> import numpy as np + >> from beliefs.inference.belief_propagation import BeliefPropagation + >> from beliefs.models.belief_update_node_model import BeliefUpdateNodeModel, BernoulliOrNode >> edges = [('1', '3'), ('2', '3'), ('3', '5')] - >> model = BernoulliOrModel(edges) + >> model = BeliefUpdateNodeModel.init_from_edges(edges, BernoulliOrNode) >> infer = BeliefPropagation(model) - >> result = infer.query({'2': np.array([0, 1])}) + >> result = infer.query(evidence={'2': np.array([0, 1])}) """ if not self.model.all_nodes_are_fully_initialized: self.initialize_model() diff --git a/beliefs/models/BayesianModel.py b/beliefs/models/BayesianModel.py deleted file mode 100644 index b57f968..0000000 --- a/beliefs/models/BayesianModel.py +++ /dev/null @@ -1,119 +0,0 @@ -import copy -import networkx as nx - -from beliefs.models.DirectedGraph import DirectedGraph -from beliefs.utils.math_helper import is_kronecker_delta - - -class BayesianModel(DirectedGraph): - """ - Bayesian model stores nodes and edges described by conditional probability - distributions. - """ - def __init__(self, edges=[], variables=[], cpds=[]): - """ - Base class for Bayesian model. - - Input: - edges: (optional) list of edges, - tuples of form ('parent', 'child') - variables: (optional) list of str or int - labels for variables - cpds: (optional) list of CPDs - TabularCPD class or subclass - """ - super().__init__() - super().add_edges_from(edges) - super().add_nodes_from(variables) - self.cpds = cpds - - def copy(self): - """ - Returns a copy of the model. - """ - copy_model = self.__class__(edges=list(self.edges()).copy(), - variables=list(self.nodes()).copy(), - cpds=[cpd.copy() for cpd in self.cpds]) - return copy_model - - def get_variables_in_definite_state(self): - """ - Returns a set of labels of all nodes in a definite state, i.e. with - label values that are kronecker deltas. - - RETURNS - set of strings (labels) - """ - return {label for label, node in self.nodes_dict.items() if is_kronecker_delta(node.belief)} - - def get_unobserved_variables_in_definite_state(self, observed=set()): - """ - Returns a set of labels that are inferred to be in definite state, given - list of labels that were directly observed (e.g. YES/NOs, but not MAYBEs). - - INPUT - observed: set of strings, directly observed labels - RETURNS - set of strings, labels inferred to be in a definite state - """ - - # Assert that beliefs of directly observed vars are kronecker deltas - for label in observed: - assert is_kronecker_delta(self.nodes_dict[label].belief), \ - ("Observed label has belief {} but should be kronecker delta" - .format(self.nodes_dict[label].belief)) - - vars_in_definite_state = self.get_variables_in_definite_state() - assert observed <= vars_in_definite_state, \ - "Expected set of observed labels to be a subset of labels in definite state." - return vars_in_definite_state - observed - - def _get_ancestors_of(self, observed): - """Return list of ancestors of observed labels, including the observed labels themselves.""" - ancestors = observed.copy() - for label in observed: - ancestors.update(nx.ancestors(self, label)) - return ancestors - - def reachable_observed_variables(self, source, observed=set()): - """ - Returns list of observed labels (labels with direct evidence to be in a definite - state) that are reachable from the source. - - INPUT - source: string, label of node for which to evaluate reachable observed labels - observed: set of strings, directly observed labels - RETURNS - reachable_observed_vars: set of strings, observed labels (variables with direct - evidence) that are reachable from the source label. - """ - ancestors_of_observed = self._get_ancestors_of(observed) - - visit_list = set() - visit_list.add((source, 'up')) - traversed_list = set() - reachable_observed_vars = set() - - while visit_list: - node, direction = visit_list.pop() - if (node, direction) not in traversed_list: - if node in observed: - reachable_observed_vars.add(node) - traversed_list.add((node, direction)) - if direction == 'up' and node not in observed: - for parent in self.predecessors(node): - # causal flow - visit_list.add((parent, 'up')) - for child in self.successors(node): - # common cause flow - visit_list.add((child, 'down')) - elif direction == 'down': - if node not in observed: - # evidential flow - for child in self.successors(node): - visit_list.add((child, 'down')) - if node in ancestors_of_observed: - # common effect flow (activated v-structure) - for parent in self.predecessors(node): - visit_list.add((parent, 'up')) - return reachable_observed_vars diff --git a/beliefs/models/DirectedGraph.py b/beliefs/models/DirectedGraph.py deleted file mode 100644 index 84b3a02..0000000 --- a/beliefs/models/DirectedGraph.py +++ /dev/null @@ -1,36 +0,0 @@ -import networkx as nx - - -class DirectedGraph(nx.DiGraph): - """ - Base class for all directed graphical models. - """ - def __init__(self, edges=None, node_labels=None): - """ - Input: - edges: an edge list, e.g. [(parent1, child1), (parent1, child2)] - node_labels: a list of strings of node labels - """ - super().__init__() - if edges is not None: - self.add_edges_from(edges) - if node_labels is not None: - self.add_nodes_from(node_labels) - - def get_leaves(self): - """ - Returns a list of leaves of the graph. - """ - return [node for node, out_degree in self.out_degree() if out_degree == 0] - - def get_roots(self): - """ - Returns a list of roots of the graph. - """ - return [node for node, in_degree in self.in_degree() if in_degree == 0] - - def get_topologically_sorted_nodes(self, reverse=False): - if reverse: - return list(reversed(list(nx.topological_sort(self)))) - else: - return nx.topological_sort(self) diff --git a/beliefs/models/base_models.py b/beliefs/models/base_models.py new file mode 100644 index 0000000..cb91566 --- /dev/null +++ b/beliefs/models/base_models.py @@ -0,0 +1,154 @@ +import networkx as nx + +from beliefs.utils.math_helper import is_kronecker_delta + + +class DirectedGraph(nx.DiGraph): + """ + Base class for all directed graphical models. + """ + def __init__(self, edges=None, node_labels=None): + """ + Input: + edges: an edge list, e.g. [(parent1, child1), (parent1, child2)] + node_labels: a list of strings of node labels + """ + super().__init__() + if edges is not None: + self.add_edges_from(edges) + if node_labels is not None: + self.add_nodes_from(node_labels) + + def get_leaves(self): + """ + Returns a list of leaves of the graph. + """ + return [node for node, out_degree in self.out_degree() if out_degree == 0] + + def get_roots(self): + """ + Returns a list of roots of the graph. + """ + return [node for node, in_degree in self.in_degree() if in_degree == 0] + + def get_topologically_sorted_nodes(self, reverse=False): + if reverse: + return list(reversed(list(nx.topological_sort(self)))) + else: + return nx.topological_sort(self) + + +class BayesianModel(DirectedGraph): + """ + Bayesian model stores nodes and edges described by conditional probability + distributions. + """ + def __init__(self, edges=[], variables=[], cpds=[]): + """ + Base class for Bayesian model. + + Input: + edges: (optional) list of edges, + tuples of form ('parent', 'child') + variables: (optional) list of str or int + labels for variables + cpds: (optional) list of CPDs + TabularCPD class or subclass + """ + super().__init__() + super().add_edges_from(edges) + super().add_nodes_from(variables) + self.cpds = cpds + + def copy(self): + """ + Returns a copy of the model. + """ + copy_model = self.__class__(edges=list(self.edges()).copy(), + variables=list(self.nodes()).copy(), + cpds=[cpd.copy() for cpd in self.cpds]) + return copy_model + + def get_variables_in_definite_state(self): + """ + Returns a set of labels of all nodes in a definite state, i.e. with + label values that are kronecker deltas. + + RETURNS + set of strings (labels) + """ + return {label for label, node in self.nodes_dict.items() if is_kronecker_delta(node.belief)} + + def get_unobserved_variables_in_definite_state(self, observed=set()): + """ + Returns a set of labels that are inferred to be in definite state, given + list of labels that were directly observed (e.g. YES/NOs, but not MAYBEs). + + INPUT + observed: set of strings, directly observed labels + RETURNS + set of strings, labels inferred to be in a definite state + """ + + # Assert that beliefs of directly observed vars are kronecker deltas + for label in observed: + assert is_kronecker_delta(self.nodes_dict[label].belief), \ + ("Observed label has belief {} but should be kronecker delta" + .format(self.nodes_dict[label].belief)) + + vars_in_definite_state = self.get_variables_in_definite_state() + assert observed <= vars_in_definite_state, \ + "Expected set of observed labels to be a subset of labels in definite state." + return vars_in_definite_state - observed + + def _get_ancestors_of(self, observed): + """Return list of ancestors of observed labels""" + ancestors = set() + for label in observed: + ancestors.update(nx.ancestors(self, label)) + return ancestors + + def reachable_observed_variables(self, source, observed=set()): + """ + Returns list of observed labels (labels with direct evidence to be in a definite + state) that are reachable from the source. + + INPUT + source: string, label of node for which to evaluate reachable observed labels + observed: set of strings, directly observed labels + RETURNS + reachable_observed_vars: set of strings, observed labels (variables with direct + evidence) that are reachable from the source label. + """ + # ancestors of observed labels, including observed labels + ancestors_of_observed = self._get_ancestors_of(observed) + ancestors_of_observed.update(observed) + + visit_list = set() + visit_list.add((source, 'up')) + traversed_list = set() + reachable_observed_vars = set() + + while visit_list: + node, direction = visit_list.pop() + if (node, direction) not in traversed_list: + if node in observed: + reachable_observed_vars.add(node) + traversed_list.add((node, direction)) + if direction == 'up' and node not in observed: + for parent in self.predecessors(node): + # causal flow + visit_list.add((parent, 'up')) + for child in self.successors(node): + # common cause flow + visit_list.add((child, 'down')) + elif direction == 'down': + if node not in observed: + # evidential flow + for child in self.successors(node): + visit_list.add((child, 'down')) + if node in ancestors_of_observed: + # common effect flow (activated v-structure) + for parent in self.predecessors(node): + visit_list.add((parent, 'up')) + return reachable_observed_vars diff --git a/beliefs/models/belief_update_node_model.py b/beliefs/models/belief_update_node_model.py new file mode 100644 index 0000000..667e0f1 --- /dev/null +++ b/beliefs/models/belief_update_node_model.py @@ -0,0 +1,315 @@ +import copy +from enum import Enum +import numpy as np +import itertools +from functools import reduce + +import networkx as nx + +from beliefs.models.base_models import BayesianModel +from beliefs.factors.bernoulli_or_cpd import BernoulliOrCPD + + +class InvalidLambdaMsgToParent(Exception): + """Computed invalid lambda msg to send to parent.""" + pass + + +class MessageType(Enum): + LAMBDA = 'lambda' + PI = 'pi' + + +class BeliefUpdateNodeModel(BayesianModel): + """ + A Bayesian model storing nodes (e.g. Node or BernoulliOrNode) implementing properties + and methods for Pearl's belief update algorithm. + + ref: "Fusion, Propagation, and Structuring in Belief Networks" + Artificial Intelligence 29 (1986) 241-288 + + """ + def __init__(self, nodes_dict): + """ + Input: + nodes_dict: dict + a dict key, value pair as {label_id: instance_of_node_class_or_subclass} + """ + super().__init__(edges=self._get_edges_from_nodes(nodes_dict.values()), + variables=list(nodes_dict.keys()), + cpds=[node.cpd for node in nodes_dict.values()]) + + self.nodes_dict = nodes_dict + + @classmethod + def init_from_edges(cls, edges, node_class): + """Create nodes from the same node class. + + Input: + edges: list of edge tuples of form ('parent', 'child') + node_class: the Node class or subclass from which to + create all the nodes from edges. + """ + nodes = set() + g = nx.DiGraph(edges) + + for label in set(itertools.chain(*edges)): + node = node_class(label_id=label, + children=list(g.successors(label)), + parents=list(g.predecessors(label))) + nodes.add(node) + nodes_dict = {node.label_id: node for node in nodes} + return cls(nodes_dict) + + @staticmethod + def _get_edges_from_nodes(nodes): + """ + Return list of all directed edges in nodes. + + Args: + nodes: an iterable of objects of the Node class or subclass + Returns: + edges: list of edge tuples + """ + edges = set() + for node in nodes: + if node.parents: + edge_tuples = zip(node.parents, [node.label_id]*len(node.parents)) + edges.update(edge_tuples) + return list(edges) + + def set_boundary_conditions(self): + """ + 1. Root nodes: if x is a node with no parents, set Pi(x) = prior + probability of x. + + 2. Leaf nodes: if x is a node with no children, set Lambda(x) + to an (unnormalized) unit vector, of length the cardinality of x. + """ + for root in self.get_roots(): + self.nodes_dict[root].pi_agg = self.nodes_dict[root].cpd.values + + for leaf in self.get_leaves(): + self.nodes_dict[leaf].lambda_agg = np.ones([self.nodes_dict[leaf].cardinality]) + + @property + def all_nodes_are_fully_initialized(self): + """ + Returns True if, for all nodes in the model, all lambda and pi + messages and lambda_agg and pi_agg are not None, else False. + """ + for node in self.nodes_dict.values(): + if not node.is_fully_initialized: + return False + return True + + def copy(self): + """ + Returns a copy of the model. + """ + copy_nodes = copy.deepcopy(self.nodes_dict) + copy_model = self.__class__(nodes_dict=copy_nodes) + return copy_model + + +class Node: + """A node in a DAG with methods to compute the belief (marginal probability + of the node given evidence) and compute pi/lambda messages to/from its neighbors + to update its belief. + + Implemented from Pearl's belief propagation algorithm. + """ + def __init__(self, + label_id, + children, + parents, + cardinality, + cpd): + """ + Args + label_id: str + children: set of strings + parents: set of strings + cardinality: int, cardinality of the random variable the node represents + cpd: an instance of a conditional probability distribution, + e.g. BernoulliOrCPD or TabularCPD + """ + self.label_id = label_id + self.children = children + self.parents = parents + self.cardinality = cardinality + self.cpd = cpd + + self.pi_agg = None # np.array dimensions [1, cardinality] + self.lambda_agg = None # np.array dimensions [1, cardinality] + + self.pi_received_msgs = self._init_received_msgs(parents) + self.lambda_received_msgs = self._init_received_msgs(children) + + @classmethod + def from_cpd_class(cls, + label_id, + children, + parents, + cardinality, + cpd_class): + cpd = cpd_class(label_id, parents) + return cls(label_id, children, parents, cardinality, cpd) + + @property + def belief(self): + if self.pi_agg.any() and self.lambda_agg.any(): + belief = np.multiply(self.pi_agg, self.lambda_agg) + return self._normalize(belief) + else: + return None + + def _normalize(self, value): + return value/value.sum() + + @staticmethod + def _init_received_msgs(keys): + return {k: None for k in keys} + + def _return_msgs_received_for_msg_type(self, message_type): + """ + Input: + message_type: MessageType enum + + Returns: + msg_values: list of message values (each an np.array) + """ + if message_type == MessageType.LAMBDA: + msg_values = [msg for msg in self.lambda_received_msgs.values()] + elif message_type == MessageType.PI: + msg_values = [msg for msg in self.pi_received_msgs.values()] + return msg_values + + def validate_and_return_msgs_received_for_msg_type(self, message_type): + """ + Check that all messages have been received from children (parents). + Raise error if all messages have not been received. Called + before calculating lambda_agg (pi_agg). + + Input: + message_type: MessageType enum + + Returns: + msg_values: list of message values (each an np.array) + """ + msg_values = self._return_msgs_received_for_msg_type(message_type) + + if any(msg is None for msg in msg_values): + raise ValueError( + "Missing value for {msg_type} msg from child: can't compute {msg_type}_agg." + .format(msg_type=message_type.value) + ) + else: + return msg_values + + def compute_pi_agg(self): + # TODO: implement explict factor product operation + raise NotImplementedError + + def compute_lambda_agg(self): + if not self.children: + return self.lambda_agg + else: + lambda_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.LAMBDA) + self.lambda_agg = reduce(np.multiply, lambda_msg_values) + return self.lambda_agg + + def _update_received_msg_by_key(self, received_msg_dict, key, new_value): + if key not in received_msg_dict.keys(): + raise ValueError("Label id '{}' to update message isn't in allowed set of keys: {}" + .format(key, received_msg_dict.keys())) + + if not isinstance(new_value, np.ndarray): + raise TypeError("Expected a new value of type numpy.ndarray, but got type {}" + .format(type(new_value))) + + if new_value.shape != (self.cardinality,): + raise ValueError("Expected new value to be of dimensions ({},) but got {} instead" + .format(self.cardinality, new_value.shape)) + received_msg_dict[key] = new_value + + def update_pi_msg_from_parent(self, parent, new_value): + self._update_received_msg_by_key(received_msg_dict=self.pi_received_msgs, + key=parent, + new_value=new_value) + + def update_lambda_msg_from_child(self, child, new_value): + self._update_received_msg_by_key(received_msg_dict=self.lambda_received_msgs, + key=child, + new_value=new_value) + + def compute_pi_msg_to_child(self, child_k): + lambda_msg_from_child = self.lambda_received_msgs[child_k] + if lambda_msg_from_child is not None: + with np.errstate(divide='ignore', invalid='ignore'): + # 0/0 := 0 + return self._normalize( + np.nan_to_num(np.divide(self.belief, lambda_msg_from_child))) + else: + raise ValueError("Can't compute pi message to child_{} without having received a lambda message from that child.") + + def compute_lambda_msg_to_parent(self, parent_k): + # TODO: implement explict factor product operation + raise NotImplementedError + + @property + def is_fully_initialized(self): + """ + Returns True if all lambda and pi messages and lambda_agg and + pi_agg are not None, else False. + """ + lambda_msgs = self._return_msgs_received_for_msg_type(MessageType.LAMBDA) + if any(msg is None for msg in lambda_msgs): + return False + + pi_msgs = self._return_msgs_received_for_msg_type(MessageType.PI) + if any(msg is None for msg in pi_msgs): + return False + + if (self.pi_agg is None) or (self.lambda_agg is None): + return False + + return True + + +class BernoulliOrNode(Node): + def __init__(self, + label_id, + children, + parents): + super().__init__(label_id=label_id, + children=children, + parents=parents, + cardinality=2, + cpd=BernoulliOrCPD(label_id, parents)) + + def compute_pi_agg(self): + if not self.parents: + self.pi_agg = self.cpd.values + else: + pi_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) + parents_p0 = [p[0] for p in pi_msg_values] + p_0 = reduce(lambda x, y: x*y, parents_p0) + p_1 = 1 - p_0 + self.pi_agg = np.array([p_0, p_1]) + return self.pi_agg + + def compute_lambda_msg_to_parent(self, parent_k): + if np.array_equal(self.lambda_agg, np.ones([self.cardinality])): + return np.ones([self.cardinality]) + else: + # TODO: cleanup this validation + _ = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) + p0_excluding_k = [msg[0] for par_id, msg in self.pi_received_msgs.items() if par_id != parent_k] + p0_product = reduce(lambda x, y: x*y, p0_excluding_k, 1) + lambda_0 = self.lambda_agg[1] + (self.lambda_agg[0] - self.lambda_agg[1])*p0_product + lambda_1 = self.lambda_agg[1] + lambda_msg = np.array([lambda_0, lambda_1]) + if not any(lambda_msg): + raise InvalidLambdaMsgToParent + return self._normalize(lambda_msg) diff --git a/beliefs/models/beliefupdate/BeliefUpdateNodeModel.py b/beliefs/models/beliefupdate/BeliefUpdateNodeModel.py deleted file mode 100644 index d74eaa7..0000000 --- a/beliefs/models/beliefupdate/BeliefUpdateNodeModel.py +++ /dev/null @@ -1,91 +0,0 @@ -import copy -import numpy as np - -from beliefs.models.BayesianModel import BayesianModel -from beliefs.utils.edges_helper import EdgesHelper - - -class BeliefUpdateNodeModel(BayesianModel): - """ - A Bayesian model storing nodes (e.g. Node or BernoulliOrNode) implementing properties - and methods for Pearl's belief update algorithm. - - ref: "Fusion, Propagation, and Structuring in Belief Networks" - Artificial Intelligence 29 (1986) 241-288 - - """ - def __init__(self, nodes_dict): - """ - Input: - nodes_dict: dict - a dict key, value pair as {label_id: instance_of_node_class_or_subclass} - """ - super().__init__(edges=self._get_edges_from_nodes(nodes_dict.values()), - variables=list(nodes_dict.keys()), - cpds=[node.cpd for node in nodes_dict.values()]) - - self.nodes_dict = nodes_dict - - @classmethod - def from_edges(cls, edges, node_class): - """Create nodes from the same node class. - - Input: - edges: list of edge tuples of form ('parent', 'child') - node_class: the Node class or subclass from which to - create all the nodes from edges. - """ - edges_helper = EdgesHelper(edges) - nodes = edges_helper.create_nodes_from_edges(node_class) - nodes_dict = {node.label_id: node for node in nodes} - return cls(nodes_dict) - - @staticmethod - def _get_edges_from_nodes(nodes): - """ - Return list of all directed edges in nodes. - - Args: - nodes: an iterable of objects of the Node class or subclass - Returns: - edges: list of edge tuples - """ - edges = set() - for node in nodes: - if node.parents: - edge_tuples = zip(node.parents, [node.label_id]*len(node.parents)) - edges.update(edge_tuples) - return list(edges) - - def set_boundary_conditions(self): - """ - 1. Root nodes: if x is a node with no parents, set Pi(x) = prior - probability of x. - - 2. Leaf nodes: if x is a node with no children, set Lambda(x) - to an (unnormalized) unit vector, of length the cardinality of x. - """ - for root in self.get_roots(): - self.nodes_dict[root].pi_agg = self.nodes_dict[root].cpd.values - - for leaf in self.get_leaves(): - self.nodes_dict[leaf].lambda_agg = np.ones([self.nodes_dict[leaf].cardinality]) - - @property - def all_nodes_are_fully_initialized(self): - """ - Returns True if, for all nodes in the model, all lambda and pi - messages and lambda_agg and pi_agg are not None, else False. - """ - for node in self.nodes_dict.values(): - if not node.is_fully_initialized: - return False - return True - - def copy(self): - """ - Returns a copy of the model. - """ - copy_nodes = copy.deepcopy(self.nodes_dict) - copy_model = self.__class__(nodes_dict=copy_nodes) - return copy_model diff --git a/beliefs/models/beliefupdate/BernoulliOrNode.py b/beliefs/models/beliefupdate/BernoulliOrNode.py deleted file mode 100644 index 3386275..0000000 --- a/beliefs/models/beliefupdate/BernoulliOrNode.py +++ /dev/null @@ -1,47 +0,0 @@ -import numpy as np -from functools import reduce - -from beliefs.models.beliefupdate.Node import ( - Node, - MessageType, - InvalidLambdaMsgToParent -) -from beliefs.factors.BernoulliOrCPD import BernoulliOrCPD - - -class BernoulliOrNode(Node): - def __init__(self, - label_id, - children, - parents): - super().__init__(label_id=label_id, - children=children, - parents=parents, - cardinality=2, - cpd=BernoulliOrCPD(label_id, parents)) - - def compute_pi_agg(self): - if not self.parents: - self.pi_agg = self.cpd.values - else: - pi_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) - parents_p0 = [p[0] for p in pi_msg_values] - p_0 = reduce(lambda x, y: x*y, parents_p0) - p_1 = 1 - p_0 - self.pi_agg = np.array([p_0, p_1]) - return self.pi_agg - - def compute_lambda_msg_to_parent(self, parent_k): - if np.array_equal(self.lambda_agg, np.ones([self.cardinality])): - return np.ones([self.cardinality]) - else: - # TODO: cleanup this validation - _ = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) - p0_excluding_k = [msg[0] for par_id, msg in self.pi_received_msgs.items() if par_id != parent_k] - p0_product = reduce(lambda x, y: x*y, p0_excluding_k, 1) - lambda_0 = self.lambda_agg[1] + (self.lambda_agg[0] - self.lambda_agg[1])*p0_product - lambda_1 = self.lambda_agg[1] - lambda_msg = np.array([lambda_0, lambda_1]) - if not any(lambda_msg): - raise InvalidLambdaMsgToParent - return self._normalize(lambda_msg) diff --git a/beliefs/models/beliefupdate/Node.py b/beliefs/models/beliefupdate/Node.py deleted file mode 100644 index daa2f14..0000000 --- a/beliefs/models/beliefupdate/Node.py +++ /dev/null @@ -1,179 +0,0 @@ -import numpy as np -from functools import reduce -from enum import Enum - - -class InvalidLambdaMsgToParent(Exception): - """Computed invalid lambda msg to send to parent.""" - pass - - -class MessageType(Enum): - LAMBDA = 'lambda' - PI = 'pi' - - -class Node: - """A node in a DAG with methods to compute the belief (marginal probability - of the node given evidence) and compute pi/lambda messages to/from its neighbors - to update its belief. - - Implemented from Pearl's belief propagation algorithm. - """ - def __init__(self, - label_id, - children, - parents, - cardinality, - cpd): - """ - Args - label_id: str - children: set of strings - parents: set of strings - cardinality: int, cardinality of the random variable the node represents - cpd: an instance of a conditional probability distribution, - e.g. BernoulliOrCPD or TabularCPD - """ - self.label_id = label_id - self.children = children - self.parents = parents - self.cardinality = cardinality - self.cpd = cpd - - self.pi_agg = None # np.array dimensions [1, cardinality] - self.lambda_agg = None # np.array dimensions [1, cardinality] - - self.pi_received_msgs = self._init_received_msgs(parents) - self.lambda_received_msgs = self._init_received_msgs(children) - - @classmethod - def from_cpd_class(cls, - label_id, - children, - parents, - cardinality, - cpd_class): - cpd = cpd_class(label_id, parents) - return cls(label_id, children, parents, cardinality, cpd) - - @property - def belief(self): - if self.pi_agg.any() and self.lambda_agg.any(): - belief = np.multiply(self.pi_agg, self.lambda_agg) - return self._normalize(belief) - else: - return None - - def _normalize(self, value): - return value/value.sum() - - @staticmethod - def _init_received_msgs(keys): - return {k: None for k in keys} - - def _return_msgs_received_for_msg_type(self, message_type): - """ - Input: - message_type: MessageType enum - - Returns: - msg_values: list of message values (each an np.array) - """ - if message_type == MessageType.LAMBDA: - msg_values = [msg for msg in self.lambda_received_msgs.values()] - elif message_type == MessageType.PI: - msg_values = [msg for msg in self.pi_received_msgs.values()] - return msg_values - - def validate_and_return_msgs_received_for_msg_type(self, message_type): - """ - Check that all messages have been received from children (parents). - Raise error if all messages have not been received. Called - before calculating lambda_agg (pi_agg). - - Input: - message_type: MessageType enum - - Returns: - msg_values: list of message values (each an np.array) - """ - msg_values = self._return_msgs_received_for_msg_type(message_type) - - if any(msg is None for msg in msg_values): - raise ValueError( - "Missing value for {msg_type} msg from child: can't compute {msg_type}_agg.". - format(msg_type=message_type.value) - ) - else: - return msg_values - - def compute_pi_agg(self): - # TODO: implement explict factor product operation - raise NotImplementedError - - def compute_lambda_agg(self): - if not self.children: - return self.lambda_agg - else: - lambda_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.LAMBDA) - self.lambda_agg = reduce(np.multiply, lambda_msg_values) - return self.lambda_agg - - def _update_received_msg_by_key(self, received_msg_dict, key, new_value): - if key not in received_msg_dict.keys(): - raise ValueError("Label id '{}' to update message isn't in allowed set of keys: {}". - format(key, received_msg_dict.keys())) - - if not isinstance(new_value, np.ndarray): - raise TypeError("Expected a new value of type numpy.ndarray, but got type {}". - format(type(new_value))) - - if new_value.shape != (self.cardinality,): - raise ValueError("Expected new value to be of dimensions ({},) but got {} instead". - format(self.cardinality, new_value.shape)) - received_msg_dict[key] = new_value - - def update_pi_msg_from_parent(self, parent, new_value): - self._update_received_msg_by_key(received_msg_dict=self.pi_received_msgs, - key=parent, - new_value=new_value) - - def update_lambda_msg_from_child(self, child, new_value): - self._update_received_msg_by_key(received_msg_dict=self.lambda_received_msgs, - key=child, - new_value=new_value) - - def compute_pi_msg_to_child(self, child_k): - lambda_msg_from_child = self.lambda_received_msgs[child_k] - if lambda_msg_from_child is not None: - with np.errstate(divide='ignore', invalid='ignore'): - # 0/0 := 0 - return self._normalize( - np.nan_to_num(np.divide(self.belief, lambda_msg_from_child))) - else: - raise ValueError("Can't compute pi message to child_{} without having received" \ - "a lambda message from that child.") - - def compute_lambda_msg_to_parent(self, parent_k): - # TODO: implement explict factor product operation - raise NotImplementedError - - @property - def is_fully_initialized(self): - """ - Returns True if all lambda and pi messages and lambda_agg and - pi_agg are not None, else False. - """ - lambda_msgs = self._return_msgs_received_for_msg_type(MessageType.LAMBDA) - if any(msg is None for msg in lambda_msgs): - return False - - pi_msgs = self._return_msgs_received_for_msg_type(MessageType.PI) - if any(msg is None for msg in pi_msgs): - return False - - if (self.pi_agg is None) or (self.lambda_agg is None): - return False - - return True diff --git a/beliefs/utils/edges_helper.py b/beliefs/utils/edges_helper.py deleted file mode 100644 index 130686c..0000000 --- a/beliefs/utils/edges_helper.py +++ /dev/null @@ -1,136 +0,0 @@ -from collections import defaultdict - -from beliefs.models.beliefupdate.Node import Node -from beliefs.factors.BernoulliOrCPD import BernoulliOrCPD - - -class EdgesHelper: - """Class with convenience methods for working with edges.""" - def __init__(self, edges): - self.edges = edges - - def get_label_to_children_dict(self): - """returns dictionary keyed on label, with value a set of children""" - label_to_children_dict = defaultdict(set) - for parent, child in self.edges: - label_to_children_dict[parent].add(child) - return label_to_children_dict - - def get_label_to_parents_dict(self): - """returns dictionary keyed on label, with value a set of parents - Only used to help create dummy factors from edges (not for algo). - """ - label_to_parents_dict = defaultdict(set) - - for parent, child in self.edges: - label_to_parents_dict[child].add(parent) - return label_to_parents_dict - - def get_labels_from_edges(self): - """Return the set of labels that comprise the vertices of a list of edge tuples.""" - all_labels = set() - for parent, child in self.edges: - all_labels.update({parent, child}) - return all_labels - - def create_cpds_from_edges(self, CPD=BernoulliOrCPD): - """ - Create factors from list of edges. - - Input: - cpd: a factor class, assumed initialization takes in a label_id, the label_id of - the child (should = label_id of the node), and set of label_ids of parents. - - Returns: - factors: a set of (unique) factors of the graph - """ - labels = self.get_labels_from_edges() - label_to_parents = self.get_label_to_parents_dict() - - factors = set() - - for label in labels: - parents = label_to_parents[label] - cpd = CPD(label, parents) - factors.add(cpd) - return factors - - def get_label_to_factor_dict(self, CPD=BernoulliOrCPD): - """Create a dictionary mapping each label_id to the CPD/factor where - that label_id is a child. - - Returns: - label_to_factor: dict mapping each label to the cpd that - has that label as a child. - """ - factors = self.create_cpds_from_edges(CPD=CPD) - - label_to_factor = dict() - for factor in factors: - label_to_factor[factor.child] = factor - return label_to_factor - - def get_label_to_node_dict(self, CPD=BernoulliOrCPD): - """Create a dictionary mapping each label_id to a Node instance. - - Returns: - label_to_node: dict mapping each label to the node that has that - label as a label_id. - """ - nodes = self.create_nodes_from_edges() - - label_to_node = dict() - for node in nodes: - label_to_node[node.label_id] = node - return label_to_node - - def get_label_to_node_dict_for_manual_cpds(self, cpds_list): - """Create a dictionary mapping each label_id to a node that is - instantiated with a manually defined pgmpy factor instance. - - Input: - cpds_list - list of instances of pgmpy factors, e.g. TabularCPD - - Returns: - label_to_node: dict mapping each label to the node that has that - label as a label_id. - """ - label_to_children = self.get_label_to_children_dict() - label_to_parents = self.get_label_to_parents_dict() - - label_to_node = dict() - for cpd in cpds_list: - label_id = cpd.variable - - node = Node(label_id=label_id, - children=label_to_children[label_id], - parents=label_to_parents[label_id], - cardinality=2, - cpd=cpd) - label_to_node[label_id] = node - - return label_to_node - - def create_nodes_from_edges(self, node_class): - """ - Create instances of the node_class. Assumes the node class is - initialized by label_id, children, and parents. - - Returns: - nodes: a set of (unique) nodes of the graph - """ - labels = self.get_labels_from_edges() - labels_to_parents = self.get_label_to_parents_dict() - labels_to_children = self.get_label_to_children_dict() - - nodes = set() - - for label in labels: - parents = list(labels_to_parents[label]) - children = list(labels_to_children[label]) - - node = node_class(label_id=label, - children=children, - parents=parents) - nodes.add(node) - return nodes diff --git a/tests/test_belief_propagation.py b/tests/test_belief_propagation.py index 264ddae..5c5a612 100644 --- a/tests/test_belief_propagation.py +++ b/tests/test_belief_propagation.py @@ -3,8 +3,10 @@ import pytest from pytest import approx from beliefs.inference.belief_propagation import BeliefPropagation, ConflictingEvidenceError -from beliefs.models.beliefupdate.BeliefUpdateNodeModel import BeliefUpdateNodeModel -from beliefs.models.beliefupdate.BernoulliOrNode import BernoulliOrNode +from beliefs.models.belief_update_node_model import ( + BeliefUpdateNodeModel, + BernoulliOrNode +) @pytest.fixture(scope='module') @@ -37,17 +39,17 @@ def many_parents_edges(): @pytest.fixture(scope='function') def four_node_model(edges_four_nodes): - return BeliefUpdateNodeModel.from_edges(edges_four_nodes, BernoulliOrNode) + return BeliefUpdateNodeModel.init_from_edges(edges_four_nodes, BernoulliOrNode) @pytest.fixture(scope='function') def simple_model(simple_edges): - return BeliefUpdateNodeModel.from_edges(simple_edges, BernoulliOrNode) + return BeliefUpdateNodeModel.init_from_edges(simple_edges, BernoulliOrNode) @pytest.fixture(scope='function') def many_parents_model(many_parents_edges): - return BeliefUpdateNodeModel.from_edges(many_parents_edges, BernoulliOrNode) + return BeliefUpdateNodeModel.init_from_edges(many_parents_edges, BernoulliOrNode) @pytest.fixture(scope='function') -- cgit v1.2.3 From c906bd37fba63ba706cc3b7802bfb18ffb05ee9a Mon Sep 17 00:00:00 2001 From: Cathy Yeh Date: Sun, 3 Dec 2017 20:37:48 -0800 Subject: bump version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 8acdd82..4e379d2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.1 +0.0.2 -- cgit v1.2.3