From 77d8b323d4f6e05ca97d9cbef43ac85fd8040d61 Mon Sep 17 00:00:00 2001 From: Cathy Yeh Date: Mon, 13 Nov 2017 14:42:52 -0800 Subject: copy scripts from lgs branch --- beliefs/factors/BernoulliOrFactor.py | 42 +++++++ beliefs/factors/__init__.py | 0 beliefs/inference/__init__.py | 0 beliefs/inference/belief_propagation.py | 192 ++++++++++++++++++++++++++++++++ beliefs/models/BayesianModel.py | 165 +++++++++++++++++++++++++++ beliefs/models/BernoulliOrModel.py | 17 +++ beliefs/models/DirectedGraph.py | 2 +- beliefs/types/BernoulliOrNode.py | 47 ++++++++ beliefs/types/Node.py | 179 +++++++++++++++++++++++++++++ beliefs/types/__init__.py | 0 beliefs/utils/__init__.py | 0 beliefs/utils/edges_helper.py | 136 ++++++++++++++++++++++ beliefs/utils/math_helper.py | 19 ++++ beliefs/utils/random_variables.py | 21 ++++ 14 files changed, 819 insertions(+), 1 deletion(-) create mode 100644 beliefs/factors/BernoulliOrFactor.py create mode 100644 beliefs/factors/__init__.py create mode 100644 beliefs/inference/__init__.py create mode 100644 beliefs/inference/belief_propagation.py create mode 100644 beliefs/models/BayesianModel.py create mode 100644 beliefs/models/BernoulliOrModel.py create mode 100644 beliefs/types/BernoulliOrNode.py create mode 100644 beliefs/types/Node.py create mode 100644 beliefs/types/__init__.py create mode 100644 beliefs/utils/__init__.py create mode 100644 beliefs/utils/edges_helper.py create mode 100644 beliefs/utils/math_helper.py create mode 100644 beliefs/utils/random_variables.py (limited to 'beliefs') diff --git a/beliefs/factors/BernoulliOrFactor.py b/beliefs/factors/BernoulliOrFactor.py new file mode 100644 index 0000000..4f973ae --- /dev/null +++ b/beliefs/factors/BernoulliOrFactor.py @@ -0,0 +1,42 @@ +import numpy as np + + +class BernoulliOrFactor: + """CPD class for a Bernoulli random variable whose relationship to its + parents is described by OR logic. + + If at least one of a child's parents is True, then the child is True, and + False otherwise.""" + def __init__(self, child, parents=set()): + self.child = child + self.parents = set(parents) + self.variables = set([child] + list(parents)) + self.cardinality = [2]*len(self.variables) + self._values = None + + @property + def values(self): + if self._values is None: + self._values = self._build_kwise_values_array(len(self.variables)) + self._values = self._values.reshape(self.cardinality) + return self._values + + def get_values(self): + """ + Returns the tabular cpd form of the values. + """ + if len(self.cardinality) == 1: + return self.values.reshape(1, np.prod(self.cardinality)) + else: + return self.values.reshape(self.cardinality[0], np.prod(self.cardinality[1:])) + + @staticmethod + def _build_kwise_values_array(k): + # special case a completely independent factor, and + # return the uniform prior + if k == 1: + return np.array([0.5, 0.5]) + + return np.array( + [1.,] + [0.]*(2**(k-1)-1) + [0.,] + [1.]*(2**(k-1)-1) + ) diff --git a/beliefs/factors/__init__.py b/beliefs/factors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beliefs/inference/__init__.py b/beliefs/inference/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beliefs/inference/belief_propagation.py b/beliefs/inference/belief_propagation.py new file mode 100644 index 0000000..972fd5d --- /dev/null +++ b/beliefs/inference/belief_propagation.py @@ -0,0 +1,192 @@ +import numpy as np +from collections import namedtuple + +from beliefs.types.Node import InvalidLambdaMsgToParent +from beliefs.utils.math_helper import is_kronecker_delta + + +MsgPassers = namedtuple('MsgPassers', ['msg_receiver', 'msg_sender']) + + +class ConflictingEvidenceError(Exception): + """Failed to run belief propagation on label graph because of conflicting evidence.""" + def __init__(self, evidence): + message = ( + "Can't run belief propagation with conflicting evidence: {}" + .format(evidence) + ) + super().__init__(message) + + +class BeliefPropagation: + def __init__(self, model, inplace=True): + """ + Input: + model: an instance of BayesianModel class or subclass + inplace: bool + modify in-place the nodes in the model during belief propagation + """ + if inplace is False: + self.model = model.copy() + else: + self.model = model + + def _belief_propagation(self, nodes_to_update, evidence): + """ + Implementation of Pearl's belief propagation algorithm for polytrees. + + ref: "Fusion, Propagation, and Structuring in Belief Networks" + Artificial Intelligence 29 (1986) 241-288 + + Input: + nodes_to_update: list + list of MsgPasser namedtuples. + evidence: dict, + a dict key, value pair as {var: state_of_var observed} + """ + if len(nodes_to_update) == 0: + return + + node_to_update_label_id, msg_sender_label_id = nodes_to_update.pop() + print("Node", node_to_update_label_id) + + node = self.model.nodes[node_to_update_label_id] + + # exclude the message sender (either a parent or child) from getting an + # outgoing msg from the node to update + parent_ids = node.parents - set([msg_sender_label_id]) + child_ids = node.children - set([msg_sender_label_id]) + print("parent_ids:", parent_ids) + print("child_ids:", child_ids) + + if msg_sender_label_id is not None: + # update triggered by receiving a message, not pinning to evidence + assert len(node.parents) + len(node.children) - 1 == len(parent_ids) + len(child_ids) + + if node_to_update_label_id not in evidence: + node.compute_pi_agg() + print("belief propagation pi_agg", node.pi_agg) + node.compute_lambda_agg() + print("belief propagation lambda_agg", node.lambda_agg) + + for parent_id in parent_ids: + try: + new_lambda_msg = node.compute_lambda_msg_to_parent(parent_k=parent_id) + except InvalidLambdaMsgToParent: + raise ConflictingEvidenceError(evidence=evidence) + + parent_node = self.model.nodes[parent_id] + parent_node.update_lambda_msg_from_child(child=node_to_update_label_id, + new_value=new_lambda_msg) + nodes_to_update.add(MsgPassers(msg_receiver=parent_id, + msg_sender=node_to_update_label_id)) + + for child_id in child_ids: + new_pi_msg = node.compute_pi_msg_to_child(child_k=child_id) + child_node = self.model.nodes[child_id] + child_node.update_pi_msg_from_parent(parent=node_to_update_label_id, + new_value=new_pi_msg) + nodes_to_update.add(MsgPassers(msg_receiver=child_id, + msg_sender=node_to_update_label_id)) + + self._belief_propagation(nodes_to_update, evidence) + + def initialize_model(self): + """ + Apply boundary conditions: + - Set pi_agg equal to prior probabilities for root nodes. + - Set lambda_agg equal to vector of ones for leaf nodes. + + - Set lambda_agg, lambda_received_msgs to vectors of ones (same effect as + actually passing lambda messages up from leaf nodes to root nodes). + - Calculate pi_agg and pi_received_msgs for all nodes without evidence. + (Without evidence, belief equals pi_agg.) + """ + self.model.set_boundary_conditions() + + for node in self.model.nodes.values(): + ones_vector = np.ones([node.cardinality]) + + node.lambda_agg = ones_vector + for child in node.lambda_received_msgs.keys(): + node.update_lambda_msg_from_child(child=child, + new_value=ones_vector) + print("Finished initializing Lambda(x) and lambda_received_msgs per node.") + + print("Start downward sweep from nodes. Sending Pi messages only.") + topdown_order = self.model.get_topologically_sorted_nodes(reverse=False) + + for node_id in topdown_order: + print('label in iteration through top-down order:', node_id) + + node_sending_msg = self.model.nodes[node_id] + child_ids = node_sending_msg.children + + if node_sending_msg.pi_agg is None: + node_sending_msg.compute_pi_agg() + + for child_id in child_ids: + print("child", child_id) + new_pi_msg = node_sending_msg.compute_pi_msg_to_child(child_k=child_id) + print(new_pi_msg) + + child_node = self.model.nodes[child_id] + child_node.update_pi_msg_from_parent(parent=node_id, + new_value=new_pi_msg) + + def _run_belief_propagation(self, evidence): + """ + Input: + evidence: dict + a dict key, value pair as {var: state_of_var observed} + """ + for evidence_id, observed_value in evidence.items(): + nodes_to_update = set() + + if evidence_id not in self.model.nodes.keys(): + raise KeyError("Evidence supplied for non-existent label_id: {}" + .format(evidence_id)) + + if is_kronecker_delta(observed_value): + # specific evidence + self.model.nodes[evidence_id].lambda_agg = observed_value + else: + # virtual evidence + self.model.nodes[evidence_id].lambda_agg = \ + self.model.nodes[evidence_id].lambda_agg * observed_value + + nodes_to_update.add(MsgPassers(msg_receiver=evidence_id, + msg_sender=None)) + + self._belief_propagation(nodes_to_update=nodes_to_update, + evidence=evidence) + + def query(self, evidence={}): + """ + Run belief propagation given evidence. + + Input: + evidence: dict + a dict key, value pair as {var: state_of_var observed}, + e.g. {'3': np.array([0,1])} if label '3' is True. + + Returns: + beliefs: dict + a dict key, value pair as {var: belief} + + Example + ------- + >> from label_graph_service.pgm.inference.belief_propagation import BeliefPropagation + >> from label_graph_service.pgm.models.BernoulliOrModel import BernoulliOrModel + >> edges = [('1', '3'), ('2', '3'), ('3', '5')] + >> model = BernoulliOrModel(edges) + >> infer = BeliefPropagation(model) + >> result = infer.query({'2': np.array([0, 1])}) + """ + if not self.model.all_nodes_are_fully_initialized: + self.initialize_model() + + if evidence: + self._run_belief_propagation(evidence) + + return {label_id: node.belief for label_id, node in self.model.nodes.items()} diff --git a/beliefs/models/BayesianModel.py b/beliefs/models/BayesianModel.py new file mode 100644 index 0000000..bdfd037 --- /dev/null +++ b/beliefs/models/BayesianModel.py @@ -0,0 +1,165 @@ +import copy +import numpy as np +import networkx as nx + +from beliefs.models.DirectedGraph import DirectedGraph +from beliefs.utils.edges_helper import EdgesHelper +from beliefs.utils.math_helper import is_kronecker_delta + + +class BayesianModel(DirectedGraph): + """ + Bayesian model stores nodes and edges described by conditional probability + distributions. + """ + def __init__(self, edges, nodes=None): + """ + Input: + edges: list of edge tuples of form ('parent', 'child') + nodes: (optional) dict + a dict key, value pair as {label_id: instance_of_node_class_or_subclass} + """ + if nodes is not None: + super().__init__(edges, nodes.keys()) + else: + super().__init__(edges) + self.nodes = nodes + + @classmethod + def from_node_class(cls, edges, node_class): + """Automatically create all nodes from the same node class + + Input: + edges: list of edge tuples of form ('parent', 'child') + node_class: (optional) the Node class or subclass from which to + create all the nodes from edges. + """ + nodes = cls.create_nodes(edges, node_class) + return cls.__init__(edges=edges, nodes=nodes) + + @staticmethod + def create_nodes(edges, node_class): + """Returns list of Node instances created from edges using + the default node_class""" + edges_helper = EdgesHelper(edges) + nodes = edges_helper.create_nodes_from_edges(node_class=node_class) + label_to_node = dict() + for node in nodes: + label_to_node[node.label_id] = node + return label_to_node + + def set_boundary_conditions(self): + """ + 1. Root nodes: if x is a node with no parents, set Pi(x) = prior + probability of x. + + 2. Leaf nodes: if x is a node with no children, set Lambda(x) + to an (unnormalized) unit vector, of length the cardinality of x. + """ + for root in self.get_roots(): + self.nodes[root].pi_agg = self.nodes[root].cpd.values + + for leaf in self.get_leaves(): + self.nodes[leaf].lambda_agg = np.ones([self.nodes[leaf].cardinality]) + + @property + def all_nodes_are_fully_initialized(self): + """ + Returns True if, for all nodes in the model, all lambda and pi + messages and lambda_agg and pi_agg are not None, else False. + """ + for node in self.nodes.values(): + if not node.is_fully_initialized: + return False + return True + + def copy(self): + """ + Returns a copy of the model. + """ + copy_edges = self.edges().copy() + copy_nodes = copy.deepcopy(self.nodes) + copy_model = self.__class__(edges=copy_edges, nodes=copy_nodes) + return copy_model + + def get_variables_in_definite_state(self): + """ + Returns a set of labels of all nodes in a definite state, i.e. with + label values that are kronecker deltas. + + RETURNS + set of strings (labels) + """ + return {label for label, node in self.nodes.items() if is_kronecker_delta(node.belief)} + + def get_unobserved_variables_in_definite_state(self, observed=set()): + """ + Returns a set of labels that are inferred to be in definite state, given + list of labels that were directly observed (e.g. YES/NOs, but not MAYBEs). + + INPUT + observed: set of strings, directly observed labels + RETURNS + set of strings, labels inferred to be in a definite state + """ + + # Assert that beliefs of directly observed vars are kronecker deltas + for label in observed: + assert is_kronecker_delta(self.nodes[label].belief), \ + ("Observed label has belief {} but should be kronecker delta" + .format(self.nodes[label].belief)) + + vars_in_definite_state = self.get_variables_in_definite_state() + assert observed <= vars_in_definite_state, \ + "Expected set of observed labels to be a subset of labels in definite state." + return vars_in_definite_state - observed + + def _get_ancestors_of(self, observed): + """Return list of ancestors of observed labels, including the observed labels themselves.""" + ancestors = observed.copy() + for label in observed: + ancestors.update(nx.ancestors(self, label)) + return ancestors + + def reachable_observed_variables(self, source, observed=set()): + """ + Returns list of observed labels (labels with direct evidence to be in a definite + state) that are reachable from the source. + + INPUT + source: string, label of node for which to evaluate reachable observed labels + observed: set of strings, directly observed labels + RETURNS + reachable_observed_vars: set of strings, observed labels (variables with direct + evidence) that are reachable from the source label. + """ + ancestors_of_observed = self._get_ancestors_of(observed) + + visit_list = set() + visit_list.add((source, 'up')) + traversed_list = set() + reachable_observed_vars = set() + + while visit_list: + node, direction = visit_list.pop() + if (node, direction) not in traversed_list: + if node in observed: + reachable_observed_vars.add(node) + traversed_list.add((node, direction)) + if direction == 'up' and node not in observed: + for parent in self.predecessors(node): + # causal flow + visit_list.add((parent, 'up')) + for child in self.successors(node): + # common cause flow + visit_list.add((child, 'down')) + elif direction == 'down': + if node not in observed: + # evidential flow + for child in self.successors(node): + visit_list.add((child, 'down')) + if node in ancestors_of_observed: + # common effect flow (activated v-structure) + for parent in self.predecessors(node): + visit_list.add((parent, 'up')) + return reachable_observed_vars diff --git a/beliefs/models/BernoulliOrModel.py b/beliefs/models/BernoulliOrModel.py new file mode 100644 index 0000000..da18fb6 --- /dev/null +++ b/beliefs/models/BernoulliOrModel.py @@ -0,0 +1,17 @@ +from beliefs.models.BayesianModel import BayesianModel +from beliefs.types.BernoulliOrNode import BernoulliOrNode + + +class BernoulliOrModel(BayesianModel): + """ + BernoulliOrModel stores node instances of BernoulliOrNodes (Bernoulli + variables associated with an OR conditional probability distribution). + """ + def __init__(self, edges, nodes=None): + """ + Input: + edges: an edge list, e.g. [(parent1, child1), (parent1, child2)] + """ + if nodes is None: + nodes = self.create_nodes(edges, node_class=BernoulliOrNode) + super().__init__(edges, nodes=nodes) diff --git a/beliefs/models/DirectedGraph.py b/beliefs/models/DirectedGraph.py index 8dfb9bd..8fce894 100644 --- a/beliefs/models/DirectedGraph.py +++ b/beliefs/models/DirectedGraph.py @@ -5,7 +5,7 @@ class DirectedGraph(nx.DiGraph): """ Base class for all directed graphical models. """ - def __init__(self, edges, node_labels): + def __init__(self, edges=None, node_labels=None): """ Input: edges: an edge list, e.g. [(parent1, child1), (parent1, child2)] diff --git a/beliefs/types/BernoulliOrNode.py b/beliefs/types/BernoulliOrNode.py new file mode 100644 index 0000000..27da85a --- /dev/null +++ b/beliefs/types/BernoulliOrNode.py @@ -0,0 +1,47 @@ +import numpy as np +from functools import reduce + +from beliefs.types.Node import ( + Node, + MessageType, + InvalidLambdaMsgToParent +) +from beliefs.factors.BernoulliOrFactor import BernoulliOrFactor + + +class BernoulliOrNode(Node): + def __init__(self, + label_id, + children, + parents): + super().__init__(label_id=label_id, + children=children, + parents=parents, + cardinality=2, + cpd=BernoulliOrFactor(label_id, parents)) + + def compute_pi_agg(self): + if not self.parents: + self.pi_agg = self.cpd.values + else: + pi_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) + parents_p0 = [p[0] for p in pi_msg_values] + p_0 = reduce(lambda x, y: x*y, parents_p0) + p_1 = 1 - p_0 + self.pi_agg = np.array([p_0, p_1]) + return self.pi_agg + + def compute_lambda_msg_to_parent(self, parent_k): + if np.array_equal(self.lambda_agg, np.ones([self.cardinality])): + return np.ones([self.cardinality]) + else: + # TODO: cleanup this validation + _ = self.validate_and_return_msgs_received_for_msg_type(MessageType.PI) + p0_excluding_k = [msg[0] for par_id, msg in self.pi_received_msgs.items() if par_id != parent_k] + p0_product = reduce(lambda x, y: x*y, p0_excluding_k, 1) + lambda_0 = self.lambda_agg[1] + (self.lambda_agg[0] - self.lambda_agg[1])*p0_product + lambda_1 = self.lambda_agg[1] + lambda_msg = np.array([lambda_0, lambda_1]) + if not any(lambda_msg): + raise InvalidLambdaMsgToParent + return self._normalize(lambda_msg) diff --git a/beliefs/types/Node.py b/beliefs/types/Node.py new file mode 100644 index 0000000..a8dca7c --- /dev/null +++ b/beliefs/types/Node.py @@ -0,0 +1,179 @@ +import numpy as np +from functools import reduce +from enum import Enum + + +class InvalidLambdaMsgToParent(Exception): + """Computed invalid lambda msg to send to parent.""" + pass + + +class MessageType(Enum): + LAMBDA = 'lambda' + PI = 'pi' + + +class Node: + """A node in a DAG with methods to compute the belief (marginal probability + of the node given evidence) and compute pi/lambda messages to/from its neighbors + to update its belief. + + Implemented from Pearl's belief propagation algorithm. + """ + def __init__(self, + label_id, + children, + parents, + cardinality, + cpd): + """ + Input: + label_id: str + children: str + parents: set of strings + cardinality: int, cardinality of the random variable the node represents + cpd: an instance of a conditional probability distribution, + e.g. BernoulliOrFactor or pgmpy's TabularCPD + """ + self.label_id = label_id + self.children = children + self.parents = parents + self.cardinality = cardinality + self.cpd = cpd + + self.pi_agg = None # np.array dimensions [1, cardinality] + self.lambda_agg = None # np.array dimensions [1, cardinality] + + self.pi_received_msgs = self._init_received_msgs(parents) + self.lambda_received_msgs = self._init_received_msgs(children) + + @classmethod + def from_cpd_class(cls, + label_id, + children, + parents, + cardinality, + cpd_class): + cpd = cpd_class(label_id, parents) + return cls(label_id, children, parents, cardinality, cpd) + + @property + def belief(self): + if self.pi_agg.any() and self.lambda_agg.any(): + belief = np.multiply(self.pi_agg, self.lambda_agg) + return self._normalize(belief) + else: + return None + + def _normalize(self, value): + return value/value.sum() + + @staticmethod + def _init_received_msgs(keys): + return {k: None for k in keys} + + def _return_msgs_received_for_msg_type(self, message_type): + """ + Input: + message_type: MessageType enum + + Returns: + msg_values: list of message values (each an np.array) + """ + if message_type == MessageType.LAMBDA: + msg_values = [msg for msg in self.lambda_received_msgs.values()] + elif message_type == MessageType.PI: + msg_values = [msg for msg in self.pi_received_msgs.values()] + return msg_values + + def validate_and_return_msgs_received_for_msg_type(self, message_type): + """ + Check that all messages have been received from children (parents). + Raise error if all messages have not been received. Called + before calculating lambda_agg (pi_agg). + + Input: + message_type: MessageType enum + + Returns: + msg_values: list of message values (each an np.array) + """ + msg_values = self._return_msgs_received_for_msg_type(message_type) + + if any(msg is None for msg in msg_values): + raise ValueError( + "Missing value for {msg_type} msg from child: can't compute {msg_type}_agg.". + format(msg_type=message_type.value) + ) + else: + return msg_values + + def compute_pi_agg(self): + # TODO: implement explict factor product operation + raise NotImplementedError + + def compute_lambda_agg(self): + if not self.children: + return self.lambda_agg + else: + lambda_msg_values = self.validate_and_return_msgs_received_for_msg_type(MessageType.LAMBDA) + self.lambda_agg = reduce(np.multiply, lambda_msg_values) + return self.lambda_agg + + def _update_received_msg_by_key(self, received_msg_dict, key, new_value): + if key not in received_msg_dict.keys(): + raise ValueError("Label id '{}' to update message isn't in allowed set of keys: {}". + format(key, received_msg_dict.keys())) + + if not isinstance(new_value, np.ndarray): + raise TypeError("Expected a new value of type numpy.ndarray, but got type {}". + format(type(new_value))) + + if new_value.shape != (self.cardinality,): + raise ValueError("Expected new value to be of dimensions ({},) but got {} instead". + format(self.cardinality, new_value.shape)) + received_msg_dict[key] = new_value + + def update_pi_msg_from_parent(self, parent, new_value): + self._update_received_msg_by_key(received_msg_dict=self.pi_received_msgs, + key=parent, + new_value=new_value) + + def update_lambda_msg_from_child(self, child, new_value): + self._update_received_msg_by_key(received_msg_dict=self.lambda_received_msgs, + key=child, + new_value=new_value) + + def compute_pi_msg_to_child(self, child_k): + lambda_msg_from_child = self.lambda_received_msgs[child_k] + if lambda_msg_from_child is not None: + with np.errstate(divide='ignore', invalid='ignore'): + # 0/0 := 0 + return self._normalize( + np.nan_to_num(np.divide(self.belief, lambda_msg_from_child))) + else: + raise ValueError("Can't compute pi message to child_{} without having received" \ + "a lambda message from that child.") + + def compute_lambda_msg_to_parent(self, parent_k): + # TODO: implement explict factor product operation + raise NotImplementedError + + @property + def is_fully_initialized(self): + """ + Returns True if all lambda and pi messages and lambda_agg and + pi_agg are not None, else False. + """ + lambda_msgs = self._return_msgs_received_for_msg_type(MessageType.LAMBDA) + if any(msg is None for msg in lambda_msgs): + return False + + pi_msgs = self._return_msgs_received_for_msg_type(MessageType.PI) + if any(msg is None for msg in pi_msgs): + return False + + if (self.pi_agg is None) or (self.lambda_agg is None): + return False + + return True diff --git a/beliefs/types/__init__.py b/beliefs/types/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beliefs/utils/__init__.py b/beliefs/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/beliefs/utils/edges_helper.py b/beliefs/utils/edges_helper.py new file mode 100644 index 0000000..7ac783c --- /dev/null +++ b/beliefs/utils/edges_helper.py @@ -0,0 +1,136 @@ +from collections import defaultdict + +from beliefs.types.Node import Node +from beliefs.factors.BernoulliOrFactor import BernoulliOrFactor + + +class EdgesHelper: + """Class with convenience methods for working with edges.""" + def __init__(self, edges): + self.edges = edges + + def get_label_to_children_dict(self): + """returns dictionary keyed on label, with value a set of children""" + label_to_children_dict = defaultdict(set) + for parent, child in self.edges: + label_to_children_dict[parent].add(child) + return label_to_children_dict + + def get_label_to_parents_dict(self): + """returns dictionary keyed on label, with value a set of parents + Only used to help create dummy factors from edges (not for algo). + """ + label_to_parents_dict = defaultdict(set) + + for parent, child in self.edges: + label_to_parents_dict[child].add(parent) + return label_to_parents_dict + + def get_labels_from_edges(self): + """Return the set of labels that comprise the vertices of a list of edge tuples.""" + all_labels = set() + for parent, child in self.edges: + all_labels.update({parent, child}) + return all_labels + + def create_cpds_from_edges(self, CPD=BernoulliOrFactor): + """ + Create factors from list of edges. + + Input: + cpd: a factor class, assumed initialization takes in a label_id, the label_id of + the child (should = label_id of the node), and set of label_ids of parents. + + Returns: + factors: a set of (unique) factors of the graph + """ + labels = self.get_labels_from_edges() + label_to_parents = self.get_label_to_parents_dict() + + factors = set() + + for label in labels: + parents = label_to_parents[label] + cpd = CPD(label, parents) + factors.add(cpd) + return factors + + def get_label_to_factor_dict(self, CPD=BernoulliOrFactor): + """Create a dictionary mapping each label_id to the CPD/factor where + that label_id is a child. + + Returns: + label_to_factor: dict mapping each label to the cpd that + has that label as a child. + """ + factors = self.create_cpds_from_edges(CPD=CPD) + + label_to_factor = dict() + for factor in factors: + label_to_factor[factor.child] = factor + return label_to_factor + + def get_label_to_node_dict(self, CPD=BernoulliOrFactor): + """Create a dictionary mapping each label_id to a Node instance. + + Returns: + label_to_node: dict mapping each label to the node that has that + label as a label_id. + """ + nodes = self.create_nodes_from_edges() + + label_to_node = dict() + for node in nodes: + label_to_node[node.label_id] = node + return label_to_node + + def get_label_to_node_dict_for_manual_cpds(self, cpds_list): + """Create a dictionary mapping each label_id to a node that is + instantiated with a manually defined pgmpy factor instance. + + Input: + cpds_list - list of instances of pgmpy factors, e.g. TabularCPD + + Returns: + label_to_node: dict mapping each label to the node that has that + label as a label_id. + """ + label_to_children = self.get_label_to_children_dict() + label_to_parents = self.get_label_to_parents_dict() + + label_to_node = dict() + for cpd in cpds_list: + label_id = cpd.variable + + node = Node(label_id=label_id, + children=label_to_children[label_id], + parents=label_to_parents[label_id], + cardinality=2, + cpd=cpd) + label_to_node[label_id] = node + + return label_to_node + + def create_nodes_from_edges(self, node_class): + """ + Create instances of the node_class. Assumes the node class is + initialized by label_id, children, and parents. + + Returns: + nodes: a set of (unique) nodes of the graph + """ + labels = self.get_labels_from_edges() + labels_to_parents = self.get_label_to_parents_dict() + labels_to_children = self.get_label_to_children_dict() + + nodes = set() + + for label in labels: + parents = labels_to_parents[label] + children = labels_to_children[label] + + node = node_class(label_id=label, + children=children, + parents=parents) + nodes.add(node) + return nodes diff --git a/beliefs/utils/math_helper.py b/beliefs/utils/math_helper.py new file mode 100644 index 0000000..a25ea68 --- /dev/null +++ b/beliefs/utils/math_helper.py @@ -0,0 +1,19 @@ +"""Random math utils.""" + + +def is_kronecker_delta(vector): + """Returns True if vector is a kronecker delta vector, False otherwise. + Specific evidence ('YES' or 'NO') is a kronecker delta vector, whereas + virtual evidence ('MAYBE') is not. + """ + count = 0 + for x in vector: + if x == 1: + count += 1 + elif x != 0: + return False + + if count == 1: + return True + else: + return False diff --git a/beliefs/utils/random_variables.py b/beliefs/utils/random_variables.py new file mode 100644 index 0000000..1a0b0f7 --- /dev/null +++ b/beliefs/utils/random_variables.py @@ -0,0 +1,21 @@ + + +def get_reachable_observed_variables_for_inferred_variables(model, observed=set()): + """ + After performing inference on a BayesianModel, get the labels of observed variables + ("reachable observed variables") that influenced the beliefs of variables inferred + to be in a definite state. + + INPUT + model: instance of BayesianModel class or subclass + observed: set of labels (strings) corresponding to vars pinned to definite + state during inference. + RETURNS + dict, of form key - source label (a string), value - a list of strings + """ + if not observed: + return {} + + source_vars = model.get_unobserved_variables_in_definite_state(observed) + + return {var: model.reachable_observed_variables(var, observed) for var in source_vars} -- cgit v1.2.3