Module imodels.tree.optimal_classification_tree.tree

Expand source code
from abc import abstractmethod, ABCMeta
import numpy as np
from collections import Counter
import random


class Tree(metaclass=ABCMeta):
    """Complete binary decision tree addressed with heap-style node numbers.

    A node ``t`` has the children ``2 * t`` (left) and ``2 * t + 1`` (right).
    Every non-leaf ("parent") node ``t`` sends a sample ``x`` to its left child
    when ``a[t].dot(x) < b[t]`` and to its right child otherwise.  The root
    does not have to be node 1: ``root_node`` may be any node number, which is
    how subtrees of a larger tree are represented.
    """

    def __init__(self, root_node: int, depth: int, a: dict, b: dict, alpha: float = 0.0):
        """Store the tree parameters and check the split dictionaries.

        Args:
            root_node: Node number of this tree's root.
            depth: Number of split levels below the root; 0 means a single leaf.
            a: Maps a node number to that node's split coefficient vector.
            b: Maps a node number to that node's split threshold.
            alpha: Weight of the complexity term added to the loss.

        Raises:
            AssertionError: If ``depth`` is negative.
            ValueError: If ``a`` or ``b`` has no entry for a parent node.
        """
        self.root_node = root_node
        self.depth = depth
        self.a = a
        self.b = b
        # Leaf number -> majority label; filled by generate_majority_leaf_class().
        self.c = None
        self.alpha = alpha

        assert depth >= 0, "Tree's depth should be non-negative! (depth: {0})".format(depth)
        for node in self.get_parent_nodes():
            if node not in a:
                raise ValueError("The given `a` doesn't contain node {0}!".format(node))
            if node not in b:
                raise ValueError("The given `b` doesn't contain node {0}!".format(node))

    @staticmethod
    def _node_depth(node: int) -> int:
        """Return the level of ``node`` when node 1 is level 0 (2-3 -> 1, 4-7 -> 2, ...)."""
        # Integer arithmetic: equals ceil(log2(node + 1)) - 1 without float rounding.
        return int(node).bit_length() - 1

    @staticmethod
    def _majority_label(labels):
        """Return the most frequent label; ties go to the label encountered first."""
        return Counter(labels).most_common(1)[0][0]

    def _first_leaf_number(self):
        """Return the smallest node number on the level where this tree's leaves live."""
        return 2 ** (self._node_depth(self.root_node) + self.depth)

    def subtree(self, root_node: int):
        """Return the part of this tree below ``root_node`` as a new ``Tree``.

        The subtree ends on the same leaf level as this tree and owns copies
        of the split vectors of its parent nodes.

        Raises:
            ValueError: If this tree is a single leaf (``depth == 0``).
        """
        if self.depth == 0:
            raise ValueError("The current tree contains only one leaf node. Cannot create subtree for leaf node! ")

        leaf_level = self._node_depth(self.root_node) + self.depth
        subtree_depth = leaf_level - self._node_depth(root_node)

        # Constructing against the full dictionaries validates that every
        # parent node of the subtree has a split; they are narrowed afterwards.
        result = Tree(root_node, subtree_depth, self.a, self.b, alpha=self.alpha)
        parents = result.get_parent_nodes()
        result.a = {t: self.a[t].copy() for t in parents}
        result.b = {t: self.b[t] for t in parents}
        return result

    def get_nodes(self):
        """Return all node numbers of the tree, level by level, left to right."""
        nodes = [self.root_node]
        level = [self.root_node]
        for _ in range(self.depth):
            level = [child for node in level for child in (node * 2, node * 2 + 1)]
            nodes.extend(level)
        return nodes

    def get_leaf_nodes(self):
        """Return the node numbers on the deepest level of the tree."""
        first_leaf = self._first_leaf_number()
        return [node for node in self.get_nodes() if node >= first_leaf]

    def get_parent_nodes(self):
        """Return the node numbers above the deepest level (the split nodes)."""
        first_leaf = self._first_leaf_number()
        return [node for node in self.get_nodes() if node < first_leaf]

    def children(self):
        """Return the ``(left, right)`` subtrees rooted at the root's two children.

        Raises:
            ValueError: If this tree is a single leaf (``depth == 0``).
        """
        if self.depth == 0:
            raise ValueError("The current tree contains only one leaf node. Cannot create children for leaf node! ")

        return self.subtree(self.root_node * 2), self.subtree(self.root_node * 2 + 1)

    def min_leaf_size(self, x):
        """Return the number of rows of ``x`` in the smallest non-empty leaf."""
        # The labels do not influence leaf sizes, so all-zero labels are enough.
        _, smallest = self.loss_and_min_leaf_size(x, np.zeros(x.shape[0]))
        return smallest

    def evaluate(self, x):
        """Route every row of ``x`` to a leaf.

        Returns:
            Dict mapping each leaf number to the list of row indices of ``x``
            that end up in that leaf (an empty list for unused leaves).
        """
        leaf_samples_mapping = {t: [] for t in self.get_leaf_nodes()}
        for i in range(x.shape[0]):
            current_x = x[i, :]
            t = self.root_node
            for _ in range(self.depth):
                # Strictly below the threshold goes left, everything else right.
                t = t * 2 if self.a[t].dot(current_x) < self.b[t] else t * 2 + 1
            leaf_samples_mapping[t].append(i)
        return leaf_samples_mapping

    def loss(self, x, y):
        """Return only the loss part of ``loss_and_min_leaf_size(x, y)``."""
        value, _ = self.loss_and_min_leaf_size(x, y)
        return value

    def loss_and_min_leaf_size(self, x, y):
        """Evaluate ``x`` and return ``(loss, min_leaf_size)`` for labels ``y``.

        Raises:
            AssertionError: If ``x`` and ``y`` disagree on the number of samples.
        """
        assert x.shape[0] == y.shape[0], "Number of rows of x should be equal to length of y! ({0} != {1})".format(
            x.shape[0], y.shape[0]
        )
        return self.loss_and_min_leaf_size_helper(self.evaluate(x), y)

    def loss_and_min_leaf_size_helper(self, res: dict, y):
        """Compute the loss and the smallest non-empty leaf for a leaf assignment.

        Args:
            res: Maps a leaf number to the list of sample indices in that leaf.
            y: Label array indexed by sample.

        Returns:
            ``(loss, min_leaf_size)`` where ``loss`` is the misclassification
            rate of majority-vote leaves plus ``alpha`` times the mean number
            of non-zero split coefficients per parent node.
        """
        n_samples = y.shape[0]
        # Samples that appear in no leaf keep the default prediction 0.
        predict_y = np.zeros(n_samples)
        for sample_indices in res.values():
            if len(sample_indices) > 0:
                label = self._majority_label(y[i] for i in sample_indices)
                for i in sample_indices:
                    predict_y[i] = label

        parent_nodes = self.get_parent_nodes()
        if len(parent_nodes) == 0:
            tree_complexity = 0.0
        else:
            non_zero = sum(1 for t in parent_nodes for coefficient in self.a[t] if coefficient != 0)
            tree_complexity = non_zero / float(len(parent_nodes))

        misclassified = sum(1 for i in range(n_samples) if y[i] != predict_y[i])
        loss = misclassified / n_samples + self.alpha * tree_complexity

        min_leaf_size = min([len(indices) for indices in res.values() if len(indices) > 0])
        return loss, min_leaf_size

    def generate_majority_leaf_class(self, x, y):
        """Set ``self.c`` to the majority label of every leaf for data ``(x, y)``.

        Leaves that receive no sample get the label 0.

        Raises:
            AssertionError: If ``x`` and ``y`` disagree on the number of samples.
        """
        assert x.shape[0] == y.shape[0], "Number of rows of x should be equal to length of y! ({0} != {1})".format(
            x.shape[0], y.shape[0]
        )
        res = self.evaluate(x)
        predict_leaf_value = {t: 0 for t in self.get_leaf_nodes()}
        for t, sample_indices in res.items():
            if len(sample_indices) > 0:
                predict_leaf_value[t] = self._majority_label(y[i] for i in sample_indices)
        self.c = predict_leaf_value

    def copy(self):
        """Return an independent copy of this tree.

        The split vectors are copied as well, so changing a coefficient of the
        copy in place does not alter this tree.  Leaf labels in ``c`` are
        carried over when they have been generated.
        """
        duplicate = Tree(
            self.root_node,
            self.depth,
            {t: (at.copy() if hasattr(at, "copy") else at) for t, at in self.a.items()},
            dict(self.b),
            self.alpha,
        )
        if self.c is not None:
            duplicate.c = dict(self.c)
        return duplicate


class WholeTree(Tree):
    """A ``Tree`` whose root is always node 1, with ``alpha`` defaulting to 0.1."""

    def __init__(self, depth: int, a: dict, b: dict, alpha: float = 0.1):
        """Forward ``depth``, ``a``, ``b`` and ``alpha`` to ``Tree`` with root node 1."""
        super().__init__(1, depth, a, b, alpha)


class TreeModel(Tree):
    """A ``Tree`` rooted at node 1 whose splits are initialised at random."""

    def __init__(self, depth: int, p: int, alpha: float = 0.1):
        """Draw a random single-feature split for every node number below ``2 ** depth``.

        Args:
            depth: Depth passed on to ``Tree``.
            p: Length of each split vector; one position in ``0 .. p - 1`` is
                set to 1 (picked with ``random.randint``), the rest stay 0.
            alpha: Complexity weight passed on to ``Tree``.

        Thresholds come from ``random.random()``.  Node number 0 also receives
        an entry although the node numbering of a tree rooted at 1 starts at 1.
        """
        a, b = {}, {}
        for node in range(2 ** depth):
            coefficients = np.zeros(p)
            chosen_feature = random.randint(0, p - 1)
            coefficients[chosen_feature] = 1
            threshold = random.random()
            a[node] = coefficients
            b[node] = threshold
        super().__init__(1, depth, a, b, alpha)


if __name__ == "__main__":
    # Small manual demo: build a depth-2 tree rooted at node 3 with all-zero
    # splits, then print its node numbers and look at its two child subtrees.
    node_ids = range(100)
    fake_a = {t: np.array([0, 0]) for t in node_ids}
    fake_b = dict.fromkeys(node_ids, 0)
    tree = Tree(3, 2, fake_a, fake_b)
    print(tree.get_nodes())
    left_child, right_child = tree.children()
    print(left_child.a, right_child.get_nodes())
    # print(tree.min_leaf_size(np.array([[1, 1], [2, 2]])))

Classes

class Tree (root_node, depth, a, b, alpha=0.0)
Expand source code
class Tree(metaclass=ABCMeta):
    """Complete binary decision tree addressed with heap-style node numbers.

    A node ``t`` has the children ``2 * t`` (left) and ``2 * t + 1`` (right).
    Every non-leaf ("parent") node ``t`` sends a sample ``x`` to its left child
    when ``a[t].dot(x) < b[t]`` and to its right child otherwise.  The root
    does not have to be node 1: ``root_node`` may be any node number, which is
    how subtrees of a larger tree are represented.
    """

    def __init__(self, root_node: int, depth: int, a: dict, b: dict, alpha: float = 0.0):
        """Store the tree parameters and check the split dictionaries.

        Args:
            root_node: Node number of this tree's root.
            depth: Number of split levels below the root; 0 means a single leaf.
            a: Maps a node number to that node's split coefficient vector.
            b: Maps a node number to that node's split threshold.
            alpha: Weight of the complexity term added to the loss.

        Raises:
            AssertionError: If ``depth`` is negative.
            ValueError: If ``a`` or ``b`` has no entry for a parent node.
        """
        self.root_node = root_node
        self.depth = depth
        self.a = a
        self.b = b
        # Leaf number -> majority label; filled by generate_majority_leaf_class().
        self.c = None
        self.alpha = alpha

        assert depth >= 0, "Tree's depth should be non-negative! (depth: {0})".format(depth)
        for node in self.get_parent_nodes():
            if node not in a:
                raise ValueError("The given `a` doesn't contain node {0}!".format(node))
            if node not in b:
                raise ValueError("The given `b` doesn't contain node {0}!".format(node))

    @staticmethod
    def _node_depth(node: int) -> int:
        """Return the level of ``node`` when node 1 is level 0 (2-3 -> 1, 4-7 -> 2, ...)."""
        # Integer arithmetic: equals ceil(log2(node + 1)) - 1 without float rounding.
        return int(node).bit_length() - 1

    @staticmethod
    def _majority_label(labels):
        """Return the most frequent label; ties go to the label encountered first."""
        return Counter(labels).most_common(1)[0][0]

    def _first_leaf_number(self):
        """Return the smallest node number on the level where this tree's leaves live."""
        return 2 ** (self._node_depth(self.root_node) + self.depth)

    def subtree(self, root_node: int):
        """Return the part of this tree below ``root_node`` as a new ``Tree``.

        The subtree ends on the same leaf level as this tree and owns copies
        of the split vectors of its parent nodes.

        Raises:
            ValueError: If this tree is a single leaf (``depth == 0``).
        """
        if self.depth == 0:
            raise ValueError("The current tree contains only one leaf node. Cannot create subtree for leaf node! ")

        leaf_level = self._node_depth(self.root_node) + self.depth
        subtree_depth = leaf_level - self._node_depth(root_node)

        # Constructing against the full dictionaries validates that every
        # parent node of the subtree has a split; they are narrowed afterwards.
        result = Tree(root_node, subtree_depth, self.a, self.b, alpha=self.alpha)
        parents = result.get_parent_nodes()
        result.a = {t: self.a[t].copy() for t in parents}
        result.b = {t: self.b[t] for t in parents}
        return result

    def get_nodes(self):
        """Return all node numbers of the tree, level by level, left to right."""
        nodes = [self.root_node]
        level = [self.root_node]
        for _ in range(self.depth):
            level = [child for node in level for child in (node * 2, node * 2 + 1)]
            nodes.extend(level)
        return nodes

    def get_leaf_nodes(self):
        """Return the node numbers on the deepest level of the tree."""
        first_leaf = self._first_leaf_number()
        return [node for node in self.get_nodes() if node >= first_leaf]

    def get_parent_nodes(self):
        """Return the node numbers above the deepest level (the split nodes)."""
        first_leaf = self._first_leaf_number()
        return [node for node in self.get_nodes() if node < first_leaf]

    def children(self):
        """Return the ``(left, right)`` subtrees rooted at the root's two children.

        Raises:
            ValueError: If this tree is a single leaf (``depth == 0``).
        """
        if self.depth == 0:
            raise ValueError("The current tree contains only one leaf node. Cannot create children for leaf node! ")

        return self.subtree(self.root_node * 2), self.subtree(self.root_node * 2 + 1)

    def min_leaf_size(self, x):
        """Return the number of rows of ``x`` in the smallest non-empty leaf."""
        # The labels do not influence leaf sizes, so all-zero labels are enough.
        _, smallest = self.loss_and_min_leaf_size(x, np.zeros(x.shape[0]))
        return smallest

    def evaluate(self, x):
        """Route every row of ``x`` to a leaf.

        Returns:
            Dict mapping each leaf number to the list of row indices of ``x``
            that end up in that leaf (an empty list for unused leaves).
        """
        leaf_samples_mapping = {t: [] for t in self.get_leaf_nodes()}
        for i in range(x.shape[0]):
            current_x = x[i, :]
            t = self.root_node
            for _ in range(self.depth):
                # Strictly below the threshold goes left, everything else right.
                t = t * 2 if self.a[t].dot(current_x) < self.b[t] else t * 2 + 1
            leaf_samples_mapping[t].append(i)
        return leaf_samples_mapping

    def loss(self, x, y):
        """Return only the loss part of ``loss_and_min_leaf_size(x, y)``."""
        value, _ = self.loss_and_min_leaf_size(x, y)
        return value

    def loss_and_min_leaf_size(self, x, y):
        """Evaluate ``x`` and return ``(loss, min_leaf_size)`` for labels ``y``.

        Raises:
            AssertionError: If ``x`` and ``y`` disagree on the number of samples.
        """
        assert x.shape[0] == y.shape[0], "Number of rows of x should be equal to length of y! ({0} != {1})".format(
            x.shape[0], y.shape[0]
        )
        return self.loss_and_min_leaf_size_helper(self.evaluate(x), y)

    def loss_and_min_leaf_size_helper(self, res: dict, y):
        """Compute the loss and the smallest non-empty leaf for a leaf assignment.

        Args:
            res: Maps a leaf number to the list of sample indices in that leaf.
            y: Label array indexed by sample.

        Returns:
            ``(loss, min_leaf_size)`` where ``loss`` is the misclassification
            rate of majority-vote leaves plus ``alpha`` times the mean number
            of non-zero split coefficients per parent node.
        """
        n_samples = y.shape[0]
        # Samples that appear in no leaf keep the default prediction 0.
        predict_y = np.zeros(n_samples)
        for sample_indices in res.values():
            if len(sample_indices) > 0:
                label = self._majority_label(y[i] for i in sample_indices)
                for i in sample_indices:
                    predict_y[i] = label

        parent_nodes = self.get_parent_nodes()
        if len(parent_nodes) == 0:
            tree_complexity = 0.0
        else:
            non_zero = sum(1 for t in parent_nodes for coefficient in self.a[t] if coefficient != 0)
            tree_complexity = non_zero / float(len(parent_nodes))

        misclassified = sum(1 for i in range(n_samples) if y[i] != predict_y[i])
        loss = misclassified / n_samples + self.alpha * tree_complexity

        min_leaf_size = min([len(indices) for indices in res.values() if len(indices) > 0])
        return loss, min_leaf_size

    def generate_majority_leaf_class(self, x, y):
        """Set ``self.c`` to the majority label of every leaf for data ``(x, y)``.

        Leaves that receive no sample get the label 0.

        Raises:
            AssertionError: If ``x`` and ``y`` disagree on the number of samples.
        """
        assert x.shape[0] == y.shape[0], "Number of rows of x should be equal to length of y! ({0} != {1})".format(
            x.shape[0], y.shape[0]
        )
        res = self.evaluate(x)
        predict_leaf_value = {t: 0 for t in self.get_leaf_nodes()}
        for t, sample_indices in res.items():
            if len(sample_indices) > 0:
                predict_leaf_value[t] = self._majority_label(y[i] for i in sample_indices)
        self.c = predict_leaf_value

    def copy(self):
        """Return an independent copy of this tree.

        The split vectors are copied as well, so changing a coefficient of the
        copy in place does not alter this tree.  Leaf labels in ``c`` are
        carried over when they have been generated.
        """
        duplicate = Tree(
            self.root_node,
            self.depth,
            {t: (at.copy() if hasattr(at, "copy") else at) for t, at in self.a.items()},
            dict(self.b),
            self.alpha,
        )
        if self.c is not None:
            duplicate.c = dict(self.c)
        return duplicate

Subclasses

Methods

def children(self)
Expand source code
def children(self):
    """Return the ``(left, right)`` subtrees rooted at nodes ``2 * root`` and ``2 * root + 1``.

    Raises:
        ValueError: If the tree has depth 0, i.e. it is a single leaf.
    """
    if self.depth != 0:
        left_root = self.root_node * 2
        right_root = self.root_node * 2 + 1
        return self.subtree(left_root), self.subtree(right_root)

    raise ValueError("The current tree contains only one leaf node. Cannot create children for leaf node! ")
def copy(self)
Expand source code
def copy(self):
    """Return an independent copy of this tree.

    A plain ``dict.copy()`` of ``a`` would share the split vectors between
    both trees, so an in-place change to a coefficient of the copy would also
    change this tree.  Each vector is therefore copied too.  Leaf labels in
    ``c`` are carried over when they have been generated.
    """
    duplicate = Tree(
        self.root_node,
        self.depth,
        {t: (at.copy() if hasattr(at, "copy") else at) for t, at in self.a.items()},
        dict(self.b),
        self.alpha,
    )
    if self.c is not None:
        duplicate.c = dict(self.c)
    return duplicate
def evaluate(self, x)
Expand source code
def evaluate(self, x):
    """Route every row of ``x`` from the root down to a leaf.

    At a node ``t`` a row goes to ``2 * t`` when ``a[t].dot(row) < b[t]`` and
    to ``2 * t + 1`` otherwise; this is repeated ``depth`` times.

    Returns:
        Dict mapping each leaf number to the list of row indices that reach it.
    """
    leaf_samples_mapping = {leaf: [] for leaf in self.get_leaf_nodes()}
    n_rows = x.shape[0]
    for row_index in range(n_rows):
        row = x[row_index, :]
        node = self.root_node
        for _ in range(self.depth):
            goes_left = self.a[node].dot(row) < self.b[node]
            node = node * 2 if goes_left else node * 2 + 1
        leaf_samples_mapping[node].append(row_index)
    return leaf_samples_mapping
def generate_majority_leaf_class(self, x, y)
Expand source code
def generate_majority_leaf_class(self, x, y):
    """Store in ``self.c`` the most frequent label of the samples in each leaf.

    Leaves without samples keep the label 0.  When several labels are equally
    frequent in a leaf, the one that occurs first among its samples is used.

    Raises:
        AssertionError: If ``x`` and ``y`` disagree on the number of samples.
    """
    n_rows, n_labels = x.shape[0], y.shape[0]
    assert n_rows == n_labels, "Number of rows of x should be equal to length of y! ({0} != {1})".format(
        n_rows, n_labels
    )
    assignment = self.evaluate(x)
    leaf_labels = dict.fromkeys(self.get_leaf_nodes(), 0)
    for leaf, sample_indices in assignment.items():
        if len(sample_indices) == 0:
            continue
        label_counts = Counter(y[i] for i in sample_indices)
        leaf_labels[leaf] = label_counts.most_common(1)[0][0]
    self.c = leaf_labels
def get_leaf_nodes(self)
Expand source code
def get_leaf_nodes(self):
    """Return the node numbers of this tree that lie on its deepest level."""
    # Level of the root (node 1 is level 0) plus the depth gives the leaf level.
    root_level = int(np.ceil(np.log2(self.root_node + 1))) - 1
    first_leaf_number = 2 ** (root_level + self.depth)
    leaves = []
    for node in self.get_nodes():
        if node >= first_leaf_number:
            leaves.append(node)
    return leaves
def get_nodes(self)
Expand source code
def get_nodes(self):
    """Return every node number of the tree, level by level from the root.

    Within a level the nodes appear left to right; the children of a node
    ``t`` are ``2 * t`` and ``2 * t + 1``.
    """
    frontier = [self.root_node]
    collected = list(frontier)
    for _ in range(self.depth):
        frontier = [child for parent in frontier for child in (parent * 2, parent * 2 + 1)]
        collected.extend(frontier)
    return collected
def get_parent_nodes(self)
Expand source code
def get_parent_nodes(self):
    """Return the node numbers of this tree that lie above its deepest level."""
    # Level of the root (node 1 is level 0) plus the depth gives the leaf level.
    root_level = int(np.ceil(np.log2(self.root_node + 1))) - 1
    first_leaf_number = 2 ** (root_level + self.depth)
    parents = []
    for node in self.get_nodes():
        if node < first_leaf_number:
            parents.append(node)
    return parents
def loss(self, x, y)
Expand source code
def loss(self, x, y):
    """Return the loss component of ``self.loss_and_min_leaf_size(x, y)``."""
    loss_value, _unused_min_leaf_size = self.loss_and_min_leaf_size(x, y)
    return loss_value
def loss_and_min_leaf_size(self, x, y)
Expand source code
def loss_and_min_leaf_size(self, x, y):
    """Assign the rows of ``x`` to leaves and score the assignment against ``y``.

    Returns:
        Whatever ``self.loss_and_min_leaf_size_helper`` returns for the leaf
        assignment produced by ``self.evaluate(x)``.

    Raises:
        AssertionError: If ``x`` and ``y`` disagree on the number of samples.
    """
    n_rows = x.shape[0]
    n_labels = y.shape[0]
    assert n_rows == n_labels, "Number of rows of x should be equal to length of y! ({0} != {1})".format(
        n_rows, n_labels
    )
    leaf_assignment = self.evaluate(x)
    return self.loss_and_min_leaf_size_helper(leaf_assignment, y)
def loss_and_min_leaf_size_helper(self, res, y)
Expand source code
def loss_and_min_leaf_size_helper(self, res: dict, y):
    """Compute the loss and the smallest non-empty leaf for a leaf assignment.

    Args:
        res: Maps a leaf number to the list of sample indices in that leaf.
        y: Label array indexed by sample.

    Returns:
        ``(loss, min_leaf_size)``.  ``loss`` is the fraction of samples whose
        label differs from the majority label of their leaf, plus ``alpha``
        times the mean number of non-zero split coefficients per parent node.
        ``min_leaf_size`` is the sample count of the smallest non-empty leaf.
    """
    n_samples = y.shape[0]
    # Samples that appear in no leaf keep the default prediction 0.
    predict_y = np.zeros(n_samples)
    for sample_indices in res.values():
        if len(sample_indices) > 0:
            # Ties between equally frequent labels go to the first one seen.
            label_this_node = Counter(y[i] for i in sample_indices).most_common(1)[0][0]
            for i in sample_indices:
                predict_y[i] = label_this_node

    # Walk the tree once instead of once per use of the parent list.
    parent_nodes = self.get_parent_nodes()
    if len(parent_nodes) == 0:
        tree_complexity = 0.0
    else:
        non_zero = sum(1 for t in parent_nodes for coefficient in self.a[t] if coefficient != 0)
        tree_complexity = non_zero / float(len(parent_nodes))

    misclassified = sum(1 for i in range(n_samples) if y[i] != predict_y[i])
    loss = misclassified / n_samples + self.alpha * tree_complexity

    min_leaf_size = min([len(indices) for indices in res.values() if len(indices) > 0])
    return loss, min_leaf_size
def min_leaf_size(self, x)
Expand source code
def min_leaf_size(self, x):
    """Return the min-leaf-size part of ``loss_and_min_leaf_size`` for ``x``.

    An all-zero label vector with one entry per row of ``x`` is passed as
    ``y``; the loss computed from it is discarded.
    """
    placeholder_labels = np.zeros(x.shape[0])
    _ignored_loss, smallest = self.loss_and_min_leaf_size(x, placeholder_labels)
    return smallest
def subtree(self, root_node)
Expand source code
def subtree(self, root_node: int):
    """Return the part of this tree below ``root_node`` as a new ``Tree``.

    The new tree ends on the same leaf level as this one, keeps ``alpha`` and
    gets copies of the split vectors (and the thresholds) of its parent nodes.

    Raises:
        ValueError: If this tree has depth 0, i.e. it is a single leaf.
    """
    # Levels are counted from node 1 (level 0); the leaves of this tree sit on
    # level ``own_root_level + depth``.
    own_root_level = int(np.ceil(np.log2(self.root_node + 1))) - 1
    leaf_level = own_root_level + self.depth

    new_root_level = int(np.ceil(np.log2(root_node + 1))) - 1

    if self.depth == 0:
        raise ValueError("The current tree contains only one leaf node. Cannot create subtree for leaf node! ")

    new_depth = leaf_level - new_root_level
    # Temporary tree over the full dictionaries, used to enumerate the
    # subtree's parent nodes.
    probe = Tree(root_node, new_depth, self.a, self.b)
    kept_nodes = probe.get_parent_nodes()
    subtree_a = {node: self.a[node].copy() for node in kept_nodes}
    subtree_b = {node: self.b[node] for node in kept_nodes}

    return Tree(root_node, new_depth, subtree_a, subtree_b, alpha=self.alpha)
class TreeModel (depth, p, alpha=0.1)
Expand source code
class TreeModel(Tree):
    """A ``Tree`` rooted at node 1 whose splits are initialised at random."""

    def __init__(self, depth: int, p: int, alpha: float = 0.1):
        """Draw a random single-feature split for every node number below ``2 ** depth``.

        Args:
            depth: Depth passed on to ``Tree``.
            p: Length of each split vector; one position in ``0 .. p - 1`` is
                set to 1 (picked with ``random.randint``), the rest stay 0.
            alpha: Complexity weight passed on to ``Tree``.

        Thresholds come from ``random.random()``.  Node number 0 also receives
        an entry although the node numbering of a tree rooted at 1 starts at 1.
        """
        a, b = {}, {}
        for node in range(2 ** depth):
            coefficients = np.zeros(p)
            chosen_feature = random.randint(0, p - 1)
            coefficients[chosen_feature] = 1
            threshold = random.random()
            a[node] = coefficients
            b[node] = threshold
        super().__init__(1, depth, a, b, alpha)

Ancestors

class WholeTree (depth, a, b, alpha=0.1)
Expand source code
class WholeTree(Tree):
    """A ``Tree`` whose root is always node 1, with ``alpha`` defaulting to 0.1."""

    def __init__(self, depth: int, a: dict, b: dict, alpha: float = 0.1):
        """Forward ``depth``, ``a``, ``b`` and ``alpha`` to ``Tree`` with root node 1."""
        super().__init__(1, depth, a, b, alpha)

Ancestors