Module imodels.rulefit.rulefit

Linear model of tree-based decision rules

This module implements the RuleFit algorithm

The module structure is the following:

  • RuleCondition implements a binary feature transformation
  • Rule implements a Rule composed of RuleConditions
  • RuleEnsemble implements an ensemble of Rules
  • RuleFit implements the RuleFit algorithm
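For example, fitting the model end-to-end might look like the sketch below (the synthetic data, feature names, and parameter values are illustrative only):

import numpy as np
from imodels.rulefit.rulefit import RuleFit

rng = np.random.RandomState(0)
X = rng.rand(300, 5)
y = 2 * X[:, 0] - 3 * (X[:, 2] > 0.5) + 0.1 * rng.randn(300)

rf = RuleFit(max_rules=200, random_state=0)
rf.fit(X, y, feature_names=['f0', 'f1', 'f2', 'f3', 'f4'])
y_pred = rf.predict(X)
rules = rf.get_rules(exclude_zero_coef=True)
print(rules.sort_values('importance', ascending=False).head())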
Source code
"""Linear model of tree-based decision rules

This module implements the RuleFit algorithm

The module structure is the following:

- ``RuleCondition`` implements a binary feature transformation
- ``Rule`` implements a Rule composed of ``RuleConditions``
- ``RuleEnsemble`` implements an ensemble of ``Rules``
- ``RuleFit`` implements the RuleFit algorithm

"""
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier, RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import LassoCV,LogisticRegressionCV
from functools import reduce


class RuleCondition():
    """Class for binary rule condition

    Warning: this class should not be used directly.
    """

    def __init__(self,
                 feature_index,
                 threshold,
                 operator,
                 support,
                 feature_name = None):
        self.feature_index = feature_index
        self.threshold = threshold
        self.operator = operator
        self.support = support
        self.feature_name = feature_name


    def __repr__(self):
        return self.__str__()

    def __str__(self):
        if self.feature_name:
            feature = self.feature_name
        else:
            feature = self.feature_index
        return "%s %s %s" % (feature, self.operator, self.threshold)

    def transform(self, X):
        """Transform dataset.

        Parameters
        ----------
        X: array-like matrix, shape=(n_samples, n_features)

        Returns
        -------
        X_transformed: array-like matrix, shape=(n_samples, 1)
        """
        if self.operator == "<=":
            res =  1 * (X[:,self.feature_index] <= self.threshold)
        elif self.operator == ">":
            res = 1 * (X[:,self.feature_index] > self.threshold)
        return res

    def __eq__(self, other):
        return self.__hash__() == other.__hash__()

    def __hash__(self):
        return hash((self.feature_index, self.threshold, self.operator, self.feature_name))


class Winsorizer():
    """Performs Winsorization 1->1*

    Warning: this class should not be used directly.
    """    
    def __init__(self,trim_quantile=0.0):
        self.trim_quantile=trim_quantile
        self.winsor_lims=None
        
    def train(self,X):
        # get winsor limits
        self.winsor_lims=np.ones([2,X.shape[1]])*np.inf
        self.winsor_lims[0,:]=-np.inf
        if self.trim_quantile>0:
            for i_col in np.arange(X.shape[1]):
                lower=np.percentile(X[:,i_col],self.trim_quantile*100)
                upper=np.percentile(X[:,i_col],100-self.trim_quantile*100)
                self.winsor_lims[:,i_col]=[lower,upper]
        
    def trim(self,X):
        # clip each column to its winsor limits (limits broadcast across rows)
        X_ = np.clip(X, self.winsor_lims[0,:], self.winsor_lims[1,:])
        return X_

class FriedScale():
    """Performs scaling of linear variables according to Friedman et al. 2005 Sec 5

    Each variable is first Winsorized l->l*, then standardised as 0.4 x l* / std(l*)
    Warning: this class should not be used directly.
    """    
    def __init__(self, winsorizer = None):
        self.scale_multipliers=None
        self.winsorizer = winsorizer
        
    def train(self,X):
        # get multipliers
        if self.winsorizer is not None:
            X_trimmed= self.winsorizer.trim(X)
        else:
            X_trimmed = X

        scale_multipliers=np.ones(X.shape[1])
        for i_col in np.arange(X.shape[1]):
            num_uniq_vals=len(np.unique(X[:,i_col]))
            if num_uniq_vals>2: # don't scale binary variables which are effectively already rules
                scale_multipliers[i_col]=0.4/(1.0e-12 + np.std(X_trimmed[:,i_col]))
        self.scale_multipliers=scale_multipliers
        
    def scale(self,X):
        if self.winsorizer is not None:
            return self.winsorizer.trim(X)*self.scale_multipliers
        else:
            return X*self.scale_multipliers
        

class Rule():
    """Class for binary Rules from list of conditions

    Warning: this class should not be used directly.
    """
    def __init__(self,
                 rule_conditions,prediction_value):
        self.conditions = set(rule_conditions)
        self.support = min([x.support for x in rule_conditions])
        self.prediction_value=prediction_value
        self.rule_direction=None
    def transform(self, X):
        """Transform dataset.

        Parameters
        ----------
        X: array-like matrix

        Returns
        -------
        X_transformed: array-like matrix, shape=(n_samples, 1)
        """
        rule_applies = [condition.transform(X) for condition in self.conditions]
        return reduce(lambda x,y: x * y, rule_applies)

    def __str__(self):
        return  " & ".join([x.__str__() for x in self.conditions])

    def __repr__(self):
        return self.__str__()

    def __hash__(self):
        return sum([condition.__hash__() for condition in self.conditions])

    def __eq__(self, other):
        return self.__hash__() == other.__hash__()


def extract_rules_from_tree(tree, feature_names=None):
    """Helper to turn a tree into as set of rules
    """
    rules = set()

    def traverse_nodes(node_id=0,
                       operator=None,
                       threshold=None,
                       feature=None,
                       conditions=[]):
        if node_id != 0:
            if feature_names is not None:
                feature_name = feature_names[feature]
            else:
                feature_name = feature
            rule_condition = RuleCondition(feature_index=feature,
                                           threshold=threshold,
                                           operator=operator,
                                           support = tree.n_node_samples[node_id] / float(tree.n_node_samples[0]),
                                           feature_name=feature_name)
            new_conditions = conditions + [rule_condition]
        else:
            new_conditions = []
        ## if not terminal node
        if tree.children_left[node_id] != tree.children_right[node_id]: 
            feature = tree.feature[node_id]
            threshold = tree.threshold[node_id]
            
            left_node_id = tree.children_left[node_id]
            traverse_nodes(left_node_id, "<=", threshold, feature, new_conditions)
            
            right_node_id = tree.children_right[node_id]
            traverse_nodes(right_node_id, ">", threshold, feature, new_conditions)
        else: # a leaf node
            if len(new_conditions)>0:
                new_rule = Rule(new_conditions,tree.value[node_id][0][0])
                rules.update([new_rule])
            else:
                pass #tree only has a root node!
            return None

    traverse_nodes()
    
    return rules



class RuleEnsemble():
    """Ensemble of binary decision rules

    This class implements an ensemble of decision rules that extracts rules from
    an ensemble of decision trees.

    Parameters
    ----------
    tree_list: List or array of DecisionTreeClassifier or DecisionTreeRegressor
        Trees from which the rules are created

    feature_names: List of strings, optional (default=None)
        Names of the features

    Attributes
    ----------
    rules: List of Rule
        The ensemble of rules extracted from the trees
    """
    def __init__(self,
                 tree_list,
                 feature_names=None):
        self.tree_list = tree_list
        self.feature_names = feature_names
        self.rules = set()
        ## TODO: Move this out of __init__
        self._extract_rules()
        self.rules=list(self.rules)

    def _extract_rules(self):
        """Recursively extract rules from each tree in the ensemble

        """
        for tree in self.tree_list:
            rules = extract_rules_from_tree(tree[0].tree_,feature_names=self.feature_names)
            self.rules.update(rules)

    def filter_rules(self, func):
        # materialise to a list: in Python 3, filter() returns a lazy iterator
        self.rules = list(filter(func, self.rules))

    def filter_short_rules(self, k):
        self.filter_rules(lambda x: len(x.conditions) > k)

    def transform(self, X,coefs=None):
        """Transform dataset.

        Parameters
        ----------
        X:      array-like matrix, shape=(n_samples, n_features)
        coefs:  (optional) if supplied, this makes the prediction
                slightly more efficient by setting rules with zero 
                coefficients to zero without calling Rule.transform().
        Returns
        -------
        X_transformed: array-like matrix, shape=(n_samples, n_out)
            Transformed dataset. Each column represents one rule.
        """
        rule_list=list(self.rules) 
        if   coefs is None :
            return np.array([rule.transform(X) for rule in rule_list]).T
        else: # else use the coefs to filter the rules we bother to interpret
            res= np.array([rule_list[i_rule].transform(X) for i_rule in np.arange(len(rule_list)) if coefs[i_rule]!=0]).T
            res_=np.zeros([X.shape[0],len(rule_list)])
            res_[:,coefs!=0]=res
            return res_
    def __str__(self):
        return str([str(rule) for rule in self.rules])




class RuleFit(BaseEstimator, TransformerMixin):
    """Rulefit class


    Parameters
    ----------
        tree_size:      Number of terminal nodes in generated trees. If exp_rand_tree_size=True, 
                        this will be the mean number of terminal nodes.
        sample_fract:   fraction of randomly chosen training observations used to produce each tree. 
                        FP 2004 (Sec. 2)
        max_rules:      approximate total number of rules generated for fitting. Note that actual
                        number of rules will usually be lower than this due to duplicates.
        memory_par:     scale multiplier (shrinkage factor) applied to each new tree when 
                        sequentially induced. FP 2004 (Sec. 2)
        rfmode:         'regress' for regression or 'classify' for binary classification.
        lin_standardise: If True, the linear terms will be standardised as per Friedman Sec 3.2
                        by multiplying the winsorised variable by 0.4/stdev.
        lin_trim_quantile: If lin_standardise is True, this quantile will be used to trim linear 
                        terms before standardisation.
        exp_rand_tree_size: If True, each boosted tree will have a different maximum number of 
                        terminal nodes based on an exponential distribution about tree_size. 
                        (Friedman Sec 3.3)
        model_type:     'r': rules only; 'l': linear terms only; 'rl': both rules and linear terms
        random_state:   Integer to initialise random objects and provide repeatability.
        tree_generator: Optional: this object will be used as provided to generate the rules. 
                        This will override almost all the other properties above. 
                        Must be GradientBoostingRegressor or GradientBoostingClassifier, optional (default=None)

    Attributes
    ----------
    rule_ensemble: RuleEnsemble
        The rule ensemble

    feature_names: list of strings, optional (default=None)
        The names of the features (columns)

    """
    def __init__(self,tree_size=4,sample_fract='default',max_rules=2000,
                 memory_par=0.01,
                 tree_generator=None,
                rfmode='regress',lin_trim_quantile=0.025,
                lin_standardise=True, exp_rand_tree_size=True,
                model_type='rl',Cs=None,cv=3,random_state=None):
        self.tree_generator = tree_generator
        self.rfmode=rfmode
        self.lin_trim_quantile=lin_trim_quantile
        self.lin_standardise=lin_standardise
        self.winsorizer=Winsorizer(trim_quantile=lin_trim_quantile)
        self.friedscale=FriedScale(self.winsorizer)
        self.stddev = None
        self.mean = None
        self.exp_rand_tree_size=exp_rand_tree_size
        self.max_rules=max_rules
        self.sample_fract=sample_fract 
        self.max_rules=max_rules
        self.memory_par=memory_par
        self.tree_size=tree_size
        self.random_state=random_state
        self.model_type=model_type
        self.cv=cv
        self.Cs=Cs
        
    def fit(self, X, y=None, feature_names=None, verbose=False):
        """Fit and estimate linear combination of rule ensemble

        """
        if type(X) == pd.DataFrame:
            X = X.values
        if type(y) in [pd.DataFrame, pd.Series]:
            y = y.values
            
        ## Enumerate features if feature names not provided
        N=X.shape[0]
        if feature_names is None:
            self.feature_names = ['feature_' + str(x) for x in range(0, X.shape[1])]
        else:
            self.feature_names=feature_names
        if 'r' in self.model_type:
            ## initialise tree generator
            if self.tree_generator is None:
                n_estimators_default=int(np.ceil(self.max_rules/self.tree_size))
                if self.sample_fract == 'default':
                    self.sample_fract_ = min(0.5, (100 + 6 * np.sqrt(N)) / N)  # FP 2004 (Sec. 2)
                else:
                    self.sample_fract_ = self.sample_fract
                if   self.rfmode=='regress':
                    self.tree_generator = GradientBoostingRegressor(n_estimators=n_estimators_default, max_leaf_nodes=self.tree_size, learning_rate=self.memory_par,subsample=self.sample_fract_,random_state=self.random_state,max_depth=100)
                else:
                    self.tree_generator =GradientBoostingClassifier(n_estimators=n_estimators_default, max_leaf_nodes=self.tree_size, learning_rate=self.memory_par,subsample=self.sample_fract_,random_state=self.random_state,max_depth=100)
    
            if   self.rfmode=='regress':
                if type(self.tree_generator) not in [GradientBoostingRegressor,RandomForestRegressor]:
                    raise ValueError("RuleFit only works with RandomForest and BoostingRegressor")
            else:
                if type(self.tree_generator) not in [GradientBoostingClassifier,RandomForestClassifier]:
                    raise ValueError("RuleFit only works with RandomForest and BoostingClassifier")
    
            ## fit tree generator
            if not self.exp_rand_tree_size: # simply fit with constant tree size
                self.tree_generator.fit(X, y)
            else: # randomise tree size as per Friedman 2005 Sec 3.3
                np.random.seed(self.random_state)
                tree_sizes=np.random.exponential(scale=self.tree_size-2,size=int(np.ceil(self.max_rules*2/self.tree_size)))
                tree_sizes=np.asarray([2+np.floor(tree_sizes[i_]) for i_ in np.arange(len(tree_sizes))],dtype=int)
                i=int(len(tree_sizes)/4)
                while np.sum(tree_sizes[0:i])<self.max_rules:
                    i=i+1
                tree_sizes=tree_sizes[0:i]
                self.tree_generator.set_params(warm_start=True) 
                curr_est_=0
                for i_size in np.arange(len(tree_sizes)):
                    size=tree_sizes[i_size]
                    self.tree_generator.set_params(n_estimators=curr_est_+1)
                    self.tree_generator.set_params(max_leaf_nodes=size)
                    random_state_add = self.random_state if self.random_state else 0
                    self.tree_generator.set_params(random_state=i_size+random_state_add) # warm_start=True seems to reset random_state, so the trees are highly correlated unless we manually change the random_state here
                    self.tree_generator.fit(np.copy(X, order='C'), np.copy(y, order='C'))
                    curr_est_=curr_est_+1
                self.tree_generator.set_params(warm_start=False) 
            tree_list = self.tree_generator.estimators_
            if isinstance(self.tree_generator, RandomForestRegressor) or isinstance(self.tree_generator, RandomForestClassifier):
                 tree_list = [[x] for x in self.tree_generator.estimators_]
                 
            ## extract rules
            self.rule_ensemble = RuleEnsemble(tree_list = tree_list,
                                              feature_names=self.feature_names)

            ## concatenate original features and rules
            X_rules = self.rule_ensemble.transform(X)
        
        ## standardise linear variables if requested (for regression model only)
        if 'l' in self.model_type: 

            ## standard deviation and mean of winsorized features
            self.winsorizer.train(X)
            winsorized_X = self.winsorizer.trim(X)
            self.stddev = np.std(winsorized_X, axis = 0)
            self.mean = np.mean(winsorized_X, axis = 0)

            if self.lin_standardise:
                self.friedscale.train(X)
                X_regn=self.friedscale.scale(X)
            else:
                X_regn=X.copy()            
        
        ## Compile Training data
        X_concat=np.zeros([X.shape[0],0])
        if 'l' in self.model_type:
            X_concat = np.concatenate((X_concat,X_regn), axis=1)
        if 'r' in self.model_type:
            if X_rules.shape[0] >0:
                X_concat = np.concatenate((X_concat, X_rules), axis=1)

        ## fit Lasso
        if self.rfmode=='regress':
            if self.Cs is None:  # use defaults
                n_alphas= 100
                alphas=None
            elif hasattr(self.Cs, "__len__"):
                n_alphas= None
                alphas=1./self.Cs
            else:
                n_alphas= self.Cs
                alphas=None
            self.lscv = LassoCV(n_alphas=n_alphas,alphas=alphas,cv=self.cv,random_state=self.random_state)
            self.lscv.fit(X_concat, y)
            self.coef_=self.lscv.coef_
            self.intercept_=self.lscv.intercept_
        else:
            Cs=10 if self.Cs is None else self.Cs
            self.lscv=LogisticRegressionCV(Cs=Cs,cv=self.cv,penalty='l1',random_state=self.random_state,solver='liblinear')
            self.lscv.fit(X_concat, y)
            self.coef_=self.lscv.coef_[0]
            self.intercept_=self.lscv.intercept_[0]
        
        
        
        return self

    def predict(self, X):
        """Predict outcome for X

        """
        if type(X) == pd.DataFrame:
            X = X.values.astype(np.float32)
        
        X_concat=np.zeros([X.shape[0],0])
        if 'l' in self.model_type:
            if self.lin_standardise:
                X_concat = np.concatenate((X_concat,self.friedscale.scale(X)), axis=1)
            else:
                X_concat = np.concatenate((X_concat,X), axis=1)
        if 'r' in self.model_type:
            rule_coefs=self.coef_[-len(self.rule_ensemble.rules):] 
            if len(rule_coefs)>0:
                X_rules = self.rule_ensemble.transform(X,coefs=rule_coefs)
                if X_rules.shape[0] >0:
                    X_concat = np.concatenate((X_concat, X_rules), axis=1)
        return self.lscv.predict(X_concat)
    
    def predict_proba(self, X):
        y = self.predict(X)
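        # note: in 'classify' mode predict() returns hard labels, so the stacked
        # values are 0/1 indicators rather than calibrated probabilities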
        return np.vstack((1 - y, y)).transpose()

    def transform(self, X=None, y=None):
        """Transform dataset.

        Parameters
        ----------
        X : array-like matrix, shape=(n_samples, n_features)
            Input data to be transformed. Use ``dtype=np.float32`` for maximum
            efficiency.

        Returns
        -------
        X_transformed: matrix, shape=(n_samples, n_out)
            Transformed data set
        """
        return self.rule_ensemble.transform(X)

    def get_rules(self, exclude_zero_coef=False, subregion=None):
        """Return the estimated rules

        Parameters
        ----------
        exclude_zero_coef: If True (default=False), returns only the rules with an estimated
                           coefficient not equal to zero.

        subregion: If None (default) returns global importances (FP 2004 eq. 28/29), else returns importance over 
                           subregion of inputs (FP 2004 eq. 30/31/32).

        Returns
        -------
        rules: pandas.DataFrame with the rules. Column 'rule' describes the rule, 'coef' holds
               the coefficients and 'support' the support of the rule in the training
               data set (X)
        """

        n_features= len(self.coef_) - len(self.rule_ensemble.rules)
        rule_ensemble = list(self.rule_ensemble.rules)
        output_rules = []
        ## Add coefficients for linear effects
        for i in range(0, n_features):
            if self.lin_standardise:
                coef=self.coef_[i]*self.friedscale.scale_multipliers[i]
            else:
                coef=self.coef_[i]
            if subregion is None:
                importance = abs(coef)*self.stddev[i]
            else:
                subregion = np.array(subregion)
                importance = sum(abs(coef) * abs(self.winsorizer.trim(subregion)[:, i] - self.mean[i])) / len(subregion)
            output_rules += [(self.feature_names[i], 'linear',coef, 1, importance)]

        ## Add rules
        for i in range(0, len(self.rule_ensemble.rules)):
            rule = rule_ensemble[i]
            coef=self.coef_[i + n_features]

            if subregion is None:
                importance = abs(coef)*(rule.support * (1-rule.support))**(1/2)
            else:
                rkx = rule.transform(subregion)
                importance = sum(abs(coef) * abs(rkx - rule.support))/len(subregion)

            output_rules += [(rule.__str__(), 'rule', coef,  rule.support, importance)]
        rules = pd.DataFrame(output_rules, columns=["rule", "type","coef", "support", "importance"])
        if exclude_zero_coef:
            rules = rules[rules.coef != 0]
        return rules

Functions

def extract_rules_from_tree(tree, feature_names=None)

Helper to turn a tree into a set of rules
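A minimal sketch of using this helper on a single fitted tree (synthetic data; feature names are illustrative):

import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
y = X[:, 0] + (X[:, 1] > 0.5)

dt = DecisionTreeRegressor(max_leaf_nodes=4, random_state=0).fit(X, y)
rules = extract_rules_from_tree(dt.tree_, feature_names=['a', 'b', 'c'])
for rule in rules:
    print(rule, '->', rule.prediction_value)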


Classes

class FriedScale (winsorizer=None)

Performs scaling of linear variables according to Friedman et al. 2005 Sec 5

Each variable is first Winsorized l->l*, then standardised as 0.4 x l* / std(l*). Warning: this class should not be used directly.
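A sketch of the intended use, assuming Winsorizer and FriedScale from this module are in scope (the data is illustrative):

import numpy as np

X = np.random.RandomState(0).normal(size=(200, 2))
w = Winsorizer(trim_quantile=0.025)
w.train(X)
fs = FriedScale(winsorizer=w)
fs.train(X)
X_scaled = fs.scale(X)  # non-binary columns now have std close to 0.4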

Methods

def scale(self, X)
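Winsorize X (when a winsorizer was supplied) and multiply each column by its scale multiplier.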
def train(self, X)
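Compute per-column multipliers 0.4/std over the (optionally trimmed) data; binary columns, which are effectively already rules, are left unscaled.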
class Rule (rule_conditions, prediction_value)

Class for binary Rules from list of conditions

Warning: this class should not be used directly.
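A sketch showing that a Rule is the conjunction (AND) of its RuleConditions; transform() returns 1 exactly where all conditions hold (values are illustrative):

import numpy as np

c1 = RuleCondition(feature_index=0, threshold=0.5, operator="<=", support=0.6)
c2 = RuleCondition(feature_index=1, threshold=1.0, operator=">", support=0.3)
rule = Rule([c1, c2], prediction_value=0.8)

X = np.array([[0.2, 2.0],   # satisfies both conditions -> 1
              [0.7, 2.0],   # fails c1                  -> 0
              [0.2, 0.5]])  # fails c2                  -> 0
print(rule.transform(X))    # [1 0 0]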

Methods

def transform(self, X)

Transform dataset.

Parameters

X : array-like matrix
 

Returns

X_transformed : array-like matrix, shape=(n_samples, 1)
 
class RuleCondition (feature_index, threshold, operator, support, feature_name=None)

Class for binary rule condition

Warning: this class should not be used directly.
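A sketch of a single condition mapping one feature column to a 0/1 indicator (values are illustrative):

import numpy as np

cond = RuleCondition(feature_index=0, threshold=3.0, operator="<=",
                     support=0.5, feature_name="age")
X = np.array([[1.0], [4.0], [3.0]])
print(cond)               # age <= 3.0
print(cond.transform(X))  # [1 0 1]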

Methods

def transform(self, X)

Transform dataset.

Parameters

X : array-like matrix, shape=(n_samples, n_features)
 

Returns

X_transformed : array-like matrix, shape=(n_samples, 1)
 
class RuleEnsemble (tree_list, feature_names=None)

Ensemble of binary decision rules

This class implements an ensemble of decision rules that extracts rules from an ensemble of decision trees.

Parameters

tree_list : List or array of DecisionTreeClassifier or DecisionTreeRegressor
Trees from which the rules are created
feature_names : List of strings, optional (default=None)
Names of the features

Attributes

rules : List of Rule
The ensemble of rules extracted from the trees
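A sketch of building a RuleEnsemble from a small boosted ensemble (synthetic data and settings are illustrative):

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.RandomState(0)
X = rng.rand(200, 4)
y = X[:, 0] - 2 * (X[:, 2] > 0.5)

gb = GradientBoostingRegressor(n_estimators=10, max_leaf_nodes=4,
                               random_state=0).fit(X, y)
ensemble = RuleEnsemble(tree_list=gb.estimators_,
                        feature_names=['f0', 'f1', 'f2', 'f3'])
X_rules = ensemble.transform(X)  # one 0/1 column per extracted rule
print(len(ensemble.rules), X_rules.shape)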
Methods

def filter_rules(self, func)
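Keep only the rules for which func(rule) returns True.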
def filter_short_rules(self, k)
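Drop rules with k or fewer conditions.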
def transform(self, X, coefs=None)

Transform dataset.

Parameters

X :  array-like matrix, shape=(n_samples, n_features)
 
coefs :  (optional) if supplied, this makes the prediction
slightly more efficient by setting rules with zero coefficients to zero without calling Rule.transform().

Returns

X_transformed : array-like matrix, shape=(n_samples, n_out)
Transformed dataset. Each column represents one rule.
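Continuing the RuleEnsemble sketch above, passing coefficients lets transform() skip rules whose coefficient is exactly zero:

import numpy as np

coefs = np.zeros(len(ensemble.rules))  # 'ensemble' and 'X' as in the sketch above
coefs[:2] = [1.3, -0.7]                # pretend only the first two rules matter
X_rules = ensemble.transform(X, coefs=coefs)
# columns with a zero coefficient are left as all-zeros and never computed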
class RuleFit (tree_size=4, sample_fract='default', max_rules=2000, memory_par=0.01, tree_generator=None, rfmode='regress', lin_trim_quantile=0.025, lin_standardise=True, exp_rand_tree_size=True, model_type='rl', Cs=None, cv=3, random_state=None)

Rulefit class

Parameters

tree_size:      Number of terminal nodes in generated trees. If exp_rand_tree_size=True, 
                this will be the mean number of terminal nodes.
sample_fract:   fraction of randomly chosen training observations used to produce each tree. 
                FP 2004 (Sec. 2)
max_rules:      approximate total number of rules generated for fitting. Note that actual
                number of rules will usually be lower than this due to duplicates.
memory_par:     scale multiplier (shrinkage factor) applied to each new tree when 
                sequentially induced. FP 2004 (Sec. 2)
rfmode:         'regress' for regression or 'classify' for binary classification.
lin_standardise: If True, the linear terms will be standardised as per Friedman Sec 3.2
                by multiplying the winsorised variable by 0.4/stdev.
lin_trim_quantile: If lin_standardise is True, this quantile will be used to trim linear 
                terms before standardisation.
exp_rand_tree_size: If True, each boosted tree will have a different maximum number of 
                terminal nodes based on an exponential distribution about tree_size. 
                (Friedman Sec 3.3)
model_type:     'r': rules only; 'l': linear terms only; 'rl': both rules and linear terms
random_state:   Integer to initialise random objects and provide repeatability.
tree_generator: Optional: this object will be used as provided to generate the rules. 
                This will override almost all the other properties above. 
                Must be GradientBoostingRegressor or GradientBoostingClassifier, optional (default=None)

Attributes

rule_ensemble : RuleEnsemble
The rule ensemble
feature_names : list of strings, optional (default=None)
The names of the features (columns)
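A classification sketch (synthetic data; parameter values are illustrative):

import numpy as np
from imodels.rulefit.rulefit import RuleFit

rng = np.random.RandomState(0)
X = rng.rand(300, 5)
y = ((X[:, 0] > 0.5) & (X[:, 3] < 0.7)).astype(int)

rf = RuleFit(rfmode='classify', max_rules=200, random_state=0)
rf.fit(X, y, feature_names=['f0', 'f1', 'f2', 'f3', 'f4'])
preds = rf.predict(X)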

Ancestors

sklearn.base.BaseEstimator
sklearn.base.TransformerMixin

Methods

def fit(self, X, y=None, feature_names=None, verbose=False)

Fit and estimate linear combination of rule ensemble

def get_rules(self, exclude_zero_coef=False, subregion=None)

Return the estimated rules

Parameters

exclude_zero_coef : If True (default=False), returns only the rules with an estimated
coefficient not equal to zero.
subregion : If None (default) returns global importances (FP 2004 eq. 28/29), else returns importance over
subregion of inputs (FP 2004 eq. 30/31/32).

Returns

rules : pandas.DataFrame with the rules. Column 'rule' describes the rule, 'coef' holds
the coefficients and 'support' the support of the rule in the training data set (X)
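A sketch contrasting global and subregion importances, assuming a fitted RuleFit instance rf and training matrix X as in the earlier examples:

global_rules = rf.get_rules()                            # FP 2004 eq. 28/29
local_rules = rf.get_rules(subregion=X[X[:, 0] > 0.5])   # FP 2004 eq. 30/31/32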
def predict(self, X)

Predict outcome for X

def predict_proba(self, X)
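Stack (1 - y, y) from predict() into a two-column array. Note that in 'classify' mode predict() returns hard labels, so the result holds 0/1 indicators rather than calibrated probabilities.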
def transform(self, X=None, y=None)

Transform dataset.

Parameters

X : array-like matrix, shape=(n_samples, n_features)
Input data to be transformed. Use dtype=np.float32 for maximum efficiency.

Returns

X_transformed : matrix, shape=(n_samples, n_out)
Transformed data set
class Winsorizer (trim_quantile=0.0)

Performs Winsorization l->l*

Warning: this class should not be used directly.
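A sketch of clipping each column to its empirical quantile limits (data and quantile are illustrative):

import numpy as np

X = np.random.RandomState(0).standard_cauchy(size=(500, 3))
w = Winsorizer(trim_quantile=0.025)
w.train(X)
X_trimmed = w.trim(X)  # extreme tails are replaced by the quantile limits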


Methods

def train(self, X)
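Compute the per-column winsor limits from the trim_quantile.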
def trim(self, X)
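Clip each column of X to its winsor limits.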