Module imodels.rule_set.fplasso
Expand source code
from typing import List
from imodels.rule_set.rule_fit import RuleFit
from imodels.util.extract import extract_fpgrowth
from imodels.util.convert import itemsets_to_rules
class FPLasso(RuleFit):
    """RuleFit variant whose candidate rules are mined with FP-Growth.

    Instead of extracting rules from a tree ensemble alone, this class
    mines frequent itemsets from the data via ``extract_fpgrowth`` and
    converts them into rules, which are then fit with the inherited
    (lasso-regularized) RuleFit machinery.

    Rather than using this class directly, use ``FPLassoRegressor`` or
    ``FPLassoClassifier``.
    """

    def __init__(self,
                 minsupport=0.1,
                 maxcardinality=2,
                 verbose=False,
                 tree_size=4,
                 sample_fract='default',
                 max_rules=2000,
                 memory_par=0.01,
                 tree_generator=None,
                 lin_trim_quantile=0.025,
                 lin_standardise=True,
                 exp_rand_tree_size=True,
                 include_linear=True,
                 alphas=None,
                 cv=3,
                 random_state=None):
        """
        Parameters
        ----------
        minsupport : float
            Minimum support threshold passed to the FP-Growth miner.
        maxcardinality : int
            Maximum number of items per mined itemset (i.e. maximum
            rule length).
        verbose : bool
            If True, the FP-Growth extraction prints progress output.

        All remaining parameters are forwarded unchanged to
        ``RuleFit.__init__``; see that class for their meaning.
        """
        # Forward the shared RuleFit parameters positionally, preserving
        # the parent constructor's expected order.
        super().__init__(tree_size,
                         sample_fract,
                         max_rules,
                         memory_par,
                         tree_generator,
                         lin_trim_quantile,
                         lin_standardise,
                         exp_rand_tree_size,
                         include_linear,
                         alphas,
                         cv,
                         random_state)
        self.minsupport = minsupport
        self.maxcardinality = maxcardinality
        self.verbose = verbose

    def fit(self, X, y=None, feature_names=None, undiscretized_features=None):
        """Fit the model, remembering which features are undiscretized.

        Parameters
        ----------
        X, y, feature_names
            Forwarded to ``RuleFit.fit``.
        undiscretized_features : list, optional
            Feature names to treat as undiscretized during FP-Growth
            extraction. Defaults to an empty list.

        Returns
        -------
        self
        """
        # Use a None sentinel instead of a mutable default argument ([]),
        # which would be shared across all calls/instances.
        self.undiscretized_features = (
            [] if undiscretized_features is None else undiscretized_features
        )
        super().fit(X, y, feature_names=feature_names)
        return self

    def _extract_rules(self, X, y) -> List[str]:
        """Mine frequent itemsets from (X, y) and convert them to rules.

        ``extract_fpgrowth`` returns a tuple whose first element is the
        itemset collection; only that element is needed here.
        """
        itemsets = extract_fpgrowth(X, y,
                                    feature_labels=self.feature_placeholders,
                                    minsupport=self.minsupport,
                                    maxcardinality=self.maxcardinality,
                                    undiscretized_features=self.undiscretized_features,
                                    verbose=self.verbose)[0]
        return itemsets_to_rules(itemsets)
class FPLassoRegressor(FPLasso):
    """FPLasso configured to solve a regression task."""

    def _init_prediction_task(self):
        # Tag this estimator as a regressor; the shared RuleFit/FPLasso
        # machinery branches on this attribute.
        self.prediction_task = 'regression'
class FPLassoClassifier(FPLasso):
    """FPLasso configured to solve a classification task."""

    def _init_prediction_task(self):
        # Tag this estimator as a classifier; the shared RuleFit/FPLasso
        # machinery branches on this attribute.
        self.prediction_task = 'classification'
Classes
class FPLasso (minsupport=0.1, maxcardinality=2, verbose=False, tree_size=4, sample_fract='default', max_rules=2000, memory_par=0.01, tree_generator=None, lin_trim_quantile=0.025, lin_standardise=True, exp_rand_tree_size=True, include_linear=True, alphas=None, cv=3, random_state=None)
-
Rulefit class. Rather than using this class directly, should use RuleFitRegressor or RuleFitClassifier
Parameters
----------
tree_size : Number of terminal nodes in generated trees. If exp_rand_tree_size=True, this will be the mean number of terminal nodes.
sample_fract : fraction of randomly chosen training observations used to produce each tree. FP 2004 (Sec. 2)
max_rules : total number of terms included in the final model (both linear and rules) - approximate total number of rules generated for fitting also is based on this. Note that the actual number of rules will usually be lower than this due to duplicates.
memory_par : scale multiplier (shrinkage factor) applied to each new tree when sequentially induced. FP 2004 (Sec. 2)
lin_standardise : If True, the linear terms will be standardised as per Friedman Sec 3.2 by multiplying the winsorised variable by 0.4/stdev.
lin_trim_quantile : If lin_standardise is True, this quantile will be used to trim linear terms before standardisation.
exp_rand_tree_size : If True, each boosted tree will have a different maximum number of terminal nodes based on an exponential distribution about tree_size. (Friedman Sec 3.3)
include_linear : Include linear terms as opposed to only rules.
random_state : Integer to initialise random objects and provide repeatability.
tree_generator : Optional: this object will be used as provided to generate the rules. This will override almost all the other properties above. Must be GradientBoostingRegressor or GradientBoostingClassifier, optional (default=None).
Attributes
----------
rule_ensemble : RuleEnsemble - The rule ensemble.
feature_names : list of strings, optional (default=None) - The names of the features (columns).
Expand source code
class FPLasso(RuleFit): def __init__(self, minsupport=0.1, maxcardinality=2, verbose=False, tree_size=4, sample_fract='default', max_rules=2000, memory_par=0.01, tree_generator=None, lin_trim_quantile=0.025, lin_standardise=True, exp_rand_tree_size=True, include_linear=True, alphas=None, cv=3, random_state=None): super().__init__(tree_size, sample_fract, max_rules, memory_par, tree_generator, lin_trim_quantile, lin_standardise, exp_rand_tree_size, include_linear, alphas, cv, random_state) self.minsupport = minsupport self.maxcardinality = maxcardinality self.verbose = verbose def fit(self, X, y=None, feature_names=None, undiscretized_features=[]): self.undiscretized_features = undiscretized_features super().fit(X, y, feature_names=feature_names) return self def _extract_rules(self, X, y) -> List[str]: itemsets = extract_fpgrowth(X, y, feature_labels=self.feature_placeholders, minsupport=self.minsupport, maxcardinality=self.maxcardinality, undiscretized_features=self.undiscretized_features, verbose=self.verbose)[0] return itemsets_to_rules(itemsets)
Ancestors
Subclasses
Inherited members
class FPLassoClassifier (minsupport=0.1, maxcardinality=2, verbose=False, tree_size=4, sample_fract='default', max_rules=2000, memory_par=0.01, tree_generator=None, lin_trim_quantile=0.025, lin_standardise=True, exp_rand_tree_size=True, include_linear=True, alphas=None, cv=3, random_state=None)
-
Rulefit class. Rather than using this class directly, should use RuleFitRegressor or RuleFitClassifier
Parameters
----------
tree_size : Number of terminal nodes in generated trees. If exp_rand_tree_size=True, this will be the mean number of terminal nodes.
sample_fract : fraction of randomly chosen training observations used to produce each tree. FP 2004 (Sec. 2)
max_rules : total number of terms included in the final model (both linear and rules) - approximate total number of rules generated for fitting also is based on this. Note that the actual number of rules will usually be lower than this due to duplicates.
memory_par : scale multiplier (shrinkage factor) applied to each new tree when sequentially induced. FP 2004 (Sec. 2)
lin_standardise : If True, the linear terms will be standardised as per Friedman Sec 3.2 by multiplying the winsorised variable by 0.4/stdev.
lin_trim_quantile : If lin_standardise is True, this quantile will be used to trim linear terms before standardisation.
exp_rand_tree_size : If True, each boosted tree will have a different maximum number of terminal nodes based on an exponential distribution about tree_size. (Friedman Sec 3.3)
include_linear : Include linear terms as opposed to only rules.
random_state : Integer to initialise random objects and provide repeatability.
tree_generator : Optional: this object will be used as provided to generate the rules. This will override almost all the other properties above. Must be GradientBoostingRegressor or GradientBoostingClassifier, optional (default=None).
Attributes
----------
rule_ensemble : RuleEnsemble - The rule ensemble.
feature_names : list of strings, optional (default=None) - The names of the features (columns).
Expand source code
class FPLassoClassifier(FPLasso): def _init_prediction_task(self): self.prediction_task = 'classification'
Ancestors
Inherited members
class FPLassoRegressor (minsupport=0.1, maxcardinality=2, verbose=False, tree_size=4, sample_fract='default', max_rules=2000, memory_par=0.01, tree_generator=None, lin_trim_quantile=0.025, lin_standardise=True, exp_rand_tree_size=True, include_linear=True, alphas=None, cv=3, random_state=None)
-
Rulefit class. Rather than using this class directly, should use RuleFitRegressor or RuleFitClassifier
Parameters
----------
tree_size : Number of terminal nodes in generated trees. If exp_rand_tree_size=True, this will be the mean number of terminal nodes.
sample_fract : fraction of randomly chosen training observations used to produce each tree. FP 2004 (Sec. 2)
max_rules : total number of terms included in the final model (both linear and rules) - approximate total number of rules generated for fitting also is based on this. Note that the actual number of rules will usually be lower than this due to duplicates.
memory_par : scale multiplier (shrinkage factor) applied to each new tree when sequentially induced. FP 2004 (Sec. 2)
lin_standardise : If True, the linear terms will be standardised as per Friedman Sec 3.2 by multiplying the winsorised variable by 0.4/stdev.
lin_trim_quantile : If lin_standardise is True, this quantile will be used to trim linear terms before standardisation.
exp_rand_tree_size : If True, each boosted tree will have a different maximum number of terminal nodes based on an exponential distribution about tree_size. (Friedman Sec 3.3)
include_linear : Include linear terms as opposed to only rules.
random_state : Integer to initialise random objects and provide repeatability.
tree_generator : Optional: this object will be used as provided to generate the rules. This will override almost all the other properties above. Must be GradientBoostingRegressor or GradientBoostingClassifier, optional (default=None).
Attributes
----------
rule_ensemble : RuleEnsemble - The rule ensemble.
feature_names : list of strings, optional (default=None) - The names of the features (columns).
Expand source code
class FPLassoRegressor(FPLasso): def _init_prediction_task(self): self.prediction_task = 'regression'
Ancestors
Inherited members