Tigramite基本使用教程¶
线性测试¶
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
## use `%matplotlib notebook` for interactive figures
# plt.style.use('ggplot')
import sklearn
import tigramite
from tigramite import data_processing as pp
from tigramite import plotting as tp
from tigramite.pcmci import PCMCI
from tigramite.independence_tests import ParCorr, GPDC, CMIknn, CMIsymb
数据生成函数:
\begin{aligned} X^0_t &= 0.7 X^0_{t-1} - 0.8 X^1_{t-1} + \eta^0_t\ X^1_t &= 0.8 X^1_{t-1} + 0.8 X^3_{t-1} + \eta^1_t\ X^2_t &= 0.5 X^2_{t-1} + 0.5 X^1_{t-2} + 0.6 X^3_{t-3} + \eta^2_t\ X^3_t &= 0.7 X^3_{t-1} + \eta^3_t\ \end{aligned}
其中\(\eta\)满足均值为0方差为1的高斯分布,是添加的随机噪音。
目标:
重构出每一个变量的drivers
。
np.random.seed(42)
links_coeffs = {0: [((0, -1), 0.7), ((1, -1), -0.8)],
1: [((1, -1), 0.8), ((3, -1), 0.8)],
2: [((2, -1), 0.5), ((1, -2), 0.5), ((3, -3), 0.6)],
3: [((3, -1), 0.4)],
}
T = 1000
data, true_parents_neighbors = pp.var_process(links_coeffs, T=T)
T, N = data.shape
var_names = [r'$X^0$', r'$X^1$', r'$X^2$', r'$X^3$']
dataframe = pp.DataFrame(data,
datatime = np.arange(len(data)),
var_names=var_names)
data.shape
(1000, 4)
tp.plot_timeseries(dataframe)
plt.show()

parcorr = ParCorr(significance='analytic')
pcmci = PCMCI(
dataframe=dataframe,
cond_ind_test=parcorr,
verbosity=1)
correlations = pcmci.get_lagged_dependencies(tau_max=20, val_only=True)['val_matrix']
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
setup_args={'figsize': (6,6),'var_names':var_names,
'x_base':10, 'y_base':.5})
##
## Estimating lagged dependencies
##
Parameters:
independence test = par_corr
tau_min = 0
tau_max = 20

pcmci.verbosity = 0
results = pcmci.run_pcmci(tau_max=8, pc_alpha=None)
print("p-values")
print (results['p_matrix'].round(3))
print("MCI partial correlations")
print (results['val_matrix'].round(2))
p-values
[[[1. 0. 0.262 0.894 0.886 0.034 0.417 0.255 0.877]
[0.067 0.296 0.095 0.765 0.502 0.242 0.234 0.899 0.951]
[0.832 0.657 0.453 0.011 0.359 0.48 0.901 0.243 0.375]
[0.827 0.656 0.495 0.592 0.643 0.976 0.775 0.931 0.759]]
[[0.067 0. 0.15 0.51 0.041 0.005 0.559 0.533 0.392]
[1. 0. 0.911 0.07 0.576 0.556 0.175 0.732 0.741]
[0.651 0.764 0. 0.231 0.495 0.43 0.423 0.944 0.238]
[0.153 0.171 0.088 0.989 0.965 0.859 0.915 0.43 0.671]]
[[0.832 0.1 0.182 0.443 0.37 0.265 0.998 0.89 0.65 ]
[0.651 0.072 0.594 0.387 0.883 0.765 0.696 0.44 0.69 ]
[1. 0. 0.074 0.942 0.956 0.626 0.818 0.526 0.942]
[0.788 0.575 0.332 0.658 0.603 0.65 0.816 0.055 0.851]]
[[0.827 0.03 0.021 0.597 0.635 0.2 0.392 0.081 0.85 ]
[0.153 0. 0.558 0.534 0.72 0.284 0.798 0.016 0.162]
[0.788 0.114 0.781 0. 0.975 0.098 0.265 0.161 0.642]
[1. 0. 0.937 0.85 0.503 0.657 0.394 0.795 0.567]]]
MCI partial correlations
[[[ 0. 0.57 0.04 0. -0. 0.07 0.03 -0.04 -0. ]
[-0.06 -0.03 0.05 0.01 0.02 -0.04 0.04 0. -0. ]
[ 0.01 -0.01 0.02 0.08 -0.03 -0.02 -0. -0.04 0.03]
[ 0.01 0.01 0.02 0.02 0.01 -0. -0.01 -0. -0.01]]
[[-0.06 -0.65 0.05 0.02 0.07 -0.09 0.02 0.02 0.03]
[ 0. 0.62 0. -0.06 -0.02 -0.02 0.04 -0.01 0.01]
[ 0.01 -0.01 0.45 0.04 0.02 0.03 -0.03 -0. -0.04]
[-0.05 -0.04 -0.05 0. -0. -0.01 -0. -0.03 0.01]]
[[ 0.01 -0.05 -0.04 0.02 0.03 0.04 -0. 0. -0.01]
[ 0.01 0.06 0.02 -0.03 -0. -0.01 -0.01 -0.02 -0.01]
[ 0. 0.43 -0.06 0. -0. 0.02 0.01 0.02 0. ]
[-0.01 -0.02 0.03 -0.01 -0.02 -0.01 0.01 0.06 0.01]]
[[ 0.01 -0.07 0.07 -0.02 0.02 0.04 -0.03 0.06 -0.01]
[-0.05 0.66 0.02 -0.02 -0.01 0.03 0.01 0.08 -0.04]
[-0.01 0.05 -0.01 0.45 -0. -0.05 0.04 0.04 -0.01]
[ 0. 0.37 -0. -0.01 -0.02 -0.01 0.03 -0.01 0.02]]]
q_matrix = pcmci.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
pcmci.print_significant_links(
p_matrix = results['p_matrix'],
q_matrix = q_matrix,
val_matrix = results['val_matrix'],
alpha_level = 0.01)
## Significant links at alpha = 0.01:
Variable $X^0$ has 2 link(s):
($X^1$ -1): pval = 0.00000 | qval = 0.00000 | val = -0.653
($X^0$ -1): pval = 0.00000 | qval = 0.00000 | val = 0.566
Variable $X^1$ has 2 link(s):
($X^3$ -1): pval = 0.00000 | qval = 0.00000 | val = 0.663
($X^1$ -1): pval = 0.00000 | qval = 0.00000 | val = 0.622
Variable $X^2$ has 3 link(s):
($X^3$ -3): pval = 0.00000 | qval = 0.00000 | val = 0.451
($X^1$ -2): pval = 0.00000 | qval = 0.00000 | val = 0.446
($X^2$ -1): pval = 0.00000 | qval = 0.00000 | val = 0.425
Variable $X^3$ has 1 link(s):
($X^3$ -1): pval = 0.00000 | qval = 0.00000 | val = 0.372
link_matrix = pcmci.return_significant_links(pq_matrix=q_matrix,
val_matrix=results['val_matrix'], alpha_level=0.01)['link_matrix']
link_matrix.shape
(4, 4, 9)
tp.plot_graph(
figsize=(5,5),
val_matrix=results['val_matrix'],
link_matrix=link_matrix,
var_names=var_names,
link_colorbar_label='cross-MCI',
node_colorbar_label='auto-MCI',
); plt.show()

# Plot time series graph
tp.plot_time_series_graph(
val_matrix=results['val_matrix'],
link_matrix=link_matrix,
var_names=var_names,
link_colorbar_label='MCI',
); plt.show()

非线性测试¶
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
## use `%matplotlib notebook` for interactive figures
# plt.style.use('ggplot')
import sklearn
import tigramite
from tigramite import data_processing as pp
from tigramite import plotting as tp
from tigramite.pcmci import PCMCI
from tigramite.independence_tests import ParCorr, GPDC, CMIknn, CMIsymb
\begin{align*} X^0_t &= 0.2 (X^1_{t-1})^2 + \eta^0_t\ X^1_t &= \eta^1_t \ X^2_t &= 0.3 (X^1_{t-2})^2 + \eta^2_t \end{align*}
var_names = [r'$X^0$', r'$X^1$', r'$X^2$']
np.random.seed(1)
data = np.random.randn(500, 3)
for t in range(1, 500):
data[t, 0] += 0.4*data[t-1, 1]**2
data[t, 2] += 0.3*data[t-2, 1]**2
dataframe = pp.DataFrame(data, var_names=var_names)
tp.plot_timeseries(dataframe)
plt.show()

corr¶
parcorr = ParCorr(significance='analytic')
pcmci_parcorr = PCMCI(
dataframe=dataframe,
cond_ind_test=parcorr,
verbosity=0)
correlations = pcmci_parcorr.get_lagged_dependencies(tau_max=20, val_only=True)['val_matrix']
lag_func_matrix = tp.plot_lagfuncs(val_matrix=correlations,
setup_args={'var_names':var_names, 'figsize': (5,5),
'x_base':5, 'y_base':.5})

results = pcmci_parcorr.run_pcmci(tau_max=2, pc_alpha=0.2)
pcmci_parcorr.print_significant_links(
p_matrix = results['p_matrix'],
val_matrix = results['val_matrix'],
alpha_level = 0.01)
## Significant links at alpha = 0.01:
Variable $X^0$ has 0 link(s):
Variable $X^1$ has 0 link(s):
Variable $X^2$ has 1 link(s):
($X^0$ -1): pval = 0.00000 | val = 0.234
q_matrix = pcmci_parcorr.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
link_matrix = pcmci_parcorr.return_significant_links(pq_matrix=q_matrix,
val_matrix=results['val_matrix'], alpha_level=0.01)['link_matrix']
tp.plot_graph(
val_matrix=results['val_matrix'],
link_matrix=link_matrix,
var_names=var_names,
link_colorbar_label='cross-MCI',
node_colorbar_label='auto-MCI',
); plt.show()

gpdc¶
gpdc = GPDC(significance='analytic', gp_params=None)
pcmci_gpdc = PCMCI(
dataframe=dataframe,
cond_ind_test=gpdc,
verbosity=0)
results = pcmci_gpdc.run_pcmci(tau_max=2, pc_alpha=0.1)
pcmci_gpdc.print_significant_links(
p_matrix = results['p_matrix'],
val_matrix = results['val_matrix'],
alpha_level = 0.01)
## Significant links at alpha = 0.01:
Variable $X^0$ has 1 link(s):
($X^1$ -1): pval = 0.00000 | val = 0.030
Variable $X^1$ has 0 link(s):
Variable $X^2$ has 1 link(s):
($X^1$ -2): pval = 0.00000 | val = 0.028
q_matrix = pcmci_parcorr.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
link_matrix = pcmci_parcorr.return_significant_links(pq_matrix=q_matrix,
val_matrix=results['val_matrix'], alpha_level=0.01)['link_matrix']
tp.plot_graph(
val_matrix=results['val_matrix'],
link_matrix=link_matrix,
var_names=var_names,
link_colorbar_label='cross-MCI',
node_colorbar_label='auto-MCI',
); plt.show()

cmiknn¶
cmi_knn = CMIknn(significance='shuffle_test', knn=0.1, shuffle_neighbors=5, transform='ranks')
pcmci_cmi_knn = PCMCI(
dataframe=dataframe,
cond_ind_test=cmi_knn,
verbosity=2)
results = pcmci_cmi_knn.run_pcmci(tau_max=2, pc_alpha=0.05)
pcmci_cmi_knn.print_significant_links(
p_matrix = results['p_matrix'],
val_matrix = results['val_matrix'],
alpha_level = 0.01)
##
## Step 1: PC1 algorithm with lagged conditions
##
Parameters:
independence test = cmi_knn
tau_min = 1
tau_max = 2
pc_alpha = [0.05]
max_conds_dim = None
max_combinations = 1
## Variable $X^0$
Iterating through pc_alpha = [0.05]:
# pc_alpha = 0.05 (1/1):
Testing condition sets of dimension 0:
Link ($X^0$ -1) --> $X^0$ (1/6):
Subset 0: () gives pval = 0.59100 / val = 0.005
Non-significance detected.
Link ($X^0$ -2) --> $X^0$ (2/6):
Subset 0: () gives pval = 0.27600 / val = 0.011
Non-significance detected.
Link ($X^1$ -1) --> $X^0$ (3/6):
q_matrix = pcmci_parcorr.get_corrected_pvalues(p_matrix=results['p_matrix'], fdr_method='fdr_bh')
link_matrix = pcmci_parcorr.return_significant_links(pq_matrix=q_matrix,
val_matrix=results['val_matrix'], alpha_level=0.01)['link_matrix']
tp.plot_graph(
val_matrix=results['val_matrix'],
link_matrix=link_matrix,
var_names=var_names,
link_colorbar_label='cross-MCI',
node_colorbar_label='auto-MCI',
); plt.show()
