Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

metrics.html 13 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
  1. <!doctype html>
  2. <html lang="en">
  3. <head>
  4. <meta charset="utf-8">
  5. <meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
  6. <meta name="generator" content="pdoc 0.10.0" /><title>imodels.util.metrics API documentation</title>
  7. <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/sanitize.min.css" integrity="sha256-PK9q560IAAa6WVRRh76LtCaI8pjTJ2z11v0miyNNjrs=" crossorigin>
  8. <link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/typography.min.css" integrity="sha256-7l/o7C8jubJiy74VsKTidCy1yBkRtiUGbVkYBylBqUg=" crossorigin>
  9. <link rel="stylesheet preload" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/styles/github.min.css" crossorigin>
  10. <style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target .name{background:var(--highlight-color)}.name > 
span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
  11. <style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style>
  12. <style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
  13. <script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/highlight.min.js" integrity="sha256-Uv3H6lx7dJmRfRvH8TH6kJD1TSK1aFcwgx+mdg3epi8=" crossorigin></script>
  14. <script>window.addEventListener('DOMContentLoaded', () => hljs.initHighlighting())</script>
  15. </head>
  16. <body>
  17. <main>
  18. <article id="content">
  19. <section id="section-intro">
  20. <details class="source">
  21. <summary>
  22. <span>Expand source code</span>
  23. </summary>
  24. <pre><code class="python">from __future__ import division
  25. __author__ = &#39;Victor Ruiz, vmr11@pitt.edu&#39;
  26. from math import log
  27. import pandas as pd
  28. def entropy(data_classes, base=2):
  29. &#39;&#39;&#39;
  30. Computes the entropy of a set of labels (class instantiations)
  31. :param base: logarithm base for computation
  32. :param data_classes: Series with labels of examples in a dataset
  33. :return: value of entropy
  34. &#39;&#39;&#39;
  35. if not isinstance(data_classes, pd.core.series.Series):
  36. raise AttributeError(&#39;input array should be a pandas series&#39;)
  37. classes = data_classes.unique()
  38. N = len(data_classes)
  39. ent = 0 # initialize entropy
  40. # iterate over classes
  41. for c in classes:
  42. partition = data_classes[data_classes == c] # data with class = c
  43. proportion = len(partition) / N
  44. # update entropy
  45. ent -= proportion * log(proportion, base)
  46. return ent
  47. def cut_point_information_gain(dataset, cut_point, feature_label, class_label):
  48. &#39;&#39;&#39;
  49. Return the information gain obtained by splitting a numeric attribute in two according to cut_point
  50. :param dataset: pandas dataframe with a column for attribute values and a column for class
  51. :param cut_point: threshold at which to partition the numeric attribute
  52. :param feature_label: column label of the numeric attribute values in data
  53. :param class_label: column label of the array of instance classes
  54. :return: information gain of partition obtained by threshold cut_point
  55. &#39;&#39;&#39;
  56. if not isinstance(dataset, pd.core.frame.DataFrame):
  57. raise AttributeError(&#39;input dataset should be a pandas data frame&#39;)
  58. entropy_full = entropy(dataset[class_label]) # compute entropy of full dataset (w/o split)
  59. # split data at cut_point
  60. data_left = dataset[dataset[feature_label] &lt;= cut_point]
  61. data_right = dataset[dataset[feature_label] &gt; cut_point]
  62. (N, N_left, N_right) = (len(dataset), len(data_left), len(data_right))
  63. gain = entropy_full - (N_left / N) * entropy(data_left[class_label]) - \
  64. (N_right / N) * entropy(data_right[class_label])
  65. return gain</code></pre>
  66. </details>
  67. </section>
  68. <section>
  69. </section>
  70. <section>
  71. </section>
  72. <section>
  73. <h2 class="section-title" id="header-functions">Functions</h2>
  74. <dl>
  75. <dt id="imodels.util.metrics.cut_point_information_gain"><code class="name flex">
  76. <span>def <span class="ident">cut_point_information_gain</span></span>(<span>dataset, cut_point, feature_label, class_label)</span>
  77. </code></dt>
  78. <dd>
  79. <div class="desc"><p>Return the information gain obtained by splitting a numeric attribute in two according to cut_point
  80. :param dataset: pandas dataframe with a column for attribute values and a column for class
  81. :param cut_point: threshold at which to partition the numeric attribute
  82. :param feature_label: column label of the numeric attribute values in data
  83. :param class_label: column label of the array of instance classes
  84. :return: information gain of partition obtained by threshold cut_point</p></div>
  85. <details class="source">
  86. <summary>
  87. <span>Expand source code</span>
  88. </summary>
  89. <pre><code class="python">def cut_point_information_gain(dataset, cut_point, feature_label, class_label):
  90. &#39;&#39;&#39;
  91. Return the information gain obtained by splitting a numeric attribute in two according to cut_point
  92. :param dataset: pandas dataframe with a column for attribute values and a column for class
  93. :param cut_point: threshold at which to partition the numeric attribute
  94. :param feature_label: column label of the numeric attribute values in data
  95. :param class_label: column label of the array of instance classes
  96. :return: information gain of partition obtained by threshold cut_point
  97. &#39;&#39;&#39;
  98. if not isinstance(dataset, pd.core.frame.DataFrame):
  99. raise AttributeError(&#39;input dataset should be a pandas data frame&#39;)
  100. entropy_full = entropy(dataset[class_label]) # compute entropy of full dataset (w/o split)
  101. # split data at cut_point
  102. data_left = dataset[dataset[feature_label] &lt;= cut_point]
  103. data_right = dataset[dataset[feature_label] &gt; cut_point]
  104. (N, N_left, N_right) = (len(dataset), len(data_left), len(data_right))
  105. gain = entropy_full - (N_left / N) * entropy(data_left[class_label]) - \
  106. (N_right / N) * entropy(data_right[class_label])
  107. return gain</code></pre>
  108. </details>
  109. </dd>
  110. <dt id="imodels.util.metrics.entropy"><code class="name flex">
  111. <span>def <span class="ident">entropy</span></span>(<span>data_classes, base=2)</span>
  112. </code></dt>
  113. <dd>
  114. <div class="desc"><p>Computes the entropy of a set of labels (class instantiations)
  115. :param base: logarithm base for computation
  116. :param data_classes: Series with labels of examples in a dataset
  117. :return: value of entropy</p></div>
  118. <details class="source">
  119. <summary>
  120. <span>Expand source code</span>
  121. </summary>
  122. <pre><code class="python">def entropy(data_classes, base=2):
  123. &#39;&#39;&#39;
  124. Computes the entropy of a set of labels (class instantiations)
  125. :param base: logarithm base for computation
  126. :param data_classes: Series with labels of examples in a dataset
  127. :return: value of entropy
  128. &#39;&#39;&#39;
  129. if not isinstance(data_classes, pd.core.series.Series):
  130. raise AttributeError(&#39;input array should be a pandas series&#39;)
  131. classes = data_classes.unique()
  132. N = len(data_classes)
  133. ent = 0 # initialize entropy
  134. # iterate over classes
  135. for c in classes:
  136. partition = data_classes[data_classes == c] # data with class = c
  137. proportion = len(partition) / N
  138. # update entropy
  139. ent -= proportion * log(proportion, base)
  140. return ent</code></pre>
  141. </details>
  142. </dd>
  143. </dl>
  144. </section>
  145. <section>
  146. </section>
  147. </article>
  148. <nav id="sidebar">
  149. <h1>Index 🔍</h1>
  150. <div class="toc">
  151. <ul></ul>
  152. </div>
  153. <ul id="index">
  154. <li><h3>Super-module</h3>
  155. <ul>
  156. <li><code><a title="imodels.util" href="index.html">imodels.util</a></code></li>
  157. </ul>
  158. </li>
  159. <li><h3><a href="#header-functions">Functions</a></h3>
  160. <ul class="">
  161. <li><code><a title="imodels.util.metrics.cut_point_information_gain" href="#imodels.util.metrics.cut_point_information_gain">cut_point_information_gain</a></code></li>
  162. <li><code><a title="imodels.util.metrics.entropy" href="#imodels.util.metrics.entropy">entropy</a></code></li>
  163. </ul>
  164. </li>
  165. </ul>
  166. <p><img src="https://csinva.io/imodels/img/anim.gif" alt="imodels animation" width="100%"></p>
  167. <!-- add wave animation -->
  168. </nav>
  169. </main>
  170. <footer id="footer">
  171. </footer>
  172. <!-- GitHub corner link; kept inside <body> because markup after </html> is invalid -->
  173. <a href="https://github.com/csinva/imodels" class="github-corner" aria-label="View source on GitHub"><svg width="120" height="120" viewBox="0 0 250 250" style="fill:#70B7FD; color:#fff; position: absolute; top: 0; border: 0; right: 0;" aria-hidden="true"><path d="M0,0 L115,115 L130,115 L142,142 L250,250 L250,0 Z"></path><path d="M128.3,109.0 C113.8,99.7 119.0,89.6 119.0,89.6 C122.0,82.7 120.5,78.6 120.5,78.6 C119.2,72.0 123.4,76.3 123.4,76.3 C127.3,80.9 125.5,87.3 125.5,87.3 C122.9,97.6 130.6,101.9 134.4,103.2" fill="currentColor" style="transform-origin: 130px 106px;" class="octo-arm"></path><path d="M115.0,115.0 C114.9,115.1 118.7,116.5 119.8,115.4 L133.7,101.6 C136.9,99.2 139.9,98.4 142.2,98.6 C133.8,88.0 127.5,74.4 143.8,58.0 C148.5,53.4 154.0,51.2 159.7,51.0 C160.3,49.4 163.2,43.6 171.4,40.1 C171.4,40.1 176.1,42.5 178.8,56.2 C183.1,58.6 187.2,61.8 190.9,65.4 C194.5,69.0 197.7,73.2 200.1,77.6 C213.8,80.2 216.3,84.9 216.3,84.9 C212.7,93.1 206.9,96.0 205.4,96.6 C205.1,102.4 203.0,107.8 198.3,112.5 C181.9,128.9 168.3,122.5 157.7,114.1 C157.9,116.9 156.7,120.9 152.7,124.9 L141.0,136.5 C139.8,137.7 141.6,141.9 141.8,141.8 Z" fill="currentColor" class="octo-body"></path></svg></a><style>.github-corner:hover .octo-arm{animation:octocat-wave 560ms ease-in-out}@keyframes octocat-wave{0%,100%{transform:rotate(0)}20%,60%{transform:rotate(-25deg)}40%,80%{transform:rotate(10deg)}}@media (max-width:500px){.github-corner:hover .octo-arm{animation:none}.github-corner .octo-arm{animation:octocat-wave 560ms ease-in-out}}</style>
  174. <!-- add wave animation stylesheet -->
  175. <link rel="stylesheet" href="github.css">
  176. </body>
  177. </html>
Tip!

Press p or the left arrow key to see the previous file, or n or the right arrow key to see the next file

Comments

Loading...