Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

histcountsn.m 10 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
  function [n,edges,binIdcs] = histcountsn(x,nBins,varargin)
  %HISTCOUNTSN N-variate histogram bin counts.
  %   [N,EDGES,BINIDCS] = HISTCOUNTSN(X) treats every column of X as one
  %   dimension and every row as one observation. Rows containing a NaN are
  %   discarded. Each column is binned independently by HISTCOUNTS using its
  %   automatic binning, and N counts the observations that fall into each
  %   combination of per-dimension bins.
  %
  %   Outputs (D is the number of columns of X):
  %     N       - I1-by-I2-by-...-by-ID array of counts, where Id is the
  %               number of bins along dimension d (I1-by-1 when D == 1).
  %     EDGES   - D-by-1 cell array; EDGES{d} holds the bin edges that
  %               HISTCOUNTS returned for column d.
  %     BINIDCS - D-by-1 cell array; BINIDCS{d} holds the bin index that
  %               HISTCOUNTS assigned to each retained (non-NaN) row of X
  %               in dimension d. An index of 0 marks a value outside all
  %               bins; such rows are not counted in N.
  %
  %   In two dimensions, N(i,j) counts the row [X(k,1),X(k,2)] if
  %   EDGES{1}(i) <= X(k,1) < EDGES{1}(i+1) and
  %   EDGES{2}(j) <= X(k,2) < EDGES{2}(j+1). The last bin in each dimension
  %   also includes its upper edge.
  %
  %   [...] = HISTCOUNTSN(X,NBINS) uses NBINS bins. A scalar NBINS applies
  %   the same number of bins to every dimension; a vector supplies one bin
  %   count per column of X. Pass [] to request automatic binning.
  %
  %   [...] = HISTCOUNTSN(X,NBINS,'Normalization',NORM) rescales N. NORM is
  %   one of 'count' (default), 'countdensity', 'cumcount', 'probability',
  %   'pdf' or 'cdf'. The density options divide by the product of the mean
  %   bin widths of all dimensions. Other options accepted by the argument
  %   parser are validated but are not used by this function.

  % NBINS is optional: the documented one-argument call must not fail.
  if nargin < 2
      nBins = [];
  end
  opts = parseinput(varargin);

  % Filter NaN values from input.
  x(any(isnan(x),2),:) = [];
  [nRows,nDims] = size(x);

  autoBin = isempty(nBins);
  if isscalar(nBins)
      % One bin count shared by all dimensions.
      nBins = repmat(nBins,1,nDims);
  elseif ~autoBin && numel(nBins) < nDims
      error('histcountsn:nBinsSize', ...
          'NBINS must be empty, a scalar, or have one element per column of X (%d).', ...
          nDims);
  end

  edges = cell(nDims,1);
  binIdcs = cell(nDims,1);
  subs = zeros(nRows,nDims);
  sz = zeros(1,nDims);
  for iD = 1:nDims
      if autoBin
          % Allow histcounts to autobin.
          [~,edges{iD},binIdcs{iD}] = histcounts(x(:,iD));
      else
          [~,edges{iD},binIdcs{iD}] = histcounts(x(:,iD),nBins(iD));
      end
      sz(iD) = numel(edges{iD}) - 1;
      subs(:,iD) = binIdcs{iD};
  end

  % Drop observations that are out of range (bin index 0) in ANY dimension.
  % This has to happen after all columns are collected so that every
  % column of subs loses the same rows.
  subs(any(subs == 0,2),:) = [];

  if nDims == 1
      % accumarray needs at least a two-element size vector.
      sz = [sz 1];
  end
  n = accumarray(subs,ones(size(subs,1),1),sz);

  %% Normalization options
  nCounted = size(subs,1);
  if any(strcmp(opts.Normalization,{'countdensity','pdf'}))
      % Bin "volume" is the product of the mean bin width per dimension.
      binVolume = prod(cellfun(@(e) mean(double(diff(e))),edges));
  end
  switch opts.Normalization
      case 'countdensity'
          n = n / binVolume;
      case 'cumcount'
          for iD = 1:nDims
              n = cumsum(n,iD);
          end
      case 'probability'
          n = n / nCounted;
      case 'pdf'
          n = n / nCounted / binVolume;
      case 'cdf'
          n = n / nCounted;
          for iD = 1:nDims
              n = cumsum(n,iD);
          end
  end
  end
  92. %% LOCAL FUNCTIONS
  93. %%% TAKEN VERBATIM FROM HISTCOUNTS2
  function opts = parseinput(input) % Input is the caller's varargin cell
  %PARSEINPUT Turn optional histogram arguments into an options struct.
  %   OPTS = PARSEINPUT(INPUT) takes a cell array of optional arguments and
  %   returns a struct with the fields NumBins, XBinEdges, YBinEdges,
  %   XBinLimits, YBinLimits, BinWidth, Normalization (default 'count') and
  %   BinMethod (default 'auto').
  %
  %   INPUT may start with positional numeric/logical arguments: either one
  %   value (stored as NumBins, a scalar being duplicated to two elements)
  %   or two values (stored as XBinEdges and YBinEdges). Everything after
  %   that must be name-value pairs. Options that conflict with explicitly
  %   given bin edges raise an error; setting one binning option clears
  %   the competing ones.
  opts = struct('NumBins',[],'XBinEdges',[],'YBinEdges',[],'XBinLimits',[],...
      'YBinLimits',[],'BinWidth',[],'Normalization','count','BinMethod','auto');
  if isempty(input)
      % Nothing supplied: keep the defaults.
      return
  end

  caller = mfilename;
  valueClasses = {'numeric','logical'};
  edgeAttrs = {'vector', 'real', 'nondecreasing'};
  pairAttrs = {'integer', 'positive', 'numel', 2, 'vector'};
  limitAttrs = {'numel', 2, 'vector', 'real', 'finite','nondecreasing'};

  % ---- Leading positional arguments --------------------------------
  nPositional = 0;
  lead = input{1};
  if isnumeric(lead) || islogical(lead)
      edgesGiven = length(input) ~= 1 && ...
          (isnumeric(input{2}) || islogical(input{2}));
      if edgesGiven
          % Two numeric arguments: XBinEdges and YBinEdges.
          follow = input{2};
          validateattributes(lead,valueClasses,edgeAttrs, ...
              caller, 'xedges', nPositional+3)
          if length(lead) < 2
              error(message('MATLAB:histcounts2:EmptyOrScalarXBinEdges'));
          end
          validateattributes(follow,valueClasses,edgeAttrs, ...
              caller, 'yedges', nPositional+4)
          if length(follow) < 2
              error(message('MATLAB:histcounts2:EmptyOrScalarYBinEdges'));
          end
          opts.XBinEdges = lead;
          opts.YBinEdges = follow;
          input(1:2) = [];
          nPositional = nPositional + 2;
      else
          % Single numeric argument: number of bins.
          if isscalar(lead)
              lead = [lead lead];
          end
          validateattributes(lead,valueClasses,pairAttrs, ...
              caller, 'm', nPositional+3)
          opts.NumBins = lead;
          input(1) = [];
          nPositional = nPositional + 1;
      end
      % An explicit positional binning spec disables the bin method.
      opts.BinMethod = [];
  end

  % ---- Remaining name-value pairs ----------------------------------
  nRemaining = length(input);
  if mod(nRemaining,2) ~= 0
      error(message('MATLAB:histcounts2:ArgNameValueMismatch'))
  end
  knownNames = {'NumBins', 'XBinEdges', ...
      'YBinEdges','BinWidth', 'BinMethod', 'XBinLimits', ...
      'YBinLimits','Normalization'};
  for k = 1:2:nRemaining
      namePos = k + 2 + nPositional;
      valuePos = namePos + 1;
      name = validatestring(input{k}, knownNames, namePos);
      value = input{k+1};
      % Edge state before this pair is applied; no case below reads an
      % edge field after modifying that same field.
      hadXEdges = ~isempty(opts.XBinEdges);
      hadYEdges = ~isempty(opts.YBinEdges);
      switch name
          case 'Normalization'
              opts.Normalization = validatestring(value, {'count', 'countdensity', 'cumcount',...
                  'probability', 'pdf', 'cdf'}, caller, 'Normalization', valuePos);
          case 'NumBins'
              if isscalar(value)
                  value = [value value];
              end
              validateattributes(value,valueClasses,pairAttrs, ...
                  caller, 'NumBins', valuePos)
              opts.NumBins = value;
              if hadXEdges
                  error(message('MATLAB:histcounts2:InvalidMixedXBinInputs'))
              end
              if hadYEdges
                  error(message('MATLAB:histcounts2:InvalidMixedYBinInputs'))
              end
              opts.BinMethod = [];
              opts.BinWidth = [];
          case 'BinWidth'
              if isscalar(value)
                  value = [value value];
              end
              validateattributes(value, valueClasses, {'real', 'positive',...
                  'finite','numel',2,'vector'}, caller, ...
                  'BinWidth', valuePos);
              opts.BinWidth = value;
              if hadXEdges
                  error(message('MATLAB:histcounts2:InvalidMixedXBinInputs'))
              end
              if hadYEdges
                  error(message('MATLAB:histcounts2:InvalidMixedYBinInputs'))
              end
              opts.BinMethod = [];
              opts.NumBins = [];
          case 'BinMethod'
              opts.BinMethod = validatestring(value, {'auto','scott',...
                  'fd','integers'}, caller, 'BinMethod', valuePos);
              if hadXEdges
                  error(message('MATLAB:histcounts2:InvalidMixedXBinInputs'))
              end
              if hadYEdges
                  error(message('MATLAB:histcounts2:InvalidMixedYBinInputs'))
              end
              opts.BinWidth = [];
              opts.NumBins = [];
          case 'XBinEdges'
              validateattributes(value,valueClasses,edgeAttrs, ...
                  caller, 'XBinEdges', valuePos);
              if length(value) < 2
                  error(message('MATLAB:histcounts2:EmptyOrScalarXBinEdges'));
              end
              opts.XBinEdges = value;
              % The other binning options are cleared only once both
              % edge vectors are known, so that edges may override a
              % single dimension.
              if hadYEdges
                  opts.NumBins = [];
                  opts.BinMethod = [];
                  opts.BinWidth = [];
              end
              opts.XBinLimits = [];
          case 'YBinEdges'
              validateattributes(value,valueClasses,edgeAttrs, ...
                  caller, 'YBinEdges', valuePos);
              if length(value) < 2
                  error(message('MATLAB:histcounts2:EmptyOrScalarYBinEdges'));
              end
              opts.YBinEdges = value;
              % Same rule as for XBinEdges: clear only when both edge
              % vectors are set.
              if hadXEdges
                  opts.BinMethod = [];
                  opts.NumBins = [];
                  opts.BinWidth = [];
              end
              opts.YBinLimits = [];
          case 'XBinLimits'
              validateattributes(value, valueClasses, limitAttrs, caller, ...
                  'XBinLimits', valuePos)
              opts.XBinLimits = value;
              if hadXEdges
                  error(message('MATLAB:histcounts2:InvalidMixedXBinInputs'))
              end
          case 'YBinLimits'
              validateattributes(value, valueClasses, limitAttrs, caller, ...
                  'YBinLimits', valuePos)
              opts.YBinLimits = value;
              if hadYEdges
                  error(message('MATLAB:histcounts2:InvalidMixedYBinInputs'))
              end
      end
  end
  end
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...