1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
# Settings grouped under "eimp": embedding vector/dataset sizes, metrics, k for
# search, and per-model parameters (faiss, annoy, postgre, KDTree).
# NOTE(review): every entry starts at column 0 with "- ", so the nesting of
# these keys cannot be read from indentation - confirm the intended hierarchy.
- eimp:
- embeddings:
# NOTE(review): a range whose fields are all below 60 (e.g. 10:50:10) can be
# read as a base-60 integer by YAML 1.1 parsers - quote such values.
- vector_size: 50,100,200, 300 # comma-separated list of values, or a range, e.g. 50:500:10
- dataset_size: 2000, 5000
- train_size: 0.9 # proportion of the dataset used as training data
- all_models_params:
- metric: ['euclidean', 'manhattan', 'chebyshev', 'cosine', 'angular']
- search_params:
- k: [1, 10] # k, the number of neighbors
- model:
- faiss:
- venc: [8, 16, 32, 64] # vector encoding
- indexes: ['Flat', 'HNSW32,Flat', 'IVF65536_HNSW32,Flat', 'HNSW32,SQ8', 'IVF65536_HNSW32,SQ8']
- nprobe: [1, 5, 10, 20, 40, 80, 100] # number of cells (out of nlist)
- nlist: [1, 5, 10, 20, 40, 80, 100] # number of cells
- M: [1, 10, 100, 1000] # number of neighbors used in the graph
- annoy:
- n_trees: [10, 50, 100, 200, 500, 1000]
- postgre:
- indexes: ['gist', 'spgist']
- KDTree:
- leaf_size: [10, 50, 100, 200, 500, 1000]
- estimation: # different views of the graphs to build
# If a parameter listed below is not varied on a graph, only the part of the
# dataframe matching the value given here is considered.
- aimed_param_values:
- dataset_size: 5000
- metric: 'euclidean'
- vector_size: 300
- k: 10
- x: ['k', "dataset_size", 'vector_size'] # parameters to use on the x axis
- y: # parameters to use on the y axis
- train: ["training_time", "saving_time", "model_size"]
- test: ["search_time", "loading_time"]
- lines: ['k', "metric", "model"] # parameters drawn as separate lines on the graph (hue/color/lines)
- facet: ['k', "dataset_size", 'vector_size', "metric"] # parameters used to build facets
- topn: 3 # number of best models to show in the report file
- relative_graphs: false # show relative data on graphs (e.g. model search time / fullscan search time)
- log10_graphs: true # show results on a semi-log scale
# Upper (max) and lower (min) bounds for order_size_train.
# NOTE(review): presumably the size range of orders used for training - confirm.
- order_size_train:
- max: 10
- min: 2
# Fixed seed for random_model.
# NOTE(review): presumably set so that runs are reproducible - confirm.
- random_model:
- random_seed: 2019
# NOTE(review): n_items is presumably the number of items per recommendation - confirm.
- recommendations:
- n_items: 10
# Settings for basket_tfidf_perceptron_model: its own random seed plus a
# params_grid holding one value each for batch_size, epoch_count, lr and momentum.
- basket_tfidf_perceptron_model:
- random_seed: 10027
- params_grid: {"batch_size": 512, "epoch_count": 20, "lr": 0.001, "momentum": 0.8}
|