Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

test_data_and_model.py 1.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
  1. import pytest
  2. from sklearn.metrics import classification_report
  3. from data.datamanager import data_loader
  4. import joblib
  5. import numpy as np
  6. @pytest.fixture
  7. def adult_test_dataset():
  8. path = './data/adult_test.csv'
  9. x, y = data_loader(path)
  10. return x, y, path
  11. def test_dataloader(adult_test_dataset):
  12. # Test whether there are columns containing unique values within the cleaned dataset or whether there are
  13. x, y, _ = adult_test_dataset
  14. # perform unique count for each column of the dataframe
  15. n_unique = x.nunique(axis=0).values
  16. # perform unique count for each CATEGORICAL column of the dataframe
  17. n_unique_categorical = np.array([x[i].nunique() for i in x.columns[x.dtypes == 'object']])
  18. # Perform tests on unique counts
  19. assert n_unique.min() > 1
  20. assert all(n_unique_categorical/x.shape[0] < 0.9)
  21. def test_model_metrics(adult_test_dataset):
  22. x, y, data_path = adult_test_dataset
  23. clf = joblib.load('./model.pkl')
  24. predictions = clf.predict(x)
  25. metrics = classification_report(y, predictions, output_dict=True)
  26. # just adding a comment
  27. assert len(np.unique(predictions)) > 1
  28. assert metrics['>50K']['precision'] > 0.7 # fill here
  29. assert metrics['>50K']['recall'] > 0.1 # fill here
Tip!

Press p or the left arrow key to see the previous file, or n or the right arrow key to see the next file

Comments

Loading...