Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

__init__.py 3.3 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
  1. # -*- coding: utf-8 -*-
  2. # Copyright (c) 2021. Jeffrey Nirschl. All rights reserved.
  3. #
  4. # Licensed under the MIT license. See the LICENSE file in the project
  5. # root directory for license information.
  6. #
  7. # Time-stamp: <>
  8. # ======================================================================
  9. import os
  10. from pathlib import Path
  11. import pandas as pd
  12. import yaml
  13. def load_params(filepath="params.yaml") -> dict:
  14. """Helper function to load params.yaml
  15. Args:
  16. filepath (str): filename or full filepath to yaml file with parameters
  17. Returns:
  18. dict: dictionary of parameters
  19. """
  20. assert (os.path.isfile(filepath)), FileNotFoundError
  21. # read params.yaml
  22. with open(filepath, "r") as file:
  23. params = yaml.safe_load(file)
  24. return params
  25. def convert_none_to_null(params):
  26. """Convert None values in params.yaml into null to ensure
  27. correct reading/writing as None type"""
  28. if isinstance(params, list):
  29. params[:] = [convert_none_to_null(elem) for elem in params]
  30. elif isinstance(params, dict):
  31. for k, v in params.items():
  32. params[k] = convert_none_to_null(v)
  33. return 'null' if params is None else params
  34. def save_params(params):
  35. """"""
  36. # convert None values to null
  37. # save params
  38. new_params = yaml.safe_dump(params)
  39. with open("params.yaml", 'w') as writer:
  40. writer.write(new_params)
  41. def load_data(data_path,
  42. sep=",",
  43. header=None,
  44. index_col=None) -> object:
  45. """Helper function to load train and test files
  46. as well as optional param loading
  47. Args:
  48. data_path (str or list of str): path to csv file
  49. sep (str):
  50. index_col (str):
  51. header (int):
  52. Returns:
  53. object:
  54. """
  55. for elem in data_path:
  56. if type(elem) is type(Path()):
  57. assert(elem.is_file()), FileNotFoundError
  58. else:
  59. assert(os.path.isfile(data_path)), FileNotFoundError
  60. # if single path as str, convert to list of str
  61. if type(data_path) is str:
  62. data_path = [data_path]
  63. # loop over filepath in list and read file
  64. output_df = [pd.read_csv(elem, sep=sep, header=header,
  65. index_col=index_col) for elem in data_path]
  66. # if single file as input, return single df not a list
  67. if len(output_df) == 1:
  68. output_df = output_df[0]
  69. return output_df
  70. def save_as_csv(df, filepath, output_dir,
  71. replace_text=".csv",
  72. suffix="_processed.csv",
  73. na_rep="nan",
  74. output_path=False):
  75. """Helper function to format the new filename and save output"""
  76. # if single path as str, convert to list of str
  77. if type(df) is not list:
  78. df = [df]
  79. if type(filepath) is str:
  80. filepath = [filepath]
  81. # list lengths must be equal
  82. assert (len(df) == len(filepath)), AssertionError
  83. for temp_df, temp_path in zip(df, filepath):
  84. # set output filenames
  85. save_fname = os.path.basename(temp_path.replace(replace_text,
  86. suffix))
  87. # save updated dataframes
  88. save_filepath = output_dir.joinpath(save_fname)
  89. temp_df.to_csv(save_filepath,
  90. na_rep=na_rep)
  91. if output_path:
  92. return save_filepath
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...