Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

methods_parse.py 4.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3. # In[8]:
  4. import numpy as np
  5. import pandas as pd
  6. import ast
  7. import traceback
  def UsedMethods2(code):
      """Return attribute names that ``code`` uses as methods or chained accessors.

      The snippet is parsed with ``ast`` (surrounding backticks are stripped
      first) to collect every attribute name.  A name is kept when at least one
      textual occurrence of it in ``code`` is directly preceded by ``.`` and
      directly followed by ``.`` or ``(``.

      Args:
          code: Source text of a code block.

      Returns:
          A sorted list of unique attribute names that pass the check above.
          An empty list is returned when ``code`` cannot be parsed (including
          when it is not a string).
      """
      try:
          tree = ast.parse(code.strip('`'))
      except Exception:
          # Best-effort extraction: unparsable or non-string cells simply
          # contribute no methods instead of aborting the whole run.
          return []

      attr_names = sorted(
          {node.attr for node in ast.walk(tree) if isinstance(node, ast.Attribute)}
      )

      result = []
      for name in attr_names:
          # Check every occurrence, not just the first one: the same text may
          # appear earlier as a variable name or as a prefix of a longer name.
          start = code.find(name)
          while start != -1:
              end = start + len(name)
              if (
                  start > 0
                  and code[start - 1] == '.'
                  and end < len(code)
                  and code[end] in '.('
              ):
                  result.append(name)
                  break
              start = code.find(name, start + 1)
      return result
  21. # attributes
  def UsedMethods(code):
      """Return the sorted, unique attribute names referenced in ``code``.

      The snippet is first parsed with ``ast`` (surrounding backticks are
      stripped) and every ``ast.Attribute`` name is collected.  When the snippet
      is not valid Python (for example notebook magics or truncated cells), a
      textual fallback collects every identifier that directly follows a ``.``
      and is directly followed by ``.`` or ``(``.

      Args:
          code: Source text of a code block.

      Returns:
          A sorted list of unique names.  Non-string input yields an empty list.
      """
      import re  # local import: ``re`` is not part of the module's import block

      if not isinstance(code, str):
          # e.g. NaN for an empty cell when applied over a DataFrame column.
          return []

      try:
          tree = ast.parse(code.strip('`'))
      except Exception:
          # Fallback for unparsable snippets.  The identifier pattern includes
          # underscores (so ``read_csv`` is captured whole) and the lookahead
          # leaves the trailing ``.`` unconsumed so chained accesses such as
          # ``a.b.c(`` yield both ``b`` and ``c``.
          found = re.findall(r'\.([^\W\d]\w*)(?=[.(])', code)
          return sorted(set(found))

      return sorted(
          {node.attr for node in ast.walk(tree) if isinstance(node, ast.Attribute)}
      )
  41. #load dataframe
  42. # filename = "11.csv" #enter the name of a .csv file
  43. # df = pd.read_csv(filename)
  44. # #chop off the head
  45. # df.columns = df.iloc[0,:]
  46. # df = df.drop(index=0)
  47. #fill methods for each block
  48. # df['python_methods'] = df['code_block'].apply(UsedMethods)
  49. #fill methods for nearby blocks
  def shift_methods(df, shift_range):
      """Add lagged and leading copies of the method/vertex columns per notebook.

      For every ``i`` in ``1..shift_range`` four columns are added to ``df``:

      * ``python_methods_m{i}`` / ``graph_vertex_m{i}`` -- the value found ``i``
        rows earlier among the rows sharing the same ``kaggle_id``;
      * ``python_methods_p{i}`` / ``graph_vertex_p{i}`` -- the value found ``i``
        rows later among the rows sharing the same ``kaggle_id``.

      Positions with no such neighbouring row are left as NaN.

      Args:
          df: DataFrame with ``kaggle_id``, ``python_methods`` and
              ``graph_vertex`` columns.  It is modified in place.
          shift_range: Largest row offset to generate columns for.

      Returns:
          The same ``df`` object, with the new columns added.
      """
      # Group only the two source columns; shifting inside each group keeps
      # values from leaking across notebooks and avoids writing into filtered
      # copies of ``df`` (which is silently lost under pandas Copy-on-Write).
      grouped = df[['python_methods', 'graph_vertex']].groupby(
          df['kaggle_id'], sort=False
      )

      for i in range(1, shift_range + 1):
          # "m" (minus) looks i rows back, "p" (plus) looks i rows ahead.
          for suffix, periods in (('m', i), ('p', -i)):
              for source in ('python_methods', 'graph_vertex'):
                  column = '{}_{}{}'.format(source, suffix, i)
                  df[column] = grouped[source].shift(periods)
      return df
  94. # #export dataframe
  95. # df.to_csv(filename[:-4] + "python_methods.csv", index=False)
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...