Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

extractor.py 2.2 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
  1. import subprocess
  2. class Extractor:
  3. def __init__(self, config, jar_path, max_path_length, max_path_width):
  4. self.config = config
  5. self.max_path_length = max_path_length
  6. self.max_path_width = max_path_width
  7. self.jar_path = jar_path
  8. def extract_paths(self, path):
  9. #command = ['java', '-jar', self.jar_path, 'code2vec', '--lang', 'py', '--project', 'pred_files', '--output', 'cd2vec' , '--maxH', str(self.max_path_length), '--maxW', str(self.max_path_width)]
  10. output_file = open('cd2vec/path_contexts_0.csv', 'r')
  11. #process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  12. #out, err = process.communicate()
  13. #output = out.decode().splitlines()
  14. #if len(output_file) == 0:
  15. # err = err.decode()
  16. # raise ValueError(err)
  17. hash_to_string_dict = {}
  18. result = []
  19. for i, line in enumerate(output_file):
  20. parts = line.rstrip().split(' ')
  21. method_name = parts[0]
  22. current_result_line_parts = [method_name]
  23. contexts = parts[1:]
  24. for context in contexts[:self.config.MAX_CONTEXTS]:
  25. context_parts = context.split(',')
  26. context_word1 = context_parts[0]
  27. context_path = context_parts[1]
  28. context_word2 = context_parts[2]
  29. hashed_path = str(self.java_string_hashcode(context_path))
  30. hash_to_string_dict[hashed_path] = context_path
  31. current_result_line_parts += ['%s,%s,%s' % (context_word1, hashed_path, context_word2)]
  32. space_padding = ' ' * (self.config.MAX_CONTEXTS - len(contexts))
  33. result_line = ' '.join(current_result_line_parts) + space_padding
  34. result.append(result_line)
  35. return result, hash_to_string_dict
  36. @staticmethod
  37. def java_string_hashcode(s):
  38. """
  39. Imitating Java's String#hashCode, because the model is trained on hashed paths but we wish to
  40. Present the path attention on un-hashed paths.
  41. """
  42. h = 0
  43. for c in s:
  44. h = (31 * h + ord(c)) & 0xFFFFFFFF
  45. return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...