Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

score.py 1.9 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
  1. #!/usr/bin/env python3
  2. # Copyright (c) 2017-present, Facebook, Inc.
  3. # All rights reserved.
  4. #
  5. # This source code is licensed under the license found in the LICENSE file in
  6. # the root directory of this source tree. An additional grant of patent rights
  7. # can be found in the PATENTS file in the same directory.
  8. #
  9. import argparse
  10. import os
  11. import sys
  12. from fairseq import bleu, tokenizer
  13. from fairseq.data import dictionary
  14. def main():
  15. parser = argparse.ArgumentParser(description='Command-line script for BLEU scoring.')
  16. parser.add_argument('-s', '--sys', default='-', help='system output')
  17. parser.add_argument('-r', '--ref', required=True, help='references')
  18. parser.add_argument('-o', '--order', default=4, metavar='N',
  19. type=int, help='consider ngrams up to this order')
  20. parser.add_argument('--ignore-case', action='store_true',
  21. help='case-insensitive scoring')
  22. args = parser.parse_args()
  23. print(args)
  24. assert args.sys == '-' or os.path.exists(args.sys), \
  25. "System output file {} does not exist".format(args.sys)
  26. assert os.path.exists(args.ref), \
  27. "Reference file {} does not exist".format(args.ref)
  28. dict = dictionary.Dictionary()
  29. def readlines(fd):
  30. for line in fd.readlines():
  31. if args.ignore_case:
  32. yield line.lower()
  33. yield line
  34. def score(fdsys):
  35. with open(args.ref) as fdref:
  36. scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk())
  37. for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
  38. sys_tok = tokenizer.Tokenizer.tokenize(sys_tok, dict)
  39. ref_tok = tokenizer.Tokenizer.tokenize(ref_tok, dict)
  40. scorer.add(ref_tok, sys_tok)
  41. print(scorer.result_string(args.order))
  42. if args.sys == '-':
  43. score(sys.stdin)
  44. else:
  45. with open(args.sys, 'r') as f:
  46. score(f)
  47. if __name__ == '__main__':
  48. main()
Tip!

Press p or ← to see the previous file, or n or → to see the next file

Comments

Loading...