Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

eval_lm.py 2.9 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
  1. #!/usr/bin/env python3 -u
  2. # Copyright (c) 2017-present, Facebook, Inc.
  3. # All rights reserved.
  4. #
  5. # This source code is licensed under the license found in the LICENSE file in
  6. # the root directory of this source tree. An additional grant of patent rights
  7. # can be found in the PATENTS file in the same directory.
  8. import numpy as np
  9. import torch
  10. from fairseq import data, options, progress_bar, tasks, utils
  11. from fairseq.meters import StopwatchMeter, TimeMeter
  12. from fairseq.sequence_scorer import SequenceScorer
  13. def main(args):
  14. assert args.path is not None, '--path required for evaluation!'
  15. if args.tokens_per_sample is None:
  16. args.tokens_per_sample = 1024
  17. print(args)
  18. use_cuda = torch.cuda.is_available() and not args.cpu
  19. # Load dataset splits
  20. task = tasks.setup_task(args)
  21. task.load_dataset(args.gen_subset)
  22. print('| {} {} {} examples'.format(args.data, args.gen_subset, len(task.dataset(args.gen_subset))))
  23. # Load ensemble
  24. print('| loading model(s) from {}'.format(args.path))
  25. models, _ = utils.load_ensemble_for_inference(args.path.split(':'), task)
  26. # Optimize ensemble for generation and set the source and dest dicts on the model (required by scorer)
  27. for model in models:
  28. model.make_generation_fast_()
  29. itr = data.EpochBatchIterator(
  30. dataset=task.dataset(args.gen_subset),
  31. max_sentences=args.max_sentences or 4,
  32. max_positions=model.max_positions(),
  33. num_shards=args.num_shards,
  34. shard_id=args.shard_id,
  35. ).next_epoch_itr(shuffle=False)
  36. gen_timer = StopwatchMeter()
  37. scorer = SequenceScorer(models, task.target_dictionary)
  38. if use_cuda:
  39. scorer.cuda()
  40. score_sum = 0.
  41. count = 0
  42. with progress_bar.build_progress_bar(args, itr) as t:
  43. results = scorer.score_batched_itr(t, cuda=use_cuda, timer=gen_timer)
  44. wps_meter = TimeMeter()
  45. for _, src_tokens, __, hypos in results:
  46. for hypo in hypos:
  47. pos_scores = hypo['positional_scores']
  48. inf_scores = pos_scores.eq(float('inf')) | pos_scores.eq(float('-inf'))
  49. if inf_scores.any():
  50. print('| Skipping tokens with inf scores:',
  51. task.target_dictionary.string(hypo['tokens'][inf_scores.nonzero()]))
  52. pos_scores = pos_scores[(~inf_scores).nonzero()]
  53. score_sum += pos_scores.sum()
  54. count += pos_scores.numel()
  55. wps_meter.update(src_tokens.size(0))
  56. t.log({'wps': round(wps_meter.avg)})
  57. avg_nll_loss = -score_sum / count
  58. print('| Evaluated {} tokens in {:.1f}s ({:.2f} tokens/s)'.format(gen_timer.n, gen_timer.sum, 1. / gen_timer.avg))
  59. print('| Loss: {:.4f}, Perplexity: {:.2f}'.format(avg_nll_loss, np.exp(avg_nll_loss)))
  60. if __name__ == '__main__':
  61. parser = options.get_eval_lm_parser()
  62. args = options.parse_args_and_arch(parser)
  63. main(args)
Tip!

Press p to see the previous file, or n to see the next file

Comments

Loading...