jinensetpal
/
transformer-belief-state-geometries
connected to https://github.com/jinensetpal/transformer-belief-state-geometries.git


  
1

	
2

	
3

	
4

	
5

	
6

	
7

	
8

	
9

	
10

	
11

	
12

	
13

	
14

	
15

	
16

	
17

	
18

	
19

	
20

	
21

	
22

	
23

	
24

	
25

	
26

	
27

	
28

	
29

	
30

	
31

	
32

	
33

	
34

	
35

	
36

	
            #!/usr/bin/env python3

from model import GPT, GPTConfig
import torch

# Device the model and the token tensor are moved to.
device = 'cuda'

# Architecture hyperparameters passed to GPTConfig in the script body.
# NOTE(review): presumably these mirror the training run that produced the
# checkpoint -- confirm against the training config.
block_size, n_embd = 256, 384
n_layer = n_head = 6
dropout = 0.2

# Accumulator filled by `hook`: one flattened activation tensor per call.
residuals = list()
def hook(module, i, o):
    """Forward hook that records the input fed to the hooked module.

    Args:
        module: The module the hook fired on (unused).
        i: Positional inputs of the module's forward call; only ``i[0]``
            is read.
        o: Output of the module's forward call (unused).

    The first input is flattened into rows of width ``n_embd``, detached
    from the autograd graph and appended to the module-level ``residuals``
    list. Nothing is returned.
    """
    activations = i[0]
    flattened = activations.view(-1, n_embd)
    residuals.append(flattened.detach())

if __name__ == '__main__':
    # Rebuild the architecture from the module-level hyperparameters.
    gptconf = GPTConfig(
        block_size=block_size,
        vocab_size=3,
        n_layer=n_layer,
        n_head=n_head,
        n_embd=n_embd,
        dropout=dropout,
        bias=False,
    )
    model = GPT(gptconf)

    # The model is compiled *before* the weights are loaded.
    # NOTE(review): presumably the checkpoint was saved from a compiled model,
    # so its state-dict keys only match the compiled wrapper -- confirm before
    # reordering these two steps.
    model = torch.compile(model).to(device)
    checkpoint = torch.load('out-mess3/ckpt.pt', map_location=device)
    model.load_state_dict(checkpoint['model'])

    # Modules start in training mode and dropout is configured at 0.2;
    # torch.no_grad() does not turn dropout off. Switch to eval mode so the
    # captured activations are deterministic and not randomly zeroed/rescaled.
    model.eval()

    # Capture whatever is fed into the output head on every forward pass.
    # NOTE(review): if GPT.forward only applies lm_head to the last position
    # when no targets are given (as upstream nanoGPT does), only that position
    # is recorded per sequence -- confirm against model.py.
    model.lm_head.register_forward_hook(hook)

    tokens = torch.load('mess3-tokens.pt')
    # Merge the two leading dimensions into a single batch dimension, then
    # drop the final token of every sequence.
    tokens = tokens.view(-1, tokens.size(2))[:, :-1].to(device)

    # Forward passes are run purely for the hook's side effect; the model
    # output is discarded. The batch is processed in 10 chunks
    # (presumably to bound peak memory).
    with torch.no_grad():
        for token_subset in tokens.chunk(10):
            model(token_subset)

    # Stack the per-chunk activations row-wise and write them to disk.
    torch.save(torch.vstack(residuals), 'mess3-residuals.pt')