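"""Precompute GroupViT text embeddings and vision attention maps for the
dataset returned by loadData, and cache them as pickle files in the
configured output directory."""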
import os
import pickle
import tqdm
from PIL import Image
import torch
from torch.nn import functional as F
from transformers import AutoProcessor, GroupViTVisionModel, GroupViTTextModel, CLIPTokenizer
import settings
from utils.embeddingDataset import loadData
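# which split to process and which modalities to precompute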
train = True
do_text = True
do_image = False
if train:
    out_dir = settings.TrainSettings.Output.out_dir
else:
    out_dir = settings.ValidationSettings.Output.out_dir
def batch(iterable, n=1):
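    """Yield consecutive slices of `iterable` of length at most n."""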
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]
def processData():
    # load the dataset index and the pretrained GroupViT encoders
    imgs, entities, captions, _ = loadData(train)
    processor = AutoProcessor.from_pretrained("nvidia/groupvit-gcc-yfcc")
    model_image = GroupViTVisionModel.from_pretrained("nvidia/groupvit-gcc-yfcc").to("cuda")
    tokenizer = CLIPTokenizer.from_pretrained("nvidia/groupvit-gcc-yfcc")
    model_text = GroupViTTextModel.from_pretrained("nvidia/groupvit-gcc-yfcc").to("cuda")
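    # buffer of processed text samples, flushed to numbered pickle files in chunks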
    models_preprocess = []
    saves = 0
    with torch.no_grad():
        if do_text:
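            # build one entity-framed prompt per (caption, entity) pair and embed them all in a single forward pass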
            for img in tqdm.tqdm(imgs):
                key = int(os.path.split(img)[-1].split(".")[0])
                strings = []
                for index, caption in enumerate(captions[key][1:]):
                    for i in entities[key][index]:
                        strings.append(f"{entities[key][index][i]} {caption} {entities[key][index][i]}")
                text_inputs = tokenizer(strings, padding=True, return_tensors="pt")
                text_inputs["input_ids"] = text_inputs["input_ids"].to("cuda")
                text_inputs["attention_mask"] = text_inputs["attention_mask"].to("cuda")
                text_segs = model_text(input_ids=text_inputs["input_ids"],
                                       attention_mask=text_inputs["attention_mask"],
                                       output_attentions=True)
                text_segs = text_segs.last_hidden_state
                # regroup the per-string embeddings by caption and pad each stack to a fixed 20x20x256 block
                i = 0
                for index, caption in enumerate(captions[key][1:]):
                    segments = []
                    strings = []
                    for entity in entities[key][index]:
                        segment = text_segs[i]
                        i += 1
                        strings.append(f"{entities[key][index][entity]} {caption} {entities[key][index][entity]}")
                        segments.append(segment)
                    if len(segments):
                        text_segs_batch = torch.stack(segments)
                        text_segs_batch = F.pad(input=text_segs_batch,
                                                pad=(0, 0,
                                                     0, max(0, 20 - text_segs_batch.size(1)),
                                                     0, max(0, 20 - text_segs_batch.size(0))),
                                                mode='constant', value=0).detach().cpu().numpy()
                        models_preprocess.append((key, strings, text_segs_batch))
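                        # every 10,000 buffered samples, write a numbered checkpoint and reset the buffer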
                        if len(models_preprocess) % 10_000 == 0:
                            save_path = os.path.join(out_dir, f"data_text_saves{saves}.pk")
                            if os.path.exists(save_path):
                                os.remove(save_path)
                            with open(save_path, "wb") as f:
                                pickle.dump(models_preprocess, f)
                            print(f"Saved {len(models_preprocess)} to {save_path}")
                            saves += 1
                            models_preprocess = []
            print()
            print("Done processing text data")
            with open(os.path.join(out_dir, "data_text.pk"), "wb") as f:
                pickle.dump(models_preprocess, f)
            print(f"Saved data to {os.path.join(out_dir, 'data_text.pk')}")
        if do_image:
            # process the images in batches and cache one attention map per image key
            images_processed = {}
            for b in tqdm.tqdm(batch(imgs, 32), total=len(imgs) // 32 + (1 if len(imgs) % 32 != 0 else 0)):
                img_inputs = processor(images=[Image.open(img) for img in b], return_tensors="pt").to("cuda")
                outputs = model_image(**img_inputs, output_attentions=True)
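                # keep the first attention tensor for each image, keyed by its numeric file name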
                for i, img in enumerate(b):
                    key = int(os.path.split(img)[-1].split(".")[0])
                    images_processed[key] = outputs.attentions[0][i].detach().cpu().numpy()
            print()
            print("Done processing image data")
            with open(os.path.join(out_dir, "data_images.pk"), "wb") as f:
                pickle.dump(images_processed, f)
            print(f"Saved data to {os.path.join(out_dir, 'data_images.pk')}")
if __name__ == "__main__":
    processData()