generateEmbeddings.py

import math
import os
import pickle

import torch
import tqdm
from PIL import Image
from torch.nn import functional as F
from transformers import AutoProcessor, CLIPTokenizer, GroupViTTextModel, GroupViTVisionModel

import settings
from utils.embeddingDataset import loadData

MODEL_NAME = "nvidia/groupvit-gcc-yfcc"

train = True
do_text = True
do_image = False

if train:
    out_dir = settings.TrainSettings.Output.out_dir
else:
    out_dir = settings.ValidationSettings.Output.out_dir


def batch(iterable, n=1):
    """Yield successive chunks of size n from iterable."""
    length = len(iterable)
    for ndx in range(0, length, n):
        yield iterable[ndx:min(ndx + n, length)]


def processData():
    # Load the dataset and the GroupViT vision/text models.
    imgs, entities, captions, _ = loadData(train)
    processor = AutoProcessor.from_pretrained(MODEL_NAME)
    model_image = GroupViTVisionModel.from_pretrained(MODEL_NAME).to("cuda")
    tokenizer = CLIPTokenizer.from_pretrained(MODEL_NAME)
    model_text = GroupViTTextModel.from_pretrained(MODEL_NAME).to("cuda")

    models_preprocess = []
    saves = 0
    with torch.no_grad():
        if do_text:
            for img in tqdm.tqdm(imgs):
                # The integer file name (e.g. "123.jpg") is the lookup key
                # into the entities and captions dictionaries.
                key = int(os.path.split(img)[-1].split(".")[0])

                # Build one "<entity> <caption> <entity>" string per entity,
                # across all captions for this image (the first caption entry
                # is skipped, matching the entities indexing).
                strings = []
                for index, caption in enumerate(captions[key][1:]):
                    for ent in entities[key][index]:
                        strings.append(f"{entities[key][index][ent]} {caption} {entities[key][index][ent]}")
                if not strings:
                    # No entities for this image: nothing to embed.
                    continue

                text_inputs = tokenizer(strings, padding=True, return_tensors="pt")
                text_inputs["input_ids"] = text_inputs["input_ids"].to("cuda")
                text_inputs["attention_mask"] = text_inputs["attention_mask"].to("cuda")
                text_segs = model_text(
                    input_ids=text_inputs["input_ids"],
                    attention_mask=text_inputs["attention_mask"],
                    output_attentions=True,
                ).last_hidden_state

                # Regroup the per-string embeddings by caption and pad each
                # group to a fixed 20x20x256 block (padding is computed from
                # the group's own shape, not the full batch).
                i = 0
                for index, caption in enumerate(captions[key][1:]):
                    segments = []
                    strings = []
                    for ent in entities[key][index]:
                        segments.append(text_segs[i])
                        i += 1
                        strings.append(f"{entities[key][index][ent]} {caption} {entities[key][index][ent]}")
                    if len(segments):
                        text_segs_batch = torch.stack(segments)
                        text_segs_batch = F.pad(
                            input=text_segs_batch,
                            pad=(0, 0,
                                 0, max(0, 20 - text_segs_batch.size(1)),
                                 0, max(0, 20 - text_segs_batch.size(0))),
                            mode="constant",
                            value=0,
                        ).detach().cpu().numpy()
                        models_preprocess.append((key, strings, text_segs_batch))

                # Flush a shard to disk every 10,000 accumulated items.
                if models_preprocess and len(models_preprocess) % 10_000 == 0:
                    shard_path = os.path.join(out_dir, f"data_text_saves{saves}.pk")
                    if os.path.exists(shard_path):
                        os.remove(shard_path)
                    with open(shard_path, "wb") as f:
                        pickle.dump(models_preprocess, f)
                    print(f"Saved {len(models_preprocess)} items to {shard_path}")
                    saves += 1
                    models_preprocess = []

            print()
            print("Done processing text data")
            # Write whatever is left over after the last full shard.
            with open(os.path.join(out_dir, "data_text.pk"), "wb") as f:
                pickle.dump(models_preprocess, f)
            print(f"Saved data to {os.path.join(out_dir, 'data_text.pk')}")

        if do_image:
            # Run the images through the vision model in batches of 32 and
            # keep the first attention layer's map for each image, keyed by id.
            images_processed = {}
            for b in tqdm.tqdm(batch(imgs, 32), total=math.ceil(len(imgs) / 32)):
                img_inputs = processor(images=[Image.open(img) for img in b], return_tensors="pt").to("cuda")
                outputs = model_image(**img_inputs, output_attentions=True)
                for i, img in enumerate(b):
                    key = int(os.path.split(img)[-1].split(".")[0])
                    images_processed[key] = outputs.attentions[0][i].detach().cpu().numpy()

            print()
            print("Done processing image data")
            with open(os.path.join(out_dir, "data_images.pk"), "wb") as f:
                pickle.dump(images_processed, f)
            print(f"Saved data to {os.path.join(out_dir, 'data_images.pk')}")


if __name__ == "__main__":
    processData()
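
# ---------------------------------------------------------------------------
# Notes (illustrative sketch, not part of the original project code).
#
# This script assumes a project-local `settings` module and a `loadData`
# helper roughly shaped as below; these names and structures are inferred
# from how they are used above and may differ from the real implementations:
#
#     # settings.py (assumed)
#     class TrainSettings:
#         class Output:
#             out_dir = "output/train"
#
#     class ValidationSettings:
#         class Output:
#             out_dir = "output/val"
#
#     # utils/embeddingDataset.py (assumed)
#     def loadData(train):
#         # imgs:     list of image paths named "<integer key>.<ext>"
#         # entities: {key: [{entity_id: entity_text, ...} per caption]}
#         # captions: {key: [header, caption_0, caption_1, ...]}
#         return imgs, entities, captions, extra
#
# Reading back a pickled text shard written by processData() (the tuple
# layout follows the append in the text branch above):
#
#     import os, pickle
#     import settings
#
#     out_dir = settings.TrainSettings.Output.out_dir
#     with open(os.path.join(out_dir, "data_text_saves0.pk"), "rb") as f:
#         shard = pickle.load(f)
#     key, strings, embeddings = shard[0]   # embeddings: at least (20, 20, 256)
#     print(key, len(strings), embeddings.shape)
# ---------------------------------------------------------------------------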
