-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict.py
57 lines (40 loc) · 1.82 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md
from typing import List
from cog import BaseModel, BasePredictor, File, Input
from PIL import Image
from transformers import AutoProcessor, AutoTokenizer, CLIPModel
import torch
# os.environ["TRANSFORMERS_VERBOSITY"] = "info"
# Hugging Face model id for the CLIP checkpoint served by this predictor.
MODEL_NAME = "openai/clip-vit-large-patch14"
# Local directory where transformers caches downloaded weights/tokenizer files.
CACHE_DIR = ".transformer"
# Run on the first GPU when available, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
class Output(BaseModel):
    """Prediction output: a single CLIP embedding vector as a flat list of floats."""

    embedding: List[float]
class Predictor(BasePredictor):
    """Cog predictor that embeds text or images with CLIP (ViT-L/14)."""

    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        self.model: CLIPModel = CLIPModel.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)  # type: ignore
        self.model = self.model.to(device)
        # Processor preprocesses images; tokenizer handles text input.
        self.processor = AutoProcessor.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)

    def predict(
        self,
        text: str = Input(description="Input text", default=None),
        image: File = Input(description="Input image", default=None),
    ) -> Output:
        """Run a single prediction on the model.

        Embeds `image` when provided (it takes precedence over `text`),
        otherwise embeds `text`.

        Raises:
            ValueError: if neither `text` nor `image` is supplied.
        """
        # Inference only: disable autograd to avoid building a computation
        # graph (saves memory and time on every request).
        with torch.no_grad():
            if image:
                # Keep the opened PIL image in its own name rather than
                # shadowing the `image` file parameter.
                pil_image = Image.open(image)
                inputs = self.processor(images=pil_image, return_tensors="pt").to(device)
                features = self.model.get_image_features(**inputs)
            elif text:
                inputs = self.tokenizer([text], padding=True, return_tensors="pt").to(
                    device
                )
                features = self.model.get_text_features(**inputs)  # type: ignore
            else:
                # ValueError is more specific than bare Exception and still
                # backward compatible for callers catching Exception.
                raise ValueError("Missing inputs.")
        # Single input -> batch of one; return the first (only) row.
        return Output(embedding=features.tolist()[0])