## First, install llama.cpp and build it with cuBLAS (CUDA) support
!git clone https://github.com/ggerganov/llama.cpp
%cd llama.cpp
!make LLAMA_CUBLAS=1
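# Optional sanity check (not in the original snippet): confirm the build
# actually produced the ./server binary before moving on.
!ls -la ./server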
### Then download the model. The following is the Mistral LLaVA 1.6 quant, a nice balance of quality and speed.
!wget https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/ggml-mistral-q_4_k.gguf
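# The server also needs the multimodal projector (mmproj) file referenced below.
# Assuming it is hosted in the same HF repo (an assumption; adjust the URL if
# the file lives elsewhere, e.g. on your Google Drive):
!wget https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-mistral7b-f16.gguf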
# Run the llama.cpp server. nohup and the trailing & are important: they run the
# server in the background, so it's possible to keep executing other cells.
# Note: -m and --mmproj must point at files that actually exist; here they point
# at the files downloaded above (into /content/llama.cpp) rather than Drive copies.
!nohup ./server -m "/content/llama.cpp/ggml-mistral-q_4_k.gguf" -c 2048 -ngl 99 --mmproj "/content/llama.cpp/mmproj-mistral7b-f16.gguf" --host 0.0.0.0 --port 8000 &
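# The model takes a while to load, so wait until the server accepts connections
# before sending requests. A minimal sketch using only the stdlib (a plain
# socket probe, so no particular server endpoint is assumed):
import socket
import time

def wait_for_server(host="127.0.0.1", port=8000, timeout=300):
    # Poll until a TCP connection succeeds or the timeout expires
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=2):
                return
        except OSError:
            time.sleep(2)
    raise TimeoutError(f"llama.cpp server not reachable on {host}:{port}")

wait_for_server()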
# Imports needed on the client side
import requests
import base64
import json

# Function to base64-encode the image so it can be embedded in the JSON payload
def encode_image(image_path):
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
def llava_gen(prompt, image):
    # Encode the caller's image; the prompt references it by id via [img-10]
    base64_image = encode_image(image)
    headers = {
        'Content-Type': 'application/json',
    }
    json_data = {
        'image_data': [{
            'data': base64_image,
            'id': 10
        }],
        'prompt': f'USER:[img-10]{prompt}\nASSISTANT:',
        'stream': True
    }
    # With stream=True the server sends server-sent events: lines of "data: {...}"
    response = requests.post("http://localhost:8000/completion", headers=headers, json=json_data, stream=True)
    for line in response.iter_lines():
        if not line:
            continue
        decoded = line.decode('utf-8')
        if decoded.startswith('data: '):
            try:
                content_json = json.loads(decoded[len('data: '):])
            except json.JSONDecodeError:
                break
            print(content_json.get("content", ""), end='', flush=True)
            # The final streamed event carries stop=true
            if content_json.get("stop"):
                break
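# Example call, reusing the image path from the original snippet; the model's
# description streams to stdout token by token as the server generates it.
llava_gen("Describe the image.", "/content/fullbblock.jpg")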