Llava 1.6 34B
This is a highly performant open-source vision language model with capabilities similar to GPT-4.
Deploy Llava 1.6 34B behind an API endpoint in seconds.
Deploy model
Example usage
To run inference with this model there are two main expected inputs:
- query: This is the instruction given to the model
- image: This is a base64-encoded image
The model will output text based on the query provided.
import base64
import os
from io import BytesIO

import requests
from PIL import Image

# Replace the empty string with your model id below
model_id = ""
# The API key is read from the environment so it is never hard-coded in the
# script; a missing BASETEN_API_KEY raises KeyError on this line.
baseten_api_key = os.environ["BASETEN_API_KEY"]

def pil_to_b64(pil_img) -> str:
    """Return *pil_img* encoded as a base64 PNG string.

    The image is written in PNG format to an in-memory buffer, and the
    buffer's bytes are base64-encoded and returned as text so they can be
    embedded in a JSON request body.

    Args:
        pil_img: An object exposing ``save(fp, format=...)``, such as a
            PIL image.

    Returns:
        The base64 text of the PNG bytes.
    """
    buffered = BytesIO()
    pil_img.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str

# Open the image in a context manager so the file handle is released as soon
# as the image has been encoded.
with Image.open("/path/to/image/mountain.jpeg") as image:
    data = {
        "query": "Provide a detailed description of the image",
        "image": pil_to_b64(image),
        "stream": True,
        "max_tokens": 512,
    }

# Call model endpoint
res = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {baseten_api_key}"},
    json=data,
    stream=True,
)
# Fail loudly on an HTTP error instead of printing the error body as if it
# were generated text.
res.raise_for_status()

# Print the generated tokens as they get streamed.
# iter_content() defaults to 1-byte chunks, and decoding single bytes raises
# UnicodeDecodeError on any multi-byte UTF-8 character. Setting the encoding
# and passing decode_unicode=True makes requests decode incrementally, and
# chunk_size=None yields data as soon as it arrives.
res.encoding = "utf-8"
for content in res.iter_content(chunk_size=None, decode_unicode=True):
    print(content, end="", flush=True)
[
  "The",
  "image",
  "depicts",
  "a",
  "majestic",
  "...."
]
Here is an example without streaming the tokens.
import base64
import os
from io import BytesIO

import requests
from PIL import Image

# Replace the empty string with your model id below
model_id = ""
# The API key is read from the environment so it is never hard-coded in the
# script; a missing BASETEN_API_KEY raises KeyError on this line.
baseten_api_key = os.environ["BASETEN_API_KEY"]

def pil_to_b64(pil_img) -> str:
    """Return *pil_img* encoded as a base64 PNG string.

    The image is written in PNG format to an in-memory buffer, and the
    buffer's bytes are base64-encoded and returned as text so they can be
    embedded in a JSON request body.

    Args:
        pil_img: An object exposing ``save(fp, format=...)``, such as a
            PIL image.

    Returns:
        The base64 text of the PNG bytes.
    """
    buffered = BytesIO()
    pil_img.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str

# Open the image in a context manager so the file handle is released as soon
# as the image has been encoded.
with Image.open("/path/to/image/mountain.jpeg") as image:
    data = {
        "query": "Provide a detailed description of the image",
        "image": pil_to_b64(image),
        "stream": False,
        "max_tokens": 512,
    }

# Call model endpoint
res = requests.post(
    f"https://model-{model_id}.api.baseten.co/production/predict",
    headers={"Authorization": f"Api-Key {baseten_api_key}"},
    json=data,
)
# Surface HTTP errors as an exception rather than attempting to parse an
# error response as a model result.
res.raise_for_status()

# Print the entire output
print(res.json())
{
  "result": "The image depicts a majestic granite cliff face, which is characteristic of the Half Dome in Yosemite National Park, California. The cliff is bathed in sunlight, highlighting its smooth, curved surface and the vertical cracks that are typical of such geological formations. The sky above is a clear blue with a few scattered clouds, suggesting a sunny day. In the foreground, there are trees with green and yellow leaves, indicating that the photo might have been taken in the fall season. The overall scene conveys a sense of natural beauty and grandeur."
}