
Commit

Revert "[Model][OpenVINO] Fix regressions from vllm-project#8346 (vll…
Browse files Browse the repository at this point in the history
…m-project#10045)"

This reverts commit ffc0f2b.
flaviabeo committed Nov 6, 2024
1 parent b4fdf7d commit bf80f0b
Showing 3 changed files with 5 additions and 15 deletions.
.buildkite/run-openvino-test.sh (2 changes: 1 addition & 1 deletion)

@@ -11,4 +11,4 @@ trap remove_docker_container EXIT
remove_docker_container

# Run the image and launch offline inference
-docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference.py
+docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/vllm/examples/offline_inference.py
vllm/attention/backends/openvino.py (12 changes: 1 addition & 11 deletions)

@@ -1,13 +1,12 @@
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple, Type
+from typing import List, Tuple, Type

 import openvino as ov
 import torch

 from vllm.attention.backends.abstract import (AttentionBackend,
                                               AttentionMetadata)
 from vllm.attention.backends.utils import CommonAttentionState
-from vllm.multimodal import MultiModalPlaceholderMap


 def copy_cache_block(src_tensor: ov.Tensor, dst_tensor: ov.Tensor,
@@ -129,12 +128,3 @@ class OpenVINOAttentionMetadata:
     # Shape: scalar
     # Type: i32
     max_context_len: torch.Tensor
-
-    # The index maps that relate multi-modal embeddings to the corresponding
-    # placeholders.
-    #
-    # N.B. These aren't really related to attention and don't belong on this
-    # type -- this is just a temporary solution to make them available to
-    # `model_executable`.
-    multi_modal_placeholder_index_maps: Optional[Dict[
-        str, MultiModalPlaceholderMap.IndexMap]]
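
For quick reference, here is a minimal sketch of the reverted metadata class once this hunk is applied; fields other than max_context_len are elided, so this is an abridged illustration rather than the full definition in vllm/attention/backends/openvino.py:

from dataclasses import dataclass

import torch


@dataclass
class OpenVINOAttentionMetadata:
    # ...other fields elided for brevity; see the full file...

    # Maximum context length in the batch.
    # Shape: scalar
    # Type: i32
    max_context_len: torch.Tensor

    # After the revert the class ends here: the multi-modal placeholder
    # index maps (and the MultiModalPlaceholderMap import backing them)
    # are gone again.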
vllm/model_executor/models/molmo.py (6 changes: 3 additions & 3 deletions)

@@ -21,8 +21,8 @@
                               get_tensor_model_parallel_world_size,
                               split_tensor_along_last_dim,
                               tensor_model_parallel_all_gather)
-from vllm.inputs import (INPUT_REGISTRY, DecoderOnlyInputs, DummyData,
-                         InputContext, token_inputs)
+from vllm.inputs import (INPUT_REGISTRY, DecoderOnlyInputs, InputContext,
+                         token_inputs)
from vllm.model_executor import SamplingMetadata
from vllm.model_executor.layers.activation import QuickGELU, SiluAndMul
from vllm.model_executor.layers.layernorm import RMSNorm
@@ -915,7 +915,7 @@ def dummy_data_for_molmo(ctx: InputContext, seq_len: int,
     if "image_masks" in out:
         dummy_imgdata["image_masks"] = out["image_masks"]
     dummy_imgdata["seq_len"] = torch.tensor(seq_len, dtype=torch.long)
-    return DummyData(dummy_seqdata, {"image": dummy_imgdata})
+    return dummy_seqdata, {"image": dummy_imgdata}


def pad_images(
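To make the molmo.py change concrete, here is a hedged stub of the post-revert return shape; the variable names come from the diff above, but the function itself is illustrative only and not the real dummy_data_for_molmo:

from typing import Any, Dict, Tuple

import torch


def make_molmo_dummy(dummy_seqdata: Any, dummy_imgdata: Dict[str, Any],
                     seq_len: int) -> Tuple[Any, Dict[str, Any]]:
    """Illustrative stub; the real logic lives in molmo.py."""
    dummy_imgdata["seq_len"] = torch.tensor(seq_len, dtype=torch.long)
    # Post-revert convention: a plain (sequence data, multi-modal data)
    # tuple instead of the DummyData wrapper added by vllm-project#10045.
    return dummy_seqdata, {"image": dummy_imgdata}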
