diff --git a/Makefile b/Makefile
index 90e54a6..8084543 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ DEV_PYTHON ?= 3.11
 DOCKER ?= docker
 ARGS=
 
-.PHONY: all install build serve clean client lint format test integration_tests docker_build docker_run
+.PHONY: all install build serve clean lint format test integration_tests docker_build docker_run
 
 all: build
 
@@ -25,9 +25,6 @@ clean:
 	poetry run python -m scripts.clean
 	poetry env remove --all
 
-client: install
-	poetry run python -m client.main $(ARGS)
-
 lint: install
 	poetry run nox -s lint
 
diff --git a/client/__init__.py b/client/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/client/chat/adapter.py b/client/chat/adapter.py
deleted file mode 100644
index 7cd67b6..0000000
--- a/client/chat/adapter.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""
-Classes to test the various models supported by the DIAL adapter.
-"""
-
-from typing import AsyncGenerator, List, Optional
-
-from aidial_sdk.chat_completion import Message, Role
-
-from aidial_adapter_vertexai.adapters import get_chat_completion_model
-from aidial_adapter_vertexai.chat.chat_completion_adapter import (
-    ChatCompletionAdapter,
-)
-from aidial_adapter_vertexai.chat.errors import UserError
-from aidial_adapter_vertexai.chat.gemini.inputs import MessageWithResources
-from aidial_adapter_vertexai.deployments import ChatCompletionDeployment
-from aidial_adapter_vertexai.dial_api.request import ModelParameters
-from aidial_adapter_vertexai.dial_api.token_usage import TokenUsage
-from client.chat.base import Chat
-from client.chat.collect_consumer import CollectConsumer
-from client.utils.concurrency import str_callback_to_stream_generator
-
-
-class AdapterChat(Chat):
-    model: ChatCompletionAdapter
-    history: List[Message]
-
-    def __init__(self, model: ChatCompletionAdapter):
-        self.model = model
-        self.history = []
-
-    @classmethod
-    async def create(
-        cls, location: str, project: str, deployment: ChatCompletionDeployment
-    ) -> "AdapterChat":
-        model = await get_chat_completion_model(
-            location=location,
-            project_id=project,
-            deployment=deployment,
-            headers={},
-        )
-
-        return cls(model)
-
-    async def send_message(
-        self,
-        prompt: MessageWithResources,
-        params: ModelParameters,
-        usage: TokenUsage,
-    ) -> AsyncGenerator[str, None]:
-        self.history.append(prompt.message)
-
-        consumer: Optional[CollectConsumer] = None
-
-        async def task(on_content):
-            nonlocal consumer
-            consumer = CollectConsumer(on_content=on_content)
-            prompt = await self.model.parse_prompt(self.history)
-            if isinstance(prompt, UserError):
-                raise prompt
-
-            await self.model.chat(params, consumer, prompt)
-
-        async def on_content(chunk: str):
-            return
-
-        async for chunk in str_callback_to_stream_generator(task, on_content):
-            yield chunk
-
-        assert consumer is not None
-
-        self.history.append(
-            Message(role=Role.ASSISTANT, content=consumer.content)
-        )
-
-        usage.accumulate(consumer.usage)
diff --git a/client/chat/base.py b/client/chat/base.py
deleted file mode 100644
index e41d3a5..0000000
--- a/client/chat/base.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import AsyncIterator
-
-from aidial_adapter_vertexai.chat.gemini.inputs import MessageWithResources
-from aidial_adapter_vertexai.deployments import ChatCompletionDeployment
-from aidial_adapter_vertexai.dial_api.request import ModelParameters
-from aidial_adapter_vertexai.dial_api.token_usage import TokenUsage
-
-
-class Chat(ABC):
-    @classmethod
-    @abstractmethod
-    async def create(
-        cls, location: str, project: str, deployment: ChatCompletionDeployment
-    ) -> "Chat":
-        pass
-
-    @abstractmethod
-    def send_message(
-        self,
-        prompt: MessageWithResources,
-        params: ModelParameters,
-        usage: TokenUsage,
-    ) -> AsyncIterator[str]:
-        pass
diff --git a/client/chat/collect_consumer.py b/client/chat/collect_consumer.py
deleted file mode 100644
index f54283a..0000000
--- a/client/chat/collect_consumer.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from typing import Callable, Coroutine, List, Optional
-
-from aidial_sdk.chat_completion import Attachment, FinishReason
-
-from aidial_adapter_vertexai.chat.consumer import Consumer
-from aidial_adapter_vertexai.dial_api.token_usage import TokenUsage
-
-ContentCallback = Callable[[str], Coroutine[None, str, None]]
-
-
-class CollectConsumer(Consumer):
-    usage: TokenUsage
-    content: str
-    attachments: List[Attachment]
-    finish_reason: Optional[FinishReason]
-
-    on_content: Optional[ContentCallback]
-
-    def __init__(self, on_content: Optional[ContentCallback] = None):
-        self.usage = TokenUsage()
-        self.content = ""
-        self.attachments = []
-        self.finish_reason = None
-
-        self.on_content = on_content
-
-    async def append_content(self, content: str):
-        if self.on_content:
-            await self.on_content(content)
-        self.content += content
-
-    async def add_attachment(self, attachment: Attachment):
-        self.attachments.append(attachment)
-
-    async def set_usage(self, usage: TokenUsage):
-        self.usage = usage
-
-    async def set_finish_reason(self, finish_reason: FinishReason):
-        if self.finish_reason is None:
-            self.finish_reason = finish_reason
-        else:
-            assert (
-                self.finish_reason == finish_reason
-            ), "finish_reason was set twice with different values"
diff --git a/client/chat/sdk.py b/client/chat/sdk.py
deleted file mode 100644
index afed095..0000000
--- a/client/chat/sdk.py
+++ /dev/null
@@ -1,240 +0,0 @@
-"""
-Classes to test the various models directly through the VertexAI SDK
-"""
-
-from datetime import datetime
-from pathlib import Path
-from typing import AsyncIterator, assert_never
-
-import vertexai
-from vertexai.preview.generative_models import ChatSession as GenChatSession
-from vertexai.preview.generative_models import GenerationConfig, GenerativeModel
-from vertexai.preview.language_models import ChatModel
-from vertexai.preview.language_models import ChatSession as LangChatSession
-from vertexai.preview.language_models import CodeChatModel
-from vertexai.preview.language_models import (
-    CodeChatSession as LangCodeChatSession,
-)
-from vertexai.preview.vision_models import (
-    ImageGenerationModel,
-    ImageGenerationResponse,
-)
-
-from aidial_adapter_vertexai.chat.gemini.adapter import default_safety_settings
-from aidial_adapter_vertexai.chat.gemini.inputs import MessageWithResources
-from aidial_adapter_vertexai.deployments import ChatCompletionDeployment
-from aidial_adapter_vertexai.dial_api.request import ModelParameters
-from aidial_adapter_vertexai.dial_api.token_usage import TokenUsage
-from aidial_adapter_vertexai.utils.json import json_dumps, json_dumps_short
-from aidial_adapter_vertexai.utils.log_config import vertex_ai_logger as log
-from client.chat.base import Chat
-from client.utils.files import get_project_root
-from client.utils.printing import print_info
-
-LangSession = LangChatSession | LangCodeChatSession
-
-
-class SDKLangChat(Chat):
-    chat: LangSession
-
-    def __init__(self, chat: LangSession):
-        self.chat = chat
-
-    @classmethod
-    async def create(
-        cls, location: str, project: str, deployment: ChatCompletionDeployment
-    ) -> "SDKLangChat":
-        vertexai.init(project=project, location=location)
-        model = get_language_model_by_deployment(deployment)
-        chat = model.start_chat()
-        return cls(chat)
-
-    async def send_message(
-        self,
-        prompt: MessageWithResources,
-        params: ModelParameters,
-        usage: TokenUsage,
-    ) -> AsyncIterator[str]:
-        parameters = {
-            "max_output_tokens": params.max_tokens,
-            "temperature": params.temperature,
-            "stop_sequences": params.stop,
-            "top_p": params.top_p,
-        }
-
-        if not params.stream:
-            parameters["candidate_count"] = params.n
-
-        message = prompt.to_text()
-
-        if params.stream:
-            responses = self.chat.send_message_streaming(
-                message=message, **parameters
-            )
-            for response in responses:
-                yield response.text
-        else:
-            yield self.chat.send_message(message=message, **parameters).text
-
-
-def create_generation_config(params: ModelParameters) -> GenerationConfig:
-    return GenerationConfig(
-        max_output_tokens=params.max_tokens,
-        temperature=params.temperature,
-        stop_sequences=(
-            [params.stop] if isinstance(params.stop, str) else params.stop
-        ),
-        top_p=params.top_p,
-        candidate_count=1 if params.stream else params.n,
-    )
-
-
-class SDKGenChat(Chat):
-    chat: GenChatSession
-
-    def __init__(self, chat: GenChatSession):
-        self.chat = chat
-
-    @classmethod
-    async def create(
-        cls, location: str, project: str, deployment: ChatCompletionDeployment
-    ) -> "SDKGenChat":
-        vertexai.init(project=project, location=location)
-
-        match deployment:
-            case (
-                ChatCompletionDeployment.GEMINI_PRO_1
-                | ChatCompletionDeployment.GEMINI_PRO_VISION_1
-                | ChatCompletionDeployment.GEMINI_PRO_VISION_1_5
-            ):
-                model = GenerativeModel(deployment)
-            case _:
-                raise ValueError(f"Unsupported model: {deployment}")
-
-        chat = GenChatSession(model=model, history=[])
-
-        return cls(chat)
-
-    async def send_message(
-        self,
-        prompt: MessageWithResources,
-        params: ModelParameters,
-        usage: TokenUsage,
-    ) -> AsyncIterator[str]:
-        config = create_generation_config(params)
-        content = prompt.to_parts()
-
-        log.debug(f"request config: {json_dumps(config)}")
-        log.debug(f"request content: {json_dumps_short(content)}")
-
-        if params.stream:
-            response = await self.chat._send_message_streaming_async(
-                content=content,  # type: ignore
-                generation_config=config,
-                safety_settings=default_safety_settings,
-                tools=None,
-            )
-
-            async for chunk in response:
-                log.debug(f"response chunk: {json_dumps(chunk)}")
-                yield chunk.text
-        else:
-            response = await self.chat._send_message_async(
-                content=content,  # type: ignore
-                generation_config=config,
-                safety_settings=default_safety_settings,
-                tools=None,
-            )
-
-            log.debug(f"response: {json_dumps(response)}")
-            yield response.text
-
-
-class SDKImagenChat(Chat):
-    model: ImageGenerationModel
-
-    def __init__(self, model):
-        self.model = model
-
-    @classmethod
-    async def create(
-        cls, location: str, project: str, deployment: ChatCompletionDeployment
-    ) -> "SDKImagenChat":
-        vertexai.init(project=project, location=location)
-        model = ImageGenerationModel.from_pretrained(deployment.value)
-        return cls(model)
-
-    @staticmethod
-    def get_filename(ext) -> Path:
-        dir = get_project_root() / "~images"
-        dir.mkdir(parents=True, exist_ok=True)
-
-        current_time = datetime.now()
-        filename = current_time.strftime("%Y-%m-%d_%H-%M-%S")
-        filename += ext
-
-        return dir / filename
-
-    async def send_message(
-        self,
-        prompt: MessageWithResources,
-        params: ModelParameters,
-        usage: TokenUsage,
-    ) -> AsyncIterator[str]:
-        response: ImageGenerationResponse = self.model.generate_images(
-            prompt.to_text(), number_of_images=1, seed=None
-        )
-
-        print_info(f"Response: {response}")
-
-        if len(response.images) == 0:
-            raise RuntimeError("Expected 1 image in response, but got none")
-
-        filename = str(SDKImagenChat.get_filename(".png"))
-        response[0].save(filename)
-        yield f"Generated image: {filename}"
-
-
-async def create_sdk_chat(
-    location: str, project: str, deployment: ChatCompletionDeployment
-) -> Chat:
-    match deployment:
-        case (
-            ChatCompletionDeployment.CHAT_BISON_1
-            | ChatCompletionDeployment.CHAT_BISON_2
-            | ChatCompletionDeployment.CHAT_BISON_2_32K
-            | ChatCompletionDeployment.CODECHAT_BISON_1
-            | ChatCompletionDeployment.CODECHAT_BISON_2
-            | ChatCompletionDeployment.CODECHAT_BISON_2_32K
-        ):
-            return await SDKLangChat.create(location, project, deployment)
-        case (
-            ChatCompletionDeployment.GEMINI_PRO_1
-            | ChatCompletionDeployment.GEMINI_PRO_VISION_1
-            | ChatCompletionDeployment.GEMINI_PRO_VISION_1_5
-        ):
-            return await SDKGenChat.create(location, project, deployment)
-        case ChatCompletionDeployment.IMAGEN_005:
-            return await SDKImagenChat.create(location, project, deployment)
-        case _:
-            assert_never(deployment)
-
-
-def get_language_model_by_deployment(
-    deployment: ChatCompletionDeployment,
-) -> ChatModel | CodeChatModel:
-    match deployment:
-        case (
-            ChatCompletionDeployment.CHAT_BISON_1
-            | ChatCompletionDeployment.CHAT_BISON_2
-            | ChatCompletionDeployment.CHAT_BISON_2_32K
-        ):
-            return ChatModel.from_pretrained(deployment)
-        case (
-            ChatCompletionDeployment.CODECHAT_BISON_1
-            | ChatCompletionDeployment.CODECHAT_BISON_2
-            | ChatCompletionDeployment.CODECHAT_BISON_2_32K
-        ):
-            return CodeChatModel.from_pretrained(deployment)
-        case _:
-            raise ValueError(f"Unsupported model: {deployment}")
diff --git a/client/conf.py b/client/conf.py
deleted file mode 100644
index ccc41e4..0000000
--- a/client/conf.py
+++ /dev/null
@@ -1,2 +0,0 @@
-MAX_INPUT_CHARS = 1024
-MAX_CHAT_TURNS = 128
diff --git a/client/config.py b/client/config.py
deleted file mode 100644
index 4ed2f6e..0000000
--- a/client/config.py
+++ /dev/null
@@ -1,102 +0,0 @@
-import argparse
-from enum import Enum
-from typing import Optional, Type
-
-from pydantic import BaseModel
-
-from aidial_adapter_vertexai.deployments import ChatCompletionDeployment
-from aidial_adapter_vertexai.dial_api.request import ModelParameters
-from client.utils.cli import select_enum, select_option
-
-
-class ClientMode(str, Enum):
-    ADAPTER = "Adapter"
-    SDK = "SDK"
-
-
-def enum_values(enum: Type[Enum]) -> list[str]:
-    return [e.value for e in enum]
-
-
-class Config(BaseModel):
-    mode: ClientMode
-    model_id: ChatCompletionDeployment
-    streaming: bool
-    max_tokens: Optional[int]
-    temperature: float
-
-    @classmethod
-    def get_interactive(cls) -> "Config":
-        parser = argparse.ArgumentParser()
-
-        parser.add_argument(
-            "--mode",
-            type=str,
-            required=False,
-            help=f"One of {enum_values(ClientMode)}",
-        )
-        parser.add_argument(
-            "--model",
-            type=str,
-            required=False,
-            help=f"One of the available models: {enum_values(ChatCompletionDeployment)}",
-        )
-        parser.add_argument(
-            "--max_tokens",
-            type=int,
-            required=False,
-            help="Max tokens",
-        )
-        parser.add_argument(
-            "-t",
-            type=float,
-            required=False,
-            help="Temperature",
-        )
-        parser.add_argument(
-            "--streaming",
-            required=False,
-            action="store_true",
-            help="Streaming mode",
-        )
-
-        args = parser.parse_args()
-
-        if args.mode is not None:
-            mode = ClientMode(args.mode)
-        else:
-            mode = select_enum("Mode", ClientMode)
-
-        if args.model is not None:
-            model_id = ChatCompletionDeployment(args.model)
-        else:
-            model_id = select_enum("Model", ChatCompletionDeployment)
-
-        if args.streaming is not None:
-            streaming = args.streaming
-        else:
-            streaming = select_option("Streaming", [False, True])
-
-        max_tokens = args.max_tokens
-        temperature = args.t or 0.0
-
-        return cls(
-            mode=mode,
-            model_id=model_id,
-            streaming=streaming,
-            max_tokens=max_tokens,
-            temperature=temperature,
-        )
-
-    def to_model_parameters(self) -> ModelParameters:
-        return ModelParameters(
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-            stream=self.streaming,
-        )
-
-    def __str__(self) -> str:
-        streaming = "streaming" if self.streaming else "non-streaming"
-        max_tokens = f"max_tokens={self.max_tokens}" if self.max_tokens else ""
-        params = ",".join(param for param in [streaming, max_tokens] if param)
-        return f"[{params}] {self.model_id.value} {self.mode.value}"
diff --git a/client/main.py b/client/main.py
deleted file mode 100755
index e1f9829..0000000
--- a/client/main.py
+++ /dev/null
@@ -1,98 +0,0 @@
-import asyncio
-from pathlib import Path
-from typing import List, Tuple, assert_never
-
-from aidial_sdk.chat_completion import CustomContent, Message, Role
-
-from aidial_adapter_vertexai.chat.gemini.inputs import MessageWithResources
-from aidial_adapter_vertexai.dial_api.request import ModelParameters
-from aidial_adapter_vertexai.dial_api.token_usage import TokenUsage
-from aidial_adapter_vertexai.utils.env import get_env
-from aidial_adapter_vertexai.utils.log_config import configure_loggers
-from aidial_adapter_vertexai.utils.resource import Resource
-from aidial_adapter_vertexai.utils.timer import Timer
-from client.chat.adapter import AdapterChat
-from client.chat.base import Chat
-from client.chat.sdk import create_sdk_chat
-from client.conf import MAX_CHAT_TURNS, MAX_INPUT_CHARS
-from client.config import ClientMode, Config
-from client.utils.input import make_input
-from client.utils.printing import print_ai, print_error, print_info
-
-configure_loggers()
-
-
-async def init_chat(params: Config) -> Tuple[Chat, ModelParameters]:
-    location = get_env("DEFAULT_REGION")
-    project = get_env("GCP_PROJECT_ID")
-
-    chat: Chat
-    match params.mode:
-        case ClientMode.ADAPTER:
-            chat = await AdapterChat.create(location, project, params.model_id)
-        case ClientMode.SDK:
-            chat = await create_sdk_chat(location, project, params.model_id)
-        case _:
-            assert_never(params.mode)
-
-    return chat, params.to_model_parameters()
-
-
-async def main():
-    chat, model_parameters = await init_chat(Config.get_interactive())
-
-    input = make_input()
-
-    resources: List[Resource] = []
-
-    turn = 0
-    while turn < MAX_CHAT_TURNS:
-        turn += 1
-
-        query = input()[:MAX_INPUT_CHARS]
-
-        if query in [":q", ":quit"]:
-            break
-        elif query in [":r", ":restart"]:
-            chat, model_parameters = await init_chat(Config.get_interactive())
-            continue
-        elif any(query.startswith(cmd) for cmd in [":a ", ":attach "]):
-            path = Path(query.split(" ", 1)[1])
-            resources.append(Resource.from_path(path))
-            continue
-        elif query == "":
-            continue
-
-        usage = TokenUsage()
-        timer = Timer()
-
-        attachments = [res.to_attachment() for res in resources]
-        message = Message(
-            role=Role.USER,
-            content=query,
-            custom_content=CustomContent(attachments=attachments),
-        )
-
-        try:
-            async for chunk in chat.send_message(
-                MessageWithResources(message=message, resources=resources),
-                model_parameters,
-                usage,
-            ):
-                print_ai(chunk, end="")
-
-            print_ai("")
-        except Exception as e:
-            print_error(f"Error: {str(e)}")
-
-        resources = []
-
-        print_info(f"Timing: {timer}")
-        print_info(f"Usage: {usage}")
-
-
-if __name__ == "__main__":
-    try:
-        asyncio.run(main())
-    except KeyboardInterrupt:
-        print_info("Shutting down...")
diff --git a/client/utils/__init__.py b/client/utils/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/client/utils/cli.py b/client/utils/cli.py
deleted file mode 100644
index efa3882..0000000
--- a/client/utils/cli.py
+++ /dev/null
@@ -1,33 +0,0 @@
-from enum import Enum
-from typing import List, Type, TypeVar
-
-import inquirer
-
-V = TypeVar("V")
-
-
-def select_option(title: str, options: List[V]) -> V:
-    questions = [
-        inquirer.List(
-            "option",
-            message=title,
-            choices=[(str(option), option) for option in options],
-            carousel=True,
-        ),
-    ]
-    return inquirer.prompt(questions)["option"]  # type: ignore
-
-
-T = TypeVar("T", bound=Enum)
-
-
-def select_enum(title: str, enum: Type[T]) -> T:
-    questions = [
-        inquirer.List(
-            "option",
-            message=title,
-            choices=[(option.value, option) for option in enum],
-            carousel=True,
-        ),
-    ]
-    return inquirer.prompt(questions)["option"]  # type: ignore
diff --git a/client/utils/concurrency.py b/client/utils/concurrency.py
deleted file mode 100644
index ecc3089..0000000
--- a/client/utils/concurrency.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import asyncio
-
-
-async def str_callback_to_stream_generator(task, callback):
-    queue = asyncio.Queue()
-
-    async def new_callback(chunk: str):
-        await callback(chunk)
-        await queue.put(chunk)
-
-    async def new_task():
-        await task(new_callback)
-        await queue.put(None)
-
-    response_task = asyncio.create_task(new_task())
-
-    done_response = False
-    done_chunks = False
-
-    while True:
-        chunk_task = asyncio.create_task(queue.get())
-
-        done, _pending = await asyncio.wait(
-            [response_task, chunk_task],
-            return_when=asyncio.FIRST_COMPLETED,
-        )
-
-        if response_task in done:
-            response_task.result()
-            done_response = True
-
-        chunk = chunk_task.result() if chunk_task in done else await chunk_task
-
-        if chunk is None:
-            done_chunks = True
-        else:
-            yield chunk
-
-        if done_response and done_chunks:
-            break
diff --git a/client/utils/files.py b/client/utils/files.py
deleted file mode 100644
index 01299e9..0000000
--- a/client/utils/files.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from pathlib import Path
-
-
-def get_project_root() -> Path:
-    return Path(__file__).parent.parent
diff --git a/client/utils/init.py b/client/utils/init.py
deleted file mode 100644
index 5477bc3..0000000
--- a/client/utils/init.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from dotenv import load_dotenv
-
-
-def init():
-    load_dotenv()  # take environment variables from .env.
diff --git a/client/utils/input.py b/client/utils/input.py
deleted file mode 100644
index 6e3513d..0000000
--- a/client/utils/input.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from prompt_toolkit import PromptSession
-from prompt_toolkit.history import FileHistory
-from prompt_toolkit.styles import Style
-
-from client.utils.files import get_project_root
-
-
-def make_input(max_input_chars: int = 1024):
-    session = None
-
-    def input(prompt_text="> ", style=Style.from_dict({"": "#ff0000"})) -> str:
-        nonlocal session
-        if session is None:
-            session = PromptSession(
-                history=FileHistory(str(get_project_root() / ".history"))
-            )
-
-        response = session.prompt(prompt_text, style=style, in_thread=True)
-        return response[:max_input_chars]
-
-    return input
diff --git a/client/utils/printing.py b/client/utils/printing.py
deleted file mode 100644
index 60b42e8..0000000
--- a/client/utils/printing.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from colorama import Back, Fore, Style
-
-
-def get_input(prefix: str = "") -> str:
-    print(Fore.RED, flush=True, end="")
-    content = input(prefix)
-    print(Style.RESET_ALL, flush=True, end="")
-    return content
-
-
-def print_red(msg: str, **args):
-    print(Fore.RED + msg + Style.RESET_ALL, flush=True, **args)
-
-
-def print_error(msg: str, **args):
-    print(Fore.RED + msg + Style.RESET_ALL, flush=True, **args)
-
-
-def print_warning(msg: str, **args):
-    print(Fore.YELLOW + msg + Style.RESET_ALL, flush=True, **args)
-
-
-def print_info(msg: str, **args):
-    print(Fore.YELLOW + msg + Style.RESET_ALL, flush=True, **args)
-
-
-def print_system(msg: str, **args):
-    print(Back.LIGHTBLACK_EX + msg + Style.RESET_ALL, flush=True, **args)
-
-
-def print_human(msg: str, **args):
-    print(Fore.GREEN + msg + Style.RESET_ALL, flush=True, **args)
-
-
-def print_ai(msg: str, **args):
-    print(Fore.BLUE + msg + Style.RESET_ALL, flush=True, **args)
diff --git a/poetry.lock b/poetry.lock
index 1ac06ce..d49aebb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -161,17 +161,6 @@ files = [
 [package.dependencies]
 frozenlist = ">=1.1.0"
 
-[[package]]
-name = "ansicon"
-version = "1.89.0"
-description = "Python wrapper for loading Jason Hood's ANSICON"
-optional = false
-python-versions = "*"
-files = [
-    {file = "ansicon-1.89.0-py2.py3-none-any.whl", hash = "sha256:f1def52d17f65c2c9682cf8370c03f541f410c1752d6a14029f97318e4b9dfec"},
-    {file = "ansicon-1.89.0.tar.gz", hash = "sha256:e4d039def5768a47e4afec8e89e83ec3ae5a26bf00ad851f914d1240b444d2b1"},
-]
-
 [[package]]
 name = "anyio"
 version = "3.7.1"
@@ -307,22 +296,6 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"]
 jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
 uvloop = ["uvloop (>=0.15.2)"]
 
-[[package]]
-name = "blessed"
-version = "1.20.0"
-description = "Easy, practical library for making terminal apps, by providing an elegant, well-documented interface to Colors, Keyboard input, and screen Positioning capabilities."
-optional = false
-python-versions = ">=2.7"
-files = [
-    {file = "blessed-1.20.0-py2.py3-none-any.whl", hash = "sha256:0c542922586a265e699188e52d5f5ac5ec0dd517e5a1041d90d2bbf23f906058"},
-    {file = "blessed-1.20.0.tar.gz", hash = "sha256:2cdd67f8746e048f00df47a2880f4d6acbcdb399031b604e34ba8f71d5787680"},
-]
-
-[package.dependencies]
-jinxed = {version = ">=1.1.0", markers = "platform_system == \"Windows\""}
-six = ">=1.9.0"
-wcwidth = ">=0.1.4"
-
 [[package]]
 name = "cachetools"
 version = "5.3.1"
@@ -1169,22 +1142,6 @@ files = [
     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
 ]
 
-[[package]]
-name = "inquirer"
-version = "3.1.3"
-description = "Collection of common interactive command line user interfaces, based on Inquirer.js"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "inquirer-3.1.3-py3-none-any.whl", hash = "sha256:a7441fd74d06fcac4385218a1f5e8703f7a113f7944e01af47b8c58e84f95ce5"},
-    {file = "inquirer-3.1.3.tar.gz", hash = "sha256:aac309406f5b49d4b8ab7c6872117f43bf082a552dc256aa16bc95e16bb58bec"},
-]
-
-[package.dependencies]
-blessed = ">=1.19.0"
-python-editor = ">=1.0.4"
-readchar = ">=3.0.6"
-
 [[package]]
 name = "isort"
 version = "5.12.0"
@@ -1202,20 +1159,6 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"
 plugins = ["setuptools"]
 requirements-deprecated-finder = ["pip-api", "pipreqs"]
 
-[[package]]
-name = "jinxed"
-version = "1.2.0"
-description = "Jinxed Terminal Library"
-optional = false
-python-versions = "*"
-files = [
-    {file = "jinxed-1.2.0-py2.py3-none-any.whl", hash = "sha256:cfc2b2e4e3b4326954d546ba6d6b9a7a796ddcb0aef8d03161d005177eb0d48b"},
-    {file = "jinxed-1.2.0.tar.gz", hash = "sha256:032acda92d5c57cd216033cbbd53de731e6ed50deb63eb4781336ca55f72cda5"},
-]
-
-[package.dependencies]
-ansicon = {version = "*", markers = "platform_system == \"Windows\""}
-
 [[package]]
 name = "jsonpatch"
 version = "1.33"
@@ -2007,20 +1950,6 @@ files = [
 [package.extras]
 twisted = ["twisted"]
 
-[[package]]
-name = "prompt-toolkit"
-version = "3.0.38"
-description = "Library for building powerful interactive command lines in Python"
-optional = false
-python-versions = ">=3.7.0"
-files = [
-    {file = "prompt_toolkit-3.0.38-py3-none-any.whl", hash = "sha256:45ea77a2f7c60418850331366c81cf6b5b9cf4c7fd34616f733c5427e6abbb1f"},
-    {file = "prompt_toolkit-3.0.38.tar.gz", hash = "sha256:23ac5d50538a9a38c8bde05fecb47d0b403ecd0662857a86f886f798563d5b9b"},
-]
-
-[package.dependencies]
-wcwidth = "*"
-
 [[package]]
 name = "proto-plus"
 version = "1.22.3"
@@ -2304,18 +2233,6 @@ files = [
 [package.extras]
 cli = ["click (>=5.0)"]
 
-[[package]]
-name = "python-editor"
-version = "1.0.4"
-description = "Programmatically open an editor, capture the result."
-optional = false
-python-versions = "*"
-files = [
-    {file = "python-editor-1.0.4.tar.gz", hash = "sha256:51fda6bcc5ddbbb7063b2af7509e43bd84bfc32a4ff71349ec7847713882327b"},
-    {file = "python_editor-1.0.4-py2-none-any.whl", hash = "sha256:5f98b069316ea1c2ed3f67e7f5df6c0d8f10b689964a4a811ff64f0106819ec8"},
-    {file = "python_editor-1.0.4-py3-none-any.whl", hash = "sha256:1bf6e860a8ad52a14c3ee1252d5dc25b2030618ed80c022598f00176adc8367d"},
-]
-
 [[package]]
 name = "pyyaml"
 version = "6.0.1"
@@ -2375,20 +2292,6 @@ files = [
     {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
 ]
 
-[[package]]
-name = "readchar"
-version = "4.0.5"
-description = "Library to easily read single chars and key strokes"
-optional = false
-python-versions = ">=3.7"
-files = [
-    {file = "readchar-4.0.5-py3-none-any.whl", hash = "sha256:76ec784a5dd2afac3b7da8003329834cdd9824294c260027f8c8d2e4d0a78f43"},
-    {file = "readchar-4.0.5.tar.gz", hash = "sha256:08a456c2d7c1888cde3f4688b542621b676eb38cd6cfed7eb6cb2e2905ddc826"},
-]
-
-[package.dependencies]
-setuptools = ">=41.0"
-
 [[package]]
 name = "regex"
 version = "2023.12.25"
@@ -2806,17 +2709,6 @@ platformdirs = ">=3.9.1,<4"
 docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
 test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
 
-[[package]]
-name = "wcwidth"
-version = "0.2.8"
-description = "Measures the displayed width of unicode strings in a terminal"
-optional = false
-python-versions = "*"
-files = [
-    {file = "wcwidth-0.2.8-py2.py3-none-any.whl", hash = "sha256:77f719e01648ed600dfa5402c347481c0992263b81a027344f3e1ba25493a704"},
-    {file = "wcwidth-0.2.8.tar.gz", hash = "sha256:8705c569999ffbb4f6a87c6d1b80f324bd6db952f5eb0b95bc07517f4c1813d4"},
-]
-
 [[package]]
 name = "wrapt"
 version = "1.15.0"
@@ -3006,4 +2898,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.11"
-content-hash = "4b98015340dcc9a7faba81bb63f370667f420a325d5fb1bfff8b0493fa8173ba"
+content-hash = "f50c96b2b56fa57740a345a8d9e4dca6a5365b5552ad0cd20efb77960ab72a42"
diff --git a/pyproject.toml b/pyproject.toml
index 15c9ab4..9c90435 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -43,9 +43,6 @@ pyright = "1.1.324"
 
 [tool.poetry.group.dev.dependencies]
 nox = "^2023.4.22"
-colorama = "^0.4.4"
-inquirer = "3.1.3"
-prompt-toolkit = "3.0.38"
 google-auth-oauthlib = "1.0.0"
 langchain-openai = "0.0.8"
 langchain-core = "0.1.35"
diff --git a/tests/utils/callback.py b/tests/utils/callback.py
index 2508475..2136b1d 100644
--- a/tests/utils/callback.py
+++ b/tests/utils/callback.py
@@ -4,8 +4,6 @@
 from langchain_core.outputs import LLMResult
 from typing_extensions import override
 
-from client.utils.printing import print_ai
-
 
 class CallbackWithNewLines(StreamingStdOutCallbackHandler):
     prev: str
@@ -31,8 +29,8 @@ def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
             token = token[:-1]
 
         s = token.replace("\\n", "\n").replace('\\"', '"')
-        print_ai(s, end="")
+        print(s, end="")
 
     @override
     def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
-        print_ai("")
+        print("")