From 1431b1c0766700be5f87c40ad150c08c49f78fb4 Mon Sep 17 00:00:00 2001 From: Ajeet Singh Raina Date: Thu, 6 Jun 2024 22:39:45 +0530 Subject: [PATCH] Added --- README.md | 211 +++++++++++++++++++++++++ krs/__init__.py | 0 krs/krs.py | 101 ++++++++++++ krs/main.py | 297 +++++++++++++++++++++++++++++++++++ krs/requirements.txt | 3 + krs/utils/__init__.py | 0 krs/utils/cluster_scanner.py | 105 +++++++++++++ krs/utils/constants.py | 18 +++ krs/utils/fetch_tools_krs.py | 83 ++++++++++ krs/utils/functional.py | 75 +++++++++ krs/utils/llm_client.py | 202 ++++++++++++++++++++++++ setup.py | 28 ++++ 12 files changed, 1123 insertions(+) create mode 100644 README.md create mode 100644 krs/__init__.py create mode 100755 krs/krs.py create mode 100644 krs/main.py create mode 100644 krs/requirements.txt create mode 100644 krs/utils/__init__.py create mode 100644 krs/utils/cluster_scanner.py create mode 100644 krs/utils/constants.py create mode 100644 krs/utils/fetch_tools_krs.py create mode 100644 krs/utils/functional.py create mode 100644 krs/utils/llm_client.py create mode 100644 setup.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..685ef49 --- /dev/null +++ b/README.md @@ -0,0 +1,211 @@ +# Kubetools Recommender System + + +image + +![Twitter](https://img.shields.io/twitter/follow/kubetools?style=social) + +A GenAI-powered Kubetools Recommender system for your Kubernetes cluster. It comes with the following capabilities: + +- Ability to scan your existing Kubernetes cluster +- Available in the form of CLI tool as well as listed on the [Kubetools](https://kubetools.io) webpage +- Ability to recommend you with the best tool and categories based on your running workloads +- Support OpenAI and Hugging Face + +image + + +## Prerequisites: + +1. A Kubernetes cluster up and running locally or in the Cloud. +2. 
Python 3.6+ + +Note: If the kube config path for your cluster is not the default *(~/.kube/config)*, ensure you are providing it during `krs init` + +## Tested Environment + +- Docker Desktop(Mac, Linux and Windows) +- Minikube + + +## Getting Started + + +## Clone the repository + +``` +git clone https://github.com/KrsGPTs/krs.git +``` + +### Install the Tool + +Change directory to /krs and run the following command to install krs locally on your system: + +``` +pip install . +```` + + +## Krs CLI + +``` + + Usage: krs [OPTIONS] COMMAND [ARGS]... + +╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ --install-completion Install completion for the current shell. │ +│ --show-completion Show completion for the current shell, to copy it or customize the installation. │ +│ --help Show this message and exit. │ +╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +╭─ Commands ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ +│ exit Ends krs services safely and deletes all state files from system. Removes all cached data. │ +│ export Exports pod info with logs and events. │ +│ health Starts an interactive terminal to chat with user. │ +│ init Initializes the services and loads the scanner. │ +│ namespaces Lists all the namespaces. │ +│ pods Lists all the pods with namespaces, or lists pods under a specified namespace. │ +│ recommend Generates a table of recommended tools from our ranking database and their CNCF project status. │ +│ scan Scans the cluster and extracts a list of tools that are currently used. 
│ +╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ +``` + +## Initialise and load the scanner + +Run the following command to initialize the services and loads the scanner. + + +``` +krs init +``` + +## Scan your cluster + +Run the following command to scan the cluster and extract a list of tools that are currently used. + +``` +krs scan +``` + +``` +krs scan + +Scanning your cluster... + +Cluster scanned successfully... + +Extracted tools used in cluster... + + +The cluster is using the following tools: + ++-------------+--------+-----------------------------+---------------+ +| Tool Name | Rank | Category | CNCF Status | ++=============+========+=============================+===============+ +| kubeview | 30 | Cluster with Core CLI tools | unlisted | ++-------------+--------+-----------------------------+---------------+ +| | 3 | Cluster Management | unlisted | ++-------------+--------+-----------------------------+---------------+ +``` + + +## Lists all the namespaces + +``` +krs namespaces +Namespaces in your cluster are: + +1. default +2. kube-node-lease +3. kube-public +4. kube-system +5. ns1 +``` + +## List pods under a specified namespace + +``` +krs pods --namespace ns1 + +Pods in namespace 'ns1': + +1. nginx-pod +``` + +## krs recommend + +Generates a table of recommended tools from our ranking database and their CNCF project status. 
+ +``` +krs recommend + +Our recommended tools for this deployment are: + ++-----------------------------+------------------+-------------+---------------+ +| Category | Recommendation | Tool Name | CNCF Status | ++=============================+==================+=============+===============+ +| Cluster with Core CLI tools | Recommended tool | k9s | unlisted | ++-----------------------------+------------------+-------------+---------------+ +| Cluster Management | Recommended tool | rancher | unlisted | ++-----------------------------+------------------+-------------+---------------+ +``` + +## Krs health + +``` +krs health + +Starting interactive terminal... + + +Choose the model provider for healthcheck: + +[1] OpenAI +[2] Huggingface + +>> +``` + +Let's say you choose 1, the it will install necessary libraries. + + +``` +Enter your OpenAI API key: sk-3im1ZgCbKXXXXXXXXegTpTyyOq2mR + +Enter the OpenAI model name: gpt-3.5-turbo +API key and model are valid. + +Namespaces in the cluster: + +1. default +2. kube-node-lease +3. kube-public +4. kube-system +5. ns1 + +Which namespace do you want to check the health for? Select a namespace by entering its number: >> 5 + +Pods in the namespace ns1: + +1. nginx-pod + +Which pod from ns1 do you want to check the health for? Select a pod by entering its number: >> +Checking status of the pod... + +Extracting logs and events from the pod... + +Logs and events from the pod extracted successfully! + + +Interactive session started. Type 'end chat' to exit from the session! + +>> The provided log entries are empty, as there is nothing between the curly braces {}. Therefore, everything looks good and there are no warnings or errors to report. 
#!/usr/bin/env python3

"""Typer CLI entry points for krs (Kubetools Recommender System)."""

import typer, os
from krs.main import KrsMain
from krs.utils.constants import KRSSTATE_PICKLE_FILEPATH, KRS_DATA_DIRECTORY

app = typer.Typer()
krs = KrsMain()


def check_initialized():
    """Abort the current command unless `krs init` has already created the state file."""
    if not os.path.exists(KRSSTATE_PICKLE_FILEPATH):
        typer.echo("KRS is not initialized. Please run 'krs init' first.")
        raise typer.Exit()


# Make sure the data directory exists before any command runs.
if not os.path.exists(KRS_DATA_DIRECTORY):
    os.mkdir(KRS_DATA_DIRECTORY)


@app.command()
def init():
    """
    Initializes the services and loads the scanner.
    """
    krs.initialize()
    typer.echo("Services initialized and scanner loaded.")


@app.command()
def scan():
    """
    Scans the cluster and extracts a list of tools that are currently used.
    """
    check_initialized()
    krs.scan_cluster()


@app.command()
def namespaces():
    """
    Lists all the namespaces.
    """
    check_initialized()
    names = krs.list_namespaces()
    typer.echo("Namespaces in your cluster are: \n")
    for index, name in enumerate(names, start=1):
        typer.echo(f"{index}. {name}")


@app.command()
def pods(namespace: str = typer.Option(None, help="Specify namespace to list pods from")):
    """
    Lists all the pods with namespaces, or lists pods under a specified namespace.
    """
    check_initialized()
    if not namespace:
        pod_names = krs.list_pods_all()
        typer.echo("\nAll pods in the cluster: \n")
    else:
        pod_names = krs.list_pods(namespace)
        # KrsMain.list_pods returns this sentinel string for an unknown namespace.
        if pod_names == 'wrong namespace name':
            typer.echo(f"\nWrong namespace name entered, try again!\n")
            raise typer.Abort()
        typer.echo(f"\nPods in namespace '{namespace}': \n")

    for index, pod_name in enumerate(pod_names, start=1):
        typer.echo(f"{index}. {pod_name}")


@app.command()
def recommend():
    """
    Generates a table of recommended tools from our ranking database and their CNCF project status.
    """
    check_initialized()
    krs.generate_recommendations()


@app.command()
def health(change_model: bool = typer.Option(False, help="Option to reinitialize/change the LLM, if set to True")):
    """
    Starts an interactive terminal to chat with user.
    """
    check_initialized()
    typer.echo("\nStarting interactive terminal...\n")
    krs.health_check(change_model)


@app.command()
def export():
    """
    Exports pod info with logs and events.
    """
    check_initialized()
    krs.export_pod_info()
    typer.echo("Pod info with logs and events exported. Json file saved to current directory!")


@app.command()
def exit():
    """
    Ends krs services safely and deletes all state files from system. Removes all cached data.
    """
    check_initialized()
    krs.exit()
    typer.echo("Krs services closed safely.")


if __name__ == "__main__":
    app()
from krs.utils.fetch_tools_krs import krs_tool_ranking_info
from krs.utils.cluster_scanner import KubetoolsScanner
from krs.utils.llm_client import KrsGPTClient
from krs.utils.functional import extract_log_entries, CustomJSONEncoder
from termcolor import colored
import os, pickle, time, json
from tabulate import tabulate
from krs.utils.constants import (KRSSTATE_PICKLE_FILEPATH, LLMSTATE_PICKLE_FILEPATH, POD_INFO_FILEPATH, KRS_DATA_DIRECTORY)

class KrsMain:
    """Orchestrates cluster scanning, tool recommendations, and the LLM health check.

    All long-lived state (scan results, tool rankings, kubeconfig path) is
    pickled to KRSSTATE_PICKLE_FILEPATH so that separate CLI invocations can
    share it.
    """

    def __init__(self):
        # Scan results; populated by scan_cluster()/check_scanned() or load_state().
        self.pod_info = None
        self.pod_list = None
        self.namespaces = None
        self.deployments = None
        self.state_file = KRSSTATE_PICKLE_FILEPATH
        self.isClusterScanned = False
        self.continue_chat = False
        self.logs_extracted = []
        self.scanner = None
        self.get_events = True
        self.get_logs = True
        self.cluster_tool_list = None
        self.detailed_cluster_tool_list = None
        self.category_cluster_tools_dict = None

        # NOTE(review): tools_dict / category_dict / cncf_status / config_file are
        # only defined by initialize() or load_state(); until one of them has run,
        # methods such as detect_tools_from_repo() would raise AttributeError.
        # The CLI guards this with its check_initialized() helper.
        self.load_state()

    def initialize(self, config_file='~/.kube/config'):
        """Fetch the tool-ranking data, build the cluster scanner, and persist state.

        config_file: path to the kubeconfig to scan (default ~/.kube/config).
        """
        self.config_file = config_file
        self.tools_dict, self.category_dict, cncf_status_dict = krs_tool_ranking_info()
        self.cncf_status = cncf_status_dict['cncftools']
        self.scanner = KubetoolsScanner(self.get_events, self.get_logs, self.config_file)
        self.save_state()

    def save_state(self):
        """Pickle all shareable state to self.state_file (creating its directory)."""
        state = {
            'pod_info': self.pod_info,
            'pod_list': self.pod_list,
            'namespaces': self.namespaces,
            'deployments': self.deployments,
            'cncf_status': self.cncf_status,
            'tools_dict': self.tools_dict,
            'category_tools_dict': self.category_dict,
            'extracted_logs': self.logs_extracted,
            'kubeconfig': self.config_file,
            'isScanned': self.isClusterScanned,
            'cluster_tool_list': self.cluster_tool_list,
            'detailed_tool_list': self.detailed_cluster_tool_list,
            'category_tool_list': self.category_cluster_tools_dict
        }
        os.makedirs(os.path.dirname(self.state_file), exist_ok=True)
        with open(self.state_file, 'wb') as f:
            pickle.dump(state, f)

    def load_state(self):
        """Restore pickled state from self.state_file, if it exists, and rebuild the scanner."""
        if os.path.exists(self.state_file):
            with open(self.state_file, 'rb') as f:
                state = pickle.load(f)
            self.pod_info = state.get('pod_info')
            self.pod_list = state.get('pod_list')
            self.namespaces = state.get('namespaces')
            self.deployments = state.get('deployments')
            self.cncf_status = state.get('cncf_status')
            self.tools_dict = state.get('tools_dict')
            self.category_dict = state.get('category_tools_dict')
            self.logs_extracted = state.get('extracted_logs')
            self.config_file = state.get('kubeconfig')
            self.isClusterScanned = state.get('isScanned')
            self.cluster_tool_list = state.get('cluster_tool_list')
            self.detailed_cluster_tool_list = state.get('detailed_tool_list')
            self.category_cluster_tools_dict = state.get('category_tool_list')
            # Scanner objects are not picklable; rebuild from the stored kubeconfig.
            self.scanner = KubetoolsScanner(self.get_events, self.get_logs, self.config_file)

    def check_scanned(self):
        """Run a cluster scan if one has not happened yet.

        NOTE(review): this does not set isClusterScanned, so a later
        scan_cluster() will still rescan — presumably intentional; confirm.
        """
        if not self.isClusterScanned:
            self.pod_list, self.pod_info, self.deployments, self.namespaces = self.scanner.scan_kubernetes_deployment()
            self.save_state()

    def list_namespaces(self):
        """Return the names of all namespaces in the cluster."""
        self.check_scanned()
        return self.scanner.list_namespaces()

    def list_pods(self, namespace):
        """Return pod names in `namespace`, or the sentinel string on a bad name."""
        self.check_scanned()
        if namespace not in self.list_namespaces():
            return "wrong namespace name"
        return self.scanner.list_pods(namespace)

    def list_pods_all(self):
        """Return pod names across all namespaces."""
        self.check_scanned()
        return self.scanner.list_pods_all()

    def detect_tools_from_repo(self):
        """Guess which known tools run in the cluster by matching '-'-separated
        segments of pod/deployment names against the tools database keys."""
        tool_set = set()
        for pod in self.pod_list:
            for service_name in pod.split('-'):
                if service_name in self.tools_dict.keys():
                    tool_set.add(service_name)

        for dep in self.deployments:
            for service_name in dep.split('-'):
                if service_name in self.tools_dict.keys():
                    tool_set.add(service_name)

        return list(tool_set)

    def extract_rankings(self):
        """Build per-tool ranking details and a category -> [ranks] mapping
        for the tools detected in the cluster."""
        tool_dict = {}
        category_tools_dict = {}
        for tool in self.cluster_tool_list:
            tool_details = self.tools_dict[tool]
            for detail in tool_details:
                rank = detail['rank']
                category = detail['category']
                if category not in category_tools_dict:
                    category_tools_dict[category] = []
                category_tools_dict[category].append(rank)

            tool_dict[tool] = tool_details

        return tool_dict, category_tools_dict

    def generate_recommendations(self):
        """Scan the cluster if needed, then print the recommendation table."""
        if not self.isClusterScanned:
            self.scan_cluster()

        self.print_recommendations()

    def scan_cluster(self):
        """Scan the cluster, detect known tools, compute rankings, print and persist."""
        print("\nScanning your cluster...\n")
        self.pod_list, self.pod_info, self.deployments, self.namespaces = self.scanner.scan_kubernetes_deployment()
        self.isClusterScanned = True
        print("Cluster scanned successfully...\n")
        self.cluster_tool_list = self.detect_tools_from_repo()
        print("Extracted tools used in cluster...\n")
        self.detailed_cluster_tool_list, self.category_cluster_tools_dict = self.extract_rankings()

        self.print_scan_results()
        self.save_state()

    def print_scan_results(self):
        """Print a grid table of detected tools with rank, category, CNCF status."""
        scan_results = []

        for tool, details in self.detailed_cluster_tool_list.items():
            first_entry = True
            for detail in details:
                # Only print the tool name on its first row; blank for extra categories.
                row = [tool if first_entry else "", detail['rank'], detail['category'], self.cncf_status.get(tool, 'unlisted')]
                scan_results.append(row)
                first_entry = False

        print("\nThe cluster is using the following tools:\n")
        print(tabulate(scan_results, headers=["Tool Name", "Rank", "Category", "CNCF Status"], tablefmt="grid"))

    def print_recommendations(self):
        """Print the top-ranked tool per detected category.

        NOTE(review): ranks[0] is the first rank appended, not necessarily the
        best rank in the category — confirm the intended semantics.
        """
        recommendations = []

        for category, ranks in self.category_cluster_tools_dict.items():
            rank = ranks[0]
            # category_dict[category] is keyed by rank; [1] is the top-ranked tool.
            recommended_tool = self.category_dict[category][1]['name']
            status = self.cncf_status.get(recommended_tool, 'unlisted')
            if rank == 1:
                row = [category, "Already using the best", recommended_tool, status]
            else:
                row = [category, "Recommended tool", recommended_tool, status]
            recommendations.append(row)

        print("\nOur recommended tools for this deployment are:\n")
        print(tabulate(recommendations, headers=["Category", "Recommendation", "Tool Name", "CNCF Status"], tablefmt="grid"))

    def health_check(self, change_model=False):
        """Interactively pick a pod, extract its logs/events, and chat with the LLM.

        change_model: when True, reinitialize the LLM client (provider/model/key).
        """
        if os.path.exists(LLMSTATE_PICKLE_FILEPATH) and not change_model:
            continue_previous_chat = input("\nDo you want to continue fixing the previously selected pod ? (y/n): >> ")
            while True:
                if continue_previous_chat not in ['y', 'n']:
                    continue_previous_chat = input("\nPlease enter one of the given options ? (y/n): >> ")
                else:
                    break

            if continue_previous_chat=='y':
                krsllmclient = KrsGPTClient()
                self.continue_chat = True
            else:
                krsllmclient = KrsGPTClient(reset_history=True)

        else:
            krsllmclient = KrsGPTClient(reinitialize=True)
            self.continue_chat = False

        if not self.continue_chat:

            self.check_scanned()

            print("\nNamespaces in the cluster:\n")
            namespaces = self.list_namespaces()
            namespace_len = len(namespaces)
            for i, namespace in enumerate(namespaces, start=1):
                print(f"{i}. {namespace}")

            self.selected_namespace_index = int(input("\nWhich namespace do you want to check the health for? Select a namespace by entering its number: >> "))
            while True:
                if self.selected_namespace_index not in list(range(1, namespace_len+1)):
                    self.selected_namespace_index = int(input(f"\nWrong input! Select a namespace number between {1} to {namespace_len}: >> "))
                else:
                    break

            self.selected_namespace = namespaces[self.selected_namespace_index - 1]
            pod_list = self.list_pods(self.selected_namespace)
            pod_len = len(pod_list)
            print(f"\nPods in the namespace {self.selected_namespace}:\n")
            for i, pod in enumerate(pod_list, start=1):
                print(f"{i}. {pod}")
            self.selected_pod_index = int(input(f"\nWhich pod from {self.selected_namespace} do you want to check the health for? Select a pod by entering its number: >> "))

            while True:
                if self.selected_pod_index not in list(range(1, pod_len+1)):
                    self.selected_pod_index = int(input(f"\nWrong input! Select a pod number between {1} to {pod_len}: >> "))
                else:
                    break

            print("\nChecking status of the pod...")

            print("\nExtracting logs and events from the pod...")

            logs_from_pod = self.get_logs_from_pod(self.selected_namespace_index, self.selected_pod_index)

            self.logs_extracted = extract_log_entries(logs_from_pod)

            print("\nLogs and events from the pod extracted successfully!\n")

        # When continuing a previous chat, logs_extracted was restored from the
        # pickled state, so the prompt can still be built.
        prompt_to_llm = self.create_prompt(self.logs_extracted)

        krsllmclient.interactive_session(prompt_to_llm)

        self.save_state()

    def get_logs_from_pod(self, namespace_index, pod_index):
        """Return the first container's logs for the pod at the given 1-based
        namespace/pod indexes, or None on a bad index."""
        try:
            namespace_index -= 1
            pod_index -= 1
            namespace = list(self.pod_info.keys())[namespace_index]
            return list(self.pod_info[namespace][pod_index]['info']['Logs'].values())[0]
        except KeyError as e:
            print("\nKindly enter a value from the available namespaces and pods")
            return None

    def create_prompt(self, log_entries):
        """Build the initial LLM prompt embedding the extracted log entries."""
        prompt = "You are a DevOps expert with experience in Kubernetes. Analyze the following log entries:\n{\n"
        for entry in sorted(log_entries): # Sort to maintain consistent order
            prompt += f"{entry}\n"
        prompt += "}\nIf there is nothing of concern in between { }, return a message stating that 'Everything looks good!'. Explain the warnings and errors and the steps that should be taken to resolve the issues, only if they exist."
        return prompt

    def export_pod_info(self):
        """Dump the scanned pod info (with logs/events) to POD_INFO_FILEPATH as JSON."""
        self.check_scanned()

        with open(POD_INFO_FILEPATH, 'w') as f:
            json.dump(self.pod_info, f, cls=CustomJSONEncoder)

    def exit(self):
        """Delete all cached state files from the krs data directory."""
        try:
            # List all files and directories in the given directory
            files = os.listdir(KRS_DATA_DIRECTORY)
            for file in files:
                file_path = os.path.join(KRS_DATA_DIRECTORY, file)
                # Check if it's a file and not a directory
                if os.path.isfile(file_path):
                    os.remove(file_path) # Delete the file
                    print(f"Deleted file: {file_path}")

        except Exception as e:
            print(f"Error occurred: {e}")

    def main(self):
        """Run the full pipeline: scan, recommend, health check."""
        self.scan_cluster()
        self.generate_recommendations()
        self.health_check()


if __name__=='__main__':
    recommender = KrsMain()
    recommender.main()
    # logs_info = recommender.get_logs_from_pod(4,2)
    # print(logs_info)
    # logs = recommender.extract_log_entries(logs_info)
    # print(logs)
    # print(recommender.create_prompt(logs))
from kubernetes import client, config
import logging

class KubetoolsScanner:
    """Reads pods, deployments, events and logs from a Kubernetes cluster."""

    def __init__(self, get_events=True, get_logs=True, config_file='~/.kube/config'):
        """
        Args:
            get_events (bool): whether pod events should be collected.
            get_logs (bool): whether container logs should be collected.
            config_file (str): path to the kubeconfig to load.
        """
        self.get_events = get_events
        self.get_logs = get_logs
        self.config_file = config_file
        # NOTE: despite the names, v1 is the apps API and v2 the core API.
        self.v1 = None
        self.v2 = None
        self.setup_kubernetes_client()

    def setup_kubernetes_client(self):
        """Load the kubeconfig and create the apps/core API clients. Raises on failure."""
        try:
            config.load_kube_config(config_file=self.config_file)
            self.v1 = client.AppsV1Api()
            self.v2 = client.CoreV1Api()
        except Exception as e:
            logging.error("Failed to load Kubernetes configuration: %s", e)
            raise

    def scan_kubernetes_deployment(self):
        """Scan the whole cluster.

        Returns:
            tuple: (pod_list, pod_dict, deployment_list, namespaces) where
            pod_dict maps namespace -> [{'name': ..., 'info': ...}, ...].
            On API failure, returns four empty collections of the same shapes.
        """
        try:
            deployments = self.v1.list_deployment_for_all_namespaces()
            namespaces = self.list_namespaces()
        except Exception as e:
            logging.error("Error fetching data from Kubernetes API: %s", e)
            # BUGFIX: callers unpack four values; the old 3-tuple return here
            # raised ValueError on the error path.
            return [], {}, [], []

        pod_dict = {}
        pod_list = []
        for name in namespaces:
            pods = self.list_pods(name)
            pod_list += pods
            pod_dict[name] = [{'name': pod, 'info': self.get_pod_info(name, pod)} for pod in pods]

        deployment_list = [dep.metadata.name for dep in deployments.items]
        return pod_list, pod_dict, deployment_list, namespaces

    def list_namespaces(self):
        """Return all namespace names."""
        namespaces = self.v2.list_namespace()
        return [namespace.metadata.name for namespace in namespaces.items]

    def list_pods_all(self):
        """Return pod names across all namespaces."""
        pods = self.v2.list_pod_for_all_namespaces()
        return [pod.metadata.name for pod in pods.items]

    def list_pods(self, namespace):
        """Return pod names within `namespace`."""
        pods = self.v2.list_namespaced_pod(namespace)
        return [pod.metadata.name for pod in pods.items]

    def get_pod_info(self, namespace, pod, include_events=True, include_logs=True):
        """
        Retrieves information about a specific pod in a given namespace.

        Args:
            namespace (str): The namespace of the pod.
            pod (str): The name of the pod.
            include_events (bool): Flag indicating whether to include events associated with the pod.
            include_logs (bool): Flag indicating whether to include logs of the pod.

        Returns:
            dict: A dictionary containing the pod information, events (if include_events is True), and logs (if include_logs is True).
        """
        pod_info = self.v2.read_namespaced_pod(pod, namespace)
        pod_info_map = pod_info.to_dict()
        pod_info_map["metadata"]["managed_fields"] = None  # Clean up metadata

        info = {'PodInfo': pod_info_map}

        if include_events:
            info['Events'] = self.fetch_pod_events(namespace, pod)

        if include_logs:
            # Retrieve logs for all containers within the pod
            container_logs = {}
            for container in pod_info.spec.containers:
                try:
                    logs = self.v2.read_namespaced_pod_log(name=pod, namespace=namespace, container=container.name)
                    container_logs[container.name] = logs
                except Exception as e:
                    logging.error("Failed to fetch logs for container %s in pod %s: %s", container.name, pod, e)
                    container_logs[container.name] = "Error fetching logs: " + str(e)
            info['Logs'] = container_logs

        return info

    def fetch_pod_events(self, namespace, pod):
        """Return name/message/reason dicts for events targeting `pod`."""
        events = self.v2.list_namespaced_event(namespace)
        return [{
            'Name': event.metadata.name,
            'Message': event.message,
            'Reason': event.reason
        } for event in events.items if event.involved_object.name == pod]


if __name__ == '__main__':

    scanner = KubetoolsScanner()
    pod_list, pod_info, deployments, namespaces = scanner.scan_kubernetes_deployment()
    print("POD List: \n\n", pod_list)
    print("\n\nPOD Info: \n\n", pod_info.keys())
    print("\n\nNamespaces: \n\n", namespaces)
    print("\n\nDeployments : \n\n", deployments)
"""Shared file paths, URLs and limits used across krs."""

# Kubetools ranking database (remote source and local cache).
KUBETOOLS_JSONPATH = 'krs/data/kubetools_data.json'
KUBETOOLS_DATA_JSONURL = 'https://raw.githubusercontent.com/Kubetools-Technologies-Inc/kubetools_data/main/data/kubetools_data.json'

# CNCF landscape (remote source, local cache, derived status map).
CNCF_YMLPATH = 'krs/data/landscape.yml'
CNCF_YMLURL = 'https://raw.githubusercontent.com/cncf/landscape/master/landscape.yml'
CNCF_TOOLS_JSONPATH = 'krs/data/cncf_tools.json'

# Derived ranking artifacts.
TOOLS_RANK_JSONPATH = 'krs/data/tools_rank.json'
CATEGORY_RANK_JSONPATH = 'krs/data/category_rank.json'

# Pickled state for the LLM client and the main krs session.
LLMSTATE_PICKLE_FILEPATH = 'krs/data/llmstate.pkl'
KRSSTATE_PICKLE_FILEPATH = 'krs/data/krsstate.pkl'

# Destination of `krs export` (relative to the current working directory).
POD_INFO_FILEPATH = './exported_pod_info.json'

# Cap on LLM completion length.
MAX_OUTPUT_TOKENS = 512

KRS_DATA_DIRECTORY = 'krs/data'
import json
import requests
import yaml
from krs.utils.constants import (KUBETOOLS_DATA_JSONURL, KUBETOOLS_JSONPATH, CNCF_YMLPATH, CNCF_YMLURL, CNCF_TOOLS_JSONPATH, TOOLS_RANK_JSONPATH, CATEGORY_RANK_JSONPATH)

# Function to convert 'githubStars' to a float, or return 0 if it cannot be converted
def get_github_stars(tool):
    """Return the tool's GitHub star count as a float; 0.0 when missing or unparsable."""
    stars = tool.get('githubStars', 0)
    try:
        return float(stars)
    # BUGFIX: also catch TypeError — a JSON null (None) or list value for
    # 'githubStars' raised TypeError and crashed the whole ranking build.
    except (TypeError, ValueError):
        return 0.0

# Function to download and save a file
def download_file(url, filename):
    """Download `url` to `filename`, raising on HTTP errors."""
    response = requests.get(url)
    response.raise_for_status()  # Ensure we notice bad responses
    with open(filename, 'wb') as file:
        file.write(response.content)

def parse_yaml_to_dict(yaml_file_path):
    """Parse the CNCF landscape YAML into {'cncftools': {tool_name: project_status}}."""
    with open(yaml_file_path, 'r') as file:
        data = yaml.safe_load(file)

    cncftools = {}

    for category in data.get('landscape', []):
        for subcategory in category.get('subcategories', []):
            for item in subcategory.get('items', []):
                # BUGFIX: items without a 'name' key made .lower() fail on None.
                item_name = str(item.get('name', '')).lower()
                project_status = item.get('project', 'listed')
                cncftools[item_name] = project_status

    return {'cncftools': cncftools}

def save_json_file(jsondict, jsonpath):
    """Write `jsondict` to `jsonpath` as pretty-printed JSON."""
    # Write the category dictionary to a new JSON file
    with open(jsonpath, 'w') as f:
        json.dump(jsondict, f, indent=4)


def krs_tool_ranking_info():
    """Download the tool database and CNCF landscape, rank tools per category.

    Returns:
        tuple: (tools_dict, category_tools_dict, cncf_tools_dict) where
        tools_dict maps tool name -> [{'rank', 'category', 'url'}, ...] and
        category_tools_dict maps category name -> {rank: {'name', 'url'}}.
    """
    # New dictionaries
    tools_dict = {}
    category_tools_dict = {}

    download_file(KUBETOOLS_DATA_JSONURL, KUBETOOLS_JSONPATH)
    download_file(CNCF_YMLURL, CNCF_YMLPATH)

    with open(KUBETOOLS_JSONPATH) as f:
        data = json.load(f)

    for category in data:
        # Sort the tools in the current category by the number of GitHub stars
        sorted_tools = sorted(category['tools'], key=get_github_stars, reverse=True)

        for i, tool in enumerate(sorted_tools, start=1):
            tool["name"] = tool['name'].replace("\t", "").lower()
            tool['ranking'] = i

            # Update tools_dict
            tools_dict.setdefault(tool['name'], []).append({
                'rank': i,
                'category': category['category']['name'],
                'url': tool['link']
            })

            # Update ranked_tools_dict
            category_tools_dict.setdefault(category['category']['name'], {}).update({i: {'name': tool['name'], 'url': tool['link']}})

    cncf_tools_dict = parse_yaml_to_dict(CNCF_YMLPATH)
    save_json_file(cncf_tools_dict, CNCF_TOOLS_JSONPATH)
    save_json_file(tools_dict, TOOLS_RANK_JSONPATH)
    save_json_file(category_tools_dict, CATEGORY_RANK_JSONPATH)

    return tools_dict, category_tools_dict, cncf_tools_dict

if __name__=='__main__':
    tools_dict, category_tools_dict, cncf_tools_dict = krs_tool_ranking_info()
    print(cncf_tools_dict)
from difflib import SequenceMatcher
import re, json
from datetime import datetime

class CustomJSONEncoder(json.JSONEncoder):
    """JSON Encoder for complex objects not serializable by default json code."""
    def default(self, obj):
        if isinstance(obj, datetime):
            # Format datetime object as a string in ISO 8601 format
            return obj.isoformat()
        # Let the base class default method raise the TypeError
        return json.JSONEncoder.default(self, obj)

def similarity(a, b):
    """Return the SequenceMatcher similarity ratio (0.0-1.0) between two strings."""
    return SequenceMatcher(None, a, b).ratio()

def filter_similar_entries(log_entries):
    """Drop near-duplicate entries (ratio > 0.85), keeping the shorter of each pair.

    Args:
        log_entries: iterable of log entry strings.

    Returns:
        set: entries with highly similar duplicates removed.
    """
    unique_entries = list(log_entries)
    to_remove = set()

    # Compare each pair of log entries
    for i in range(len(unique_entries)):
        for j in range(i + 1, len(unique_entries)):
            if similarity(unique_entries[i], unique_entries[j]) > 0.85:
                # Choose the shorter entry to remove, or either if they are the same length
                if len(unique_entries[i]) > len(unique_entries[j]):
                    to_remove.add(unique_entries[i])
                else:
                    to_remove.add(unique_entries[j])

    # Filter out the highly similar entries
    filtered_entries = {entry for entry in unique_entries if entry not in to_remove}
    return filtered_entries

def extract_log_entries(log_contents):
    """Extract warning/error entries from raw log text.

    Recognizes ISO-timestamped lines, glog-style 'W..../E....' lines, and
    JSON-formatted lines. Returns a de-duplicated set of strings of the form
    'Error: ...' or 'Warning: ...'.
    """
    # Patterns to match different log formats
    patterns = [
        # ISO 8601 timestamp, severity word, component, message (dot escaped).
        re.compile(r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}Z\s+(warn|error)\s+\S+\s+(.*)', re.IGNORECASE),
        # BUGFIX: capture the leading W/E so warnings are no longer reported as
        # errors (the old single-group pattern always fell back to "ERROR").
        re.compile(r'([WE])\d{4} \d{2}:\d{2}:\d{2}\.\d+\s+\d+\s+(.*)'),
        # JSON object embedded in the line.
        re.compile(r'({.*})')
    ]

    log_entries = set()
    # Attempt to match each line with all patterns
    for line in log_contents.split('\n'):
        for pattern in patterns:
            match = pattern.search(line)
            if not match:
                continue
            if match.groups()[0].startswith('{'):
                # Handle JSON formatted log entries
                try:
                    log_json = json.loads(match.group(1))
                    if 'severity' in log_json and log_json['severity'].lower() in ['error', 'warning']:
                        level = "Error" if log_json['severity'] == "ERROR" else "Warning"
                        message = log_json.get('error', '') if 'error' in log_json.keys() else line
                        log_entries.add(f"{level}: {message.strip()}")
                    elif 'level' in log_json:
                        level = "Error" if log_json['level'] == "error" else "Warning"
                        message = log_json.get('msg', '') + log_json.get('error', '')
                        log_entries.add(f"{level}: {message.strip()}")
                except json.JSONDecodeError:
                    continue  # Skip if JSON is not valid
            else:
                groups = match.groups()
                if len(groups) == 2:
                    level, message = groups
                    if level in ('W', 'E'):
                        # glog-style single-letter severity.
                        level = "Error" if level == 'E' else "Warning"
                    else:
                        level = "Error" if "error" in level.lower() else "Warning"
                else:
                    message = match.group(1)
                    level = "Error"  # Default if not specified in the log
                log_entries.add(f"{level}: {message.strip()}")
            break  # Stop after the first match

    return filter_similar_entries(log_entries)
import pickle
import subprocess
import os, time
from krs.utils.constants import (MAX_OUTPUT_TOKENS, LLMSTATE_PICKLE_FILEPATH)

class KrsGPTClient:
    """Wraps an OpenAI or Huggingface chat model for the krs health check,
    persisting provider/model/history to LLMSTATE_PICKLE_FILEPATH."""

    def __init__(self, reinitialize=False, reset_history=False):
        """
        Args:
            reinitialize (bool): skip loading saved state and prompt for a provider.
            reset_history (bool): discard any previously saved chat history.
        """
        self.reinitialize = reinitialize
        self.client = None
        self.pipeline = None
        self.provider = None
        self.model = None
        self.openai_api_key = None
        self.continue_chat = False
        self.history = []
        self.max_tokens = MAX_OUTPUT_TOKENS

        if not self.reinitialize:
            print("\nLoading LLM State..")
            self.load_state()
            print("\nModel: ", self.model)
        if not self.model:
            self.initialize_client()

        if reset_history:
            self.history = []

        if self.history:
            continue_chat = input("\n\nDo you want to continue previous chat ? (y/n) >> ")
            while continue_chat not in ['y', 'n']:
                print("Please enter either y or n!")
                continue_chat = input("\nDo you want to continue previous chat ? (y/n) >> ")
            # BUGFIX: this compared against 'No', which the y/n validation above
            # makes unreachable — declining never actually cleared the history.
            if continue_chat == 'n':
                self.history = []
            else:
                self.continue_chat = True

    def save_state(self, filename=LLMSTATE_PICKLE_FILEPATH):
        """Pickle provider, model, history and API key to `filename`."""
        state = {
            'provider': self.provider,
            'model': self.model,
            'history': self.history,
            'openai_api_key': self.openai_api_key
        }
        with open(filename, 'wb') as output:
            pickle.dump(state, output, pickle.HIGHEST_PROTOCOL)

    def load_state(self):
        """Restore pickled state and re-create the provider client, if state exists."""
        try:
            with open(LLMSTATE_PICKLE_FILEPATH, 'rb') as f:
                state = pickle.load(f)
            self.provider = state['provider']
            self.model = state['model']
            self.history = state.get('history', [])
            self.openai_api_key = state.get('openai_api_key', '')
            if self.provider == 'OpenAI':
                self.init_openai_client(reinitialize=True)
            elif self.provider == 'huggingface':
                self.init_huggingface_client(reinitialize=True)
        except (FileNotFoundError, EOFError):
            pass

    def install_package(self, package_name):
        """pip-install `package_name` if it is not already importable."""
        import importlib
        try:
            importlib.import_module(package_name)
            print(f"\n{package_name} is already installed.")
        except ImportError:
            print(f"\nInstalling {package_name}...", end='', flush=True)
            result = subprocess.run(['pip', 'install', package_name], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if result.returncode == 0:
                print(f" \n{package_name} installed successfully.")
            else:
                print(f" \nFailed to install {package_name}.")

    def initialize_client(self):
        """Interactively pick OpenAI or Huggingface and set up that client."""
        if not self.client and not self.pipeline:
            choice = input("\nChoose the model provider for healthcheck: \n\n[1] OpenAI \n[2] Huggingface\n\n>> ")
            if choice == '1':
                self.init_openai_client()
            elif choice == '2':
                self.init_huggingface_client()
            else:
                raise ValueError("Invalid option selected")

    def init_openai_client(self, reinitialize=False):
        """Install/import openai, collect key and model, and validate them."""
        if not reinitialize:
            print("\nInstalling necessary libraries..........")
            self.install_package('openai')

        import openai
        from openai import OpenAI

        self.provider = 'OpenAI'
        self.openai_api_key = input("\nEnter your OpenAI API key: ") if not reinitialize else self.openai_api_key
        self.model = input("\nEnter the OpenAI model name: ") if not reinitialize else self.model

        self.client = OpenAI(api_key=self.openai_api_key)

        if not reinitialize or self.reinitialize:
            while True:
                try:
                    self.validate_openai_key()
                    break
                # BUGFIX: `openai.error.*` was removed in openai>=1.0 (which the
                # OpenAI client class requires); the old except clauses raised
                # AttributeError instead of handling the failure.
                except openai.AuthenticationError:
                    self.openai_api_key = input("\nInvalid Key! Please enter the correct OpenAI API key: ")
                    # BUGFIX: rebuild the client, otherwise the retry kept
                    # validating the old key.
                    self.client = OpenAI(api_key=self.openai_api_key)
                except (openai.BadRequestError, openai.NotFoundError) as e:
                    print(e)
                    self.model = input("\nEnter an OpenAI model name from latest OpenAI docs: ")
                except openai.APIConnectionError as e:
                    print(e)
                    self.init_openai_client(reinitialize=False)

        self.save_state()

    def init_huggingface_client(self, reinitialize=False):
        """Install/import transformers+torch and build a text-generation pipeline."""
        if not reinitialize:
            print("\nInstalling necessary libraries..........")
            self.install_package('transformers')
            self.install_package('torch')

        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

        import warnings
        from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

        warnings.filterwarnings("ignore", category=FutureWarning)

        self.provider = 'huggingface'
        self.model = input("\nEnter the Huggingface model name: ") if not reinitialize else self.model

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model)
            self.model_hf = AutoModelForCausalLM.from_pretrained(self.model)
            self.pipeline = pipeline('text-generation', model=self.model_hf, tokenizer=self.tokenizer)

        except OSError as e:
            print("\nError loading model: ", e)
            print("\nPlease enter a valid Huggingface model name.")
            self.init_huggingface_client(reinitialize=True)

        self.save_state()

    def validate_openai_key(self):
        """Validate the OpenAI API key by attempting a small request."""
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": "Test prompt, do nothing"}],
            max_tokens=5
        )
        print("API key and model are valid.")

    def infer(self, prompt):
        """Append `prompt` to history, query the provider, print and record the reply."""
        self.history.append({"role": "user", "content": prompt})
        input_prompt = self.history_to_prompt()

        if self.provider == 'OpenAI':
            response = self.client.chat.completions.create(
                model=self.model,
                messages=input_prompt,
                max_tokens=self.max_tokens
            )
            output = response.choices[0].message.content.strip()
        elif self.provider == 'huggingface':
            responses = self.pipeline(input_prompt, max_new_tokens=self.max_tokens)
            output = responses[0]['generated_text']
        else:
            # Guard against a corrupted state file; previously this path hit
            # UnboundLocalError on `output`.
            raise ValueError(f"Unsupported provider: {self.provider}")

        self.history.append({"role": "assistant", "content": output})
        print(">> ", output)

    def interactive_session(self, prompt_input):
        """Run the chat loop, starting from `prompt_input` (or resuming history)."""
        print("\nInteractive session started. Type 'end chat' to exit from the session!\n")

        if self.continue_chat:
            # Resuming: re-display the last assistant reply instead of re-prompting.
            print('>> ', self.history[-1]['content'])
        else:
            initial_prompt = prompt_input
            self.infer(initial_prompt)

        while True:
            prompt = input("\n>> ")
            if prompt.lower() == 'end chat':
                break
            self.infer(prompt)
        self.save_state()

    def history_to_prompt(self):
        """Return history as chat messages (OpenAI) or one flat string (huggingface)."""
        if self.provider == 'OpenAI':
            return self.history
        elif self.provider == 'huggingface':
            return " ".join([item["content"] for item in self.history])


if __name__ == "__main__":
    client = KrsGPTClient(reinitialize=False)
from setuptools import setup, find_packages

# Read runtime dependencies from the package's requirements file.
# BUGFIX: bare splitlines() would pass blank lines and '#' comment lines to
# install_requires, which are not valid requirement specifiers — pip tolerates
# them in a requirements.txt, setuptools does not. Filter them out here.
with open('krs/requirements.txt', encoding='utf-8') as f:
    requirements = [
        line.strip()
        for line in f
        if line.strip() and not line.strip().startswith('#')
    ]

setup(
    name='krs',
    version='0.1.0',
    description='Kubernetes Recommendation Service with LLM integration',
    author='Abhijeet Mazumdar',
    author_email='abhijeet@kubetools.ca',
    url='https://github.com/KrsGPTs/krs',
    packages=find_packages(),
    include_package_data=True,
    install_requires=requirements,
    entry_points={
        'console_scripts': [
            'krs=krs.krs:app',  # Typer app entry point in krs/krs.py
        ],
    },
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    python_requires='>=3.6',
)