From 8f89bf0331f3b23fcb1284e5355a3c7aab17aa7b Mon Sep 17 00:00:00 2001 From: Ari Mahpour Date: Tue, 30 Jul 2024 18:00:37 -0700 Subject: [PATCH 1/8] initial commit of basic assistant. --- assistant/ConfigureAssistant.py | 117 ++++++++++++++++ assistant/OpenAIResourceManager.py | 81 ++++++++++++ assistant/QueryAssistant.py | 205 +++++++++++++++++++++++++++++ 3 files changed, 403 insertions(+) create mode 100644 assistant/ConfigureAssistant.py create mode 100644 assistant/OpenAIResourceManager.py create mode 100644 assistant/QueryAssistant.py diff --git a/assistant/ConfigureAssistant.py b/assistant/ConfigureAssistant.py new file mode 100644 index 0000000..7528a6f --- /dev/null +++ b/assistant/ConfigureAssistant.py @@ -0,0 +1,117 @@ +import os +from openai import OpenAI + + +class ConfigureAssistant: + """ + A class to configure an OpenAI assistant for aiding designers using the ArchiMajor project. + This class handles the creation of the assistant, the setup of vector stores, and the upload of files for context. + """ + + def __init__(self, root_path: str): + """ + Initialize the ConfigureAssistant with the given root path and OpenAI client. + + :param root_path: The root directory path where support files are located. + """ + self.root_path = root_path + self.client = OpenAI() + self.assistant = None + self.vector_store = None + + def create_assistant(self): + """ + Create an OpenAI assistant with specific instructions and tools. + """ + AI_ASSISTANT_INSTRUCTIONS = """You are an assistant to aid designers who are either using the ArchiMajor board or designing a new board based off this one. They may ask questions about specific components or interconnects within this project. + + You have the following reference documents + 1. Schematic (PDF) + 2. Assortment of datasheets for chips used in design (PDF) + 3. Bill of Materials (CSV) + + These documents will provide you context to answer these questions with high level of confidence. Use your retrieval augmented generative (RAG) capabilities to leverage the files provided to you. + """ + # Create the assistant with the given instructions, model, temperature, and tools + self.assistant = self.client.beta.assistants.create( + name="A bot to answer questions about the ArchiMajor Board", + instructions=AI_ASSISTANT_INSTRUCTIONS, + model="gpt-4o", + temperature=0.1, + tools=[{"type": "file_search"}, {"type": "code_interpreter"}], + ) + + def get_file_paths(self, excluded_folders=None): + """ + Retrieve all file paths within the root directory and its subdirectories. + + :param excluded_folders: A list of folders to exclude from the file path collection. + :return: A list of file paths. + """ + excluded_folders = excluded_folders or [] + file_paths = [] + # Walk through the directory tree and collect all file paths, excluding specific folders + for root, dirs, files in os.walk(self.root_path): + if any(folder in root for folder in excluded_folders): + continue + for file in files: + file_paths.append(os.path.join(root, file)) + return file_paths + + def create_vector_store(self): + """ + Create a vector store for storing design documents. + """ + # Create a new vector store with the name "Design Documents" + self.vector_store = self.client.beta.vector_stores.create( + name="Design Documents" + ) + + def upload_files_to_vector_store(self, file_paths): + """ + Upload files to the vector store one by one and log the status of each upload. + + :param file_paths: A list of file paths to upload. + """ + for path in file_paths: + try: + with open(path, "rb") as file_stream: + # Upload the file to the vector store + file_batch = self.client.beta.vector_stores.file_batches.upload_and_poll( + vector_store_id=self.vector_store.id, files=[file_stream] + ) + print(f"Successfully uploaded: {path}") + except Exception as e: + print(f"Failed to upload: {path} with error: {e}") + + def update_assistant_with_vector_store(self): + """ + Update the assistant to use the created vector store for file search. + """ + # Update the assistant with the new vector store's ID for file search capability + self.assistant = self.client.beta.assistants.update( + assistant_id=self.assistant.id, + tool_resources={ + "file_search": {"vector_store_ids": [self.vector_store.id]} + }, + ) + + def configure(self): + """ + Configure the assistant by creating it, retrieving file paths, creating a vector store, + uploading files to the vector store, and updating the assistant with the vector store. + """ + self.create_assistant() + file_paths = self.get_file_paths(excluded_folders=[".git"]) + self.create_vector_store() + self.upload_files_to_vector_store(file_paths) + self.update_assistant_with_vector_store() + + +if __name__ == "__main__": + # Define the root path for support files + root_path = os.path.dirname(os.path.dirname(__file__)) + # Create an instance of ConfigureAssistant with the root path + configurator = ConfigureAssistant(root_path=root_path) + # Configure the assistant + configurator.configure() diff --git a/assistant/OpenAIResourceManager.py b/assistant/OpenAIResourceManager.py new file mode 100644 index 0000000..22cfb49 --- /dev/null +++ b/assistant/OpenAIResourceManager.py @@ -0,0 +1,81 @@ +import os +import logging +from openai import OpenAI + + +class OpenAIResourceManager: + """ + A class to manage OpenAI resources such as assistants, vector stores, and files. + Provides methods to delete all resources of each type. + """ + + def __init__(self, api_key: str): + """ + Initialize the OpenAIResourceManager with the given API key and configure logging. + + :param api_key: The API key for OpenAI. + """ + self.client = OpenAI(api_key=api_key) + logging.basicConfig(level=logging.INFO) + + def delete_all_assistants(self): + """ + Delete all assistants associated with the OpenAI account. + """ + try: + # Retrieve the list of all assistants + assistants = self.client.beta.assistants.list() + # Loop through each assistant and delete it + for assistant in assistants.data: + self.client.beta.assistants.delete(assistant.id) + logging.info(f"Deleted assistant: {assistant.id}") + except Exception as e: + logging.error(f"Failed to delete assistants: {e}") + raise + + def delete_all_vector_stores(self): + """ + Delete all vector stores associated with the OpenAI account. + """ + try: + # Retrieve the list of all vector stores + vector_stores = self.client.beta.vector_stores.list() + # Loop through each vector store and delete it + for vector_store in vector_stores.data: + self.client.beta.vector_stores.delete(vector_store.id) + logging.info(f"Deleted vector store: {vector_store.id}") + except Exception as e: + logging.error(f"Failed to delete vector stores: {e}") + raise + + def delete_all_files(self): + """ + Delete all files associated with the OpenAI account. + """ + try: + # Retrieve the list of all files + files = self.client.files.list() + # Loop through each file and delete it + for file in files.data: + self.client.files.delete(file.id) + logging.info(f"Deleted file: {file.id}") + except Exception as e: + logging.error(f"Failed to delete files: {e}") + raise + + def delete_all_resources(self): + """ + Delete all assistants, vector stores, and files associated with the OpenAI account. + """ + self.delete_all_assistants() + self.delete_all_vector_stores() + self.delete_all_files() + + +if __name__ == "__main__": + # Get the OpenAI API key from the environment variables + api_key = os.getenv("OPENAI_API_KEY") + # Create an instance of the OpenAIResourceManager + manager = OpenAIResourceManager(api_key=api_key) + # Delete all resources + manager.delete_all_resources() diff --git a/assistant/QueryAssistant.py b/assistant/QueryAssistant.py new file mode 100644 index 0000000..c9c798b --- /dev/null +++ b/assistant/QueryAssistant.py @@ -0,0 +1,205 @@ +import argparse +import logging +import time +from openai import OpenAI +from openai import AssistantEventHandler +from typing_extensions import override + +# Configure logging +logging.basicConfig(level=logging.INFO) + +class QueryAssistant: + """ + A class to manage querying an OpenAI assistant. + Provides methods to create threads, send messages, and stream or fetch responses. + """ + + def __init__(self, assistant_id: str): + """ + Initialize the QueryAssistant with the given assistant ID and OpenAI client. + + :param assistant_id: The ID of the OpenAI assistant. + """ + self.client = OpenAI() + self.assistant_id = assistant_id + + def create_thread(self): + """ + Create a new thread for the assistant. + + :return: The created thread object. + """ + logging.info("Creating a new thread...") + thread = self.client.beta.threads.create() + logging.info(f"Thread created: {thread.id}") + return thread + + def create_message(self, thread_id: str, content: str): + """ + Create a message in the specified thread with the given content. + + :param thread_id: The ID of the thread. + :param content: The content of the message. + :return: The created message object. + """ + logging.info(f"Creating message in thread {thread_id}...") + message = self.client.beta.threads.messages.create( + thread_id=thread_id, role="user", content=content + ) + logging.info("Message created") + return message + + def stream_response(self, thread_id: str): + """ + Stream the response from the assistant for the specified thread. + + :param thread_id: The ID of the thread. + """ + logging.info(f"Streaming response for thread {thread_id}...") + with self.client.beta.threads.runs.stream( + thread_id=thread_id, + assistant_id=self.assistant_id, + event_handler=self.EventHandler(), + ) as stream: + stream.until_done() + logging.info("Response streaming completed") + + def fetch_response(self, thread_id: str): + """ + Fetch the response from the assistant for the specified thread (non-streaming). + + :param thread_id: The ID of the thread. + """ + logging.info(f"Fetching response for thread {thread_id}...") + run = self.client.beta.threads.runs.create_and_poll( + thread_id=thread_id, + assistant_id=self.assistant_id + ) + + # Poll the run status with a delay to reduce the number of GET requests + while run.status != 'completed' and run.status != 'failed': + time.sleep(2) # Add a 2-second delay between checks + run = self.client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id) + logging.info(f"Run status: {run.status}") + + if run.status == 'completed': + messages = self.client.beta.threads.messages.list(thread_id=thread_id).data + for message in messages: + if message.role == 'assistant': + for content in message.content: + if content.type == 'text': + print(content.text.value) + else: + logging.error(f"Run failed with status: {run.status}") + if run.incomplete_details: + logging.error(f"Incomplete details: {run.incomplete_details}") + + class EventHandler(AssistantEventHandler): + """ + A class to handle events from the assistant's response stream. + """ + + @override + def on_text_created(self, text) -> None: + """ + Handle the event when text is created by the assistant. + + :param text: The created text. + """ + logging.info("Text created by assistant") + print(f"\nassistant > ", end="", flush=True) + + @override + def on_text_delta(self, delta, snapshot): + """ + Handle the event when there is a delta in the assistant's response. + + :param delta: The response delta. + :param snapshot: The snapshot of the response. + """ + print(delta.value, end="", flush=True) + + def on_tool_call_created(self, tool_call): + """ + Handle the event when a tool call is created by the assistant. + + :param tool_call: The created tool call. + """ + logging.info(f"Tool call created: {tool_call.type}") + print(f"\nassistant > {tool_call.type}\n", flush=True) + + def on_tool_call_delta(self, delta, snapshot): + """ + Handle the event when there is a delta in the assistant's tool call. + + :param delta: The tool call delta. + :param snapshot: The snapshot of the tool call. + """ + if delta.type == "code_interpreter": + if delta.code_interpreter.input: + print(delta.code_interpreter.input, end="", flush=True) + if delta.code_interpreter.outputs: + print(f"\n\noutput >", flush=True) + for output in delta.code_interpreter.outputs: + if output.type == "logs": + print(f"\n{output.logs}", flush=True) + +def main(query: str, assistant_id: str, context: str, use_streaming: bool): + """ + The main function to run the assistant query. + + :param query: The query to ask the assistant. + :param assistant_id: The ID of the assistant. + :param context: The context to set before the query. + :param use_streaming: Boolean flag to determine if streaming should be used. + """ + assistant = QueryAssistant(assistant_id=assistant_id) + thread = assistant.create_thread() + # Merge the context and query into a single message + full_query = f"Context: {context}\nQuery: {query}" + # Print the full query + print("\n" + "=" * 100) + print(f"{full_query}") + print("=" * 100 + "\n") + # Send the message + assistant.create_message(thread_id=thread.id, content=full_query) + if use_streaming: + assistant.stream_response(thread_id=thread.id) + else: + assistant.fetch_response(thread_id=thread.id) + print("\n") + +if __name__ == "__main__": + # Default query and context + DEFAULT_QUERY = "What are you capable of as an assistant?" + DEFAULT_CONTEXT = "Use your vector store to answer questions about the Arty A7 Evaluation Board. Take time to understand the context and introspect yourself. If you don't know the answer simply respond with 'I don't know'. It is NEVER okay to return an empty response." + + # Parse command line arguments + parser = argparse.ArgumentParser(description="Run an assistant query.") + parser.add_argument( + "--query", + type=str, + default=DEFAULT_QUERY, + help="The query to ask the assistant.", + ) + parser.add_argument( + "--assistant_id", + type=str, + default="asst_JUXTF2T6n3RFDjkolNqdtPxj", + help="The assistant ID to use.", + ) + parser.add_argument( + "--context", + type=str, + default=DEFAULT_CONTEXT, + help="The context to set before the query.", + ) + parser.add_argument( + "--use-streaming", + action="store_true", + help="Flag to determine if streaming should be used.", + ) + + # Run the main function with parsed arguments + args = parser.parse_args() + main(args.query, args.assistant_id, args.context, args.use_streaming) -- 2.40.3 From f614536a6feb64bc8c53cac94326d1547e3c500e Mon Sep 17 00:00:00 2001 From: Ari Mahpour Date: Wed, 31 Jul 2024 18:00:20 -0700 Subject: [PATCH 2/8] Adding filename spoof. --- assistant/ConfigureAssistant.py | 50 +++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/assistant/ConfigureAssistant.py b/assistant/ConfigureAssistant.py index 7528a6f..028f504 100644 --- a/assistant/ConfigureAssistant.py +++ b/assistant/ConfigureAssistant.py @@ -1,13 +1,19 @@ import os +import io from openai import OpenAI - class ConfigureAssistant: """ A class to configure an OpenAI assistant for aiding designers using the ArchiMajor project. This class handles the creation of the assistant, the setup of vector stores, and the upload of files for context. """ + SUPPORTED_FORMATS = { + "c", "cpp", "css", "csv", "docx", "gif", "html", "java", "jpeg", "jpg", "js", + "json", "md", "pdf", "php", "png", "pptx", "py", "rb", "tar", "tex", "ts", "txt", + "webp", "xlsx", "xml", "zip" + } + def __init__(self, root_path: str): """ Initialize the ConfigureAssistant with the given root path and OpenAI client. @@ -23,7 +29,7 @@ class ConfigureAssistant: """ Create an OpenAI assistant with specific instructions and tools. """ - AI_ASSISTANT_INSTRUCTIONS = """You are an assistant to aid designers who are either using the ArchiMajor board or designing a new board based off this one. They may ask questions about specific components or interconnects within this project. + AI_ASSISTANT_INSTRUCTIONS = """You are an assistant to aid designers who are either using the ArchiMajor board or designing a new board based off this one. They may ask questions about specific components or interconnects within this project. You have the following reference documents 1. Schematic (PDF) @@ -58,6 +64,23 @@ class ConfigureAssistant: file_paths.append(os.path.join(root, file)) return file_paths + def preprocess_file(self, file_path): + """ + Preprocess files based on their type. + + :param file_path: The file path to preprocess. + :return: The new filename and a flag indicating if the file should be uploaded. + """ + extension = file_path.split(".")[-1].lower() + if extension in self.SUPPORTED_FORMATS: + return os.path.basename(file_path), True + elif extension == "schdoc": + print(f"Skipping unsupported file: {file_path}") + return file_path, False + else: + new_filename = os.path.basename(file_path) + ".txt" + return new_filename, True + def create_vector_store(self): """ Create a vector store for storing design documents. @@ -74,15 +97,21 @@ class ConfigureAssistant: :param file_paths: A list of file paths to upload. """ for path in file_paths: + new_filename, should_upload = self.preprocess_file(path) + if not should_upload: + continue try: with open(path, "rb") as file_stream: + file_content = file_stream.read() + spoofed_file = io.BytesIO(file_content) + spoofed_file.name = new_filename # Spoof the filename # Upload the file to the vector store file_batch = self.client.beta.vector_stores.file_batches.upload_and_poll( - vector_store_id=self.vector_store.id, files=[file_stream] + vector_store_id=self.vector_store.id, files=[spoofed_file] ) - print(f"Successfully uploaded: {path}") + print(f"Successfully uploaded: {new_filename}") except Exception as e: - print(f"Failed to upload: {path} with error: {e}") + print(f"Failed to upload: {new_filename} with error: {e}") def update_assistant_with_vector_store(self): """ @@ -113,5 +142,16 @@ if __name__ == "__main__": root_path = os.path.dirname(os.path.dirname(__file__)) # Create an instance of ConfigureAssistant with the root path configurator = ConfigureAssistant(root_path=root_path) + + # Retrieve file paths + file_paths = configurator.get_file_paths(excluded_folders=[".git"]) + + # Preprocess and observe the files that will be uploaded + print("Files to be uploaded:") + for path in file_paths: + new_filename, should_upload = configurator.preprocess_file(path) + if should_upload: + print(f"Original: {path}, New: {new_filename}") + # Configure the assistant configurator.configure() -- 2.40.3 From e1220e71650c6f58c834fe939a52eb0c6c7a5350 Mon Sep 17 00:00:00 2001 From: Ari Mahpour Date: Sun, 4 Aug 2024 16:34:39 -0700 Subject: [PATCH 3/8] Updated context, formatting, and info display. --- assistant/ConfigureAssistant.py | 16 +++-- assistant/OpenAIResourceManager.py | 108 ++++++++++++++++++++++++----- assistant/QueryAssistant.py | 2 +- 3 files changed, 102 insertions(+), 24 deletions(-) diff --git a/assistant/ConfigureAssistant.py b/assistant/ConfigureAssistant.py index 028f504..bea892f 100644 --- a/assistant/ConfigureAssistant.py +++ b/assistant/ConfigureAssistant.py @@ -9,9 +9,10 @@ class ConfigureAssistant: """ SUPPORTED_FORMATS = { - "c", "cpp", "css", "csv", "docx", "gif", "html", "java", "jpeg", "jpg", "js", + "c", "cpp", "css", "docx", "gif", "html", "java", "jpeg", "jpg", "js", "json", "md", "pdf", "php", "png", "pptx", "py", "rb", "tar", "tex", "ts", "txt", - "webp", "xlsx", "xml", "zip" + "webp", "xlsx", "xml", "zip", + # "csv", # CSV is supported but not actually parsed so we're going to treat it as text } def __init__(self, root_path: str): @@ -72,14 +73,15 @@ class ConfigureAssistant: :return: The new filename and a flag indicating if the file should be uploaded. """ extension = file_path.split(".")[-1].lower() - if extension in self.SUPPORTED_FORMATS: + + # TO DO: Preprocess Outjob and PcbDoc files into something OpenAI (or future vector DB) can understand + excluded_extensions = ["schdoc", "exe", "so", "dll", "outjob", "pcbdoc", "png"] + + if extension in self.SUPPORTED_FORMATS and extension not in excluded_extensions: return os.path.basename(file_path), True - elif extension == "schdoc": + else: print(f"Skipping unsupported file: {file_path}") return file_path, False - else: - new_filename = os.path.basename(file_path) + ".txt" - return new_filename, True def create_vector_store(self): """ diff --git a/assistant/OpenAIResourceManager.py b/assistant/OpenAIResourceManager.py index 22cfb49..6f86fa4 100644 --- a/assistant/OpenAIResourceManager.py +++ b/assistant/OpenAIResourceManager.py @@ -1,12 +1,12 @@ import os import logging +import pandas as pd from openai import OpenAI - class OpenAIResourceManager: """ A class to manage OpenAI resources such as assistants, vector stores, and files. - Provides methods to delete all resources of each type. + Provides methods to delete all resources of each type and display them in tables. """ def __init__(self, api_key: str): @@ -18,15 +18,52 @@ class OpenAIResourceManager: self.client = OpenAI(api_key=api_key) logging.basicConfig(level=logging.INFO) + def get_all_assistants(self): + """ + Retrieve all assistants associated with the OpenAI account. + + :return: A list of assistants. + """ + try: + assistants = self.client.beta.assistants.list() + return assistants.data + except Exception as e: + logging.error(f"Failed to retrieve assistants: {e}") + raise + + def get_all_vector_stores(self): + """ + Retrieve all vector stores associated with the OpenAI account. + + :return: A list of vector stores. + """ + try: + vector_stores = self.client.beta.vector_stores.list() + return vector_stores.data + except Exception as e: + logging.error(f"Failed to retrieve vector stores: {e}") + raise + + def get_all_files(self): + """ + Retrieve all files associated with the OpenAI account. + + :return: A list of files. + """ + try: + files = self.client.files.list() + return files.data + except Exception as e: + logging.error(f"Failed to retrieve files: {e}") + raise + def delete_all_assistants(self): """ Delete all assistants associated with the OpenAI account. """ try: - # Retrieve the list of all assistants - assistants = self.client.beta.assistants.list() - # Loop through each assistant and delete it - for assistant in assistants.data: + assistants = self.get_all_assistants() + for assistant in assistants: self.client.beta.assistants.delete(assistant.id) logging.info(f"Deleted assistant: {assistant.id}") except Exception as e: @@ -38,10 +75,8 @@ class OpenAIResourceManager: Delete all vector stores associated with the OpenAI account. """ try: - # Retrieve the list of all vector stores - vector_stores = self.client.beta.vector_stores.list() - # Loop through each vector store and delete it - for vector_store in vector_stores.data: + vector_stores = self.get_all_vector_stores() + for vector_store in vector_stores: self.client.beta.vector_stores.delete(vector_store.id) logging.info(f"Deleted vector store: {vector_store.id}") except Exception as e: @@ -53,10 +88,8 @@ class OpenAIResourceManager: Delete all files associated with the OpenAI account. """ try: - # Retrieve the list of all files - files = self.client.files.list() - # Loop through each file and delete it - for file in files.data: + files = self.get_all_files() + for file in files: self.client.files.delete(file.id) logging.info(f"Deleted file: {file.id}") except Exception as e: @@ -71,11 +104,54 @@ class OpenAIResourceManager: self.delete_all_vector_stores() self.delete_all_files() + def truncate_string(self, s, max_length=50): + """ + Truncate a string to a maximum length with ellipsis. + + :param s: The string to truncate. + :param max_length: The maximum length of the string. + :return: The truncated string. + """ + return (s[:max_length] + '...') if len(s) > max_length else s + + def show_all_assistants(self): + """ + Display all assistants in a table. + """ + assistants = self.get_all_assistants() + assistant_data = [{k: self.truncate_string(str(v), max_length=25) for k, v in assistant.dict().items()} for assistant in assistants] + df = pd.DataFrame(assistant_data) + print("Assistants:") + print(df.to_markdown(index=False)) + + def show_all_vector_stores(self): + """ + Display all vector stores in a table. + """ + vector_stores = self.get_all_vector_stores() + vector_store_data = [{k: self.truncate_string(str(v)) for k, v in vector_store.dict().items()} for vector_store in vector_stores] + df = pd.DataFrame(vector_store_data) + print("Vector Stores:") + print(df.to_markdown(index=False)) + + def show_all_files(self): + """ + Display all files in a table. + """ + files = self.get_all_files() + file_data = [{k: self.truncate_string(str(v)) for k, v in file.dict().items()} for file in files] + df = pd.DataFrame(file_data) + print("Files:") + print(df.to_markdown(index=False)) + if __name__ == "__main__": # Get the OpenAI API key from the environment variables api_key = os.getenv("OPENAI_API_KEY") # Create an instance of the OpenAIResourceManager manager = OpenAIResourceManager(api_key=api_key) - # Delete all resources - manager.delete_all_resources() + + # Show all resources in tables + manager.show_all_assistants() + manager.show_all_vector_stores() + manager.show_all_files() diff --git a/assistant/QueryAssistant.py b/assistant/QueryAssistant.py index c9c798b..ef409f7 100644 --- a/assistant/QueryAssistant.py +++ b/assistant/QueryAssistant.py @@ -172,7 +172,7 @@ def main(query: str, assistant_id: str, context: str, use_streaming: bool): if __name__ == "__main__": # Default query and context DEFAULT_QUERY = "What are you capable of as an assistant?" - DEFAULT_CONTEXT = "Use your vector store to answer questions about the Arty A7 Evaluation Board. Take time to understand the context and introspect yourself. If you don't know the answer simply respond with 'I don't know'. It is NEVER okay to return an empty response." + DEFAULT_CONTEXT = "Use your vector store to answer questions about the ArchiMajor Board. Take time to understand the context and introspect. If you don't know the answer simply respond with 'I don't know'. It is NEVER okay to return an empty response." # Parse command line arguments parser = argparse.ArgumentParser(description="Run an assistant query.") -- 2.40.3 From 4e961198ba1f78c096efd3db907a439aa2bae2b2 Mon Sep 17 00:00:00 2001 From: Ari Mahpour Date: Sun, 4 Aug 2024 16:34:51 -0700 Subject: [PATCH 4/8] Format. --- assistant/ConfigureAssistant.py | 44 ++++++++++++++++++++++++------ assistant/OpenAIResourceManager.py | 21 +++++++++++--- assistant/QueryAssistant.py | 18 +++++++----- 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/assistant/ConfigureAssistant.py b/assistant/ConfigureAssistant.py index bea892f..bda66fb 100644 --- a/assistant/ConfigureAssistant.py +++ b/assistant/ConfigureAssistant.py @@ -2,6 +2,7 @@ import os import io from openai import OpenAI + class ConfigureAssistant: """ A class to configure an OpenAI assistant for aiding designers using the ArchiMajor project. @@ -9,9 +10,32 @@ class ConfigureAssistant: """ SUPPORTED_FORMATS = { - "c", "cpp", "css", "docx", "gif", "html", "java", "jpeg", "jpg", "js", - "json", "md", "pdf", "php", "png", "pptx", "py", "rb", "tar", "tex", "ts", "txt", - "webp", "xlsx", "xml", "zip", + "c", + "cpp", + "css", + "docx", + "gif", + "html", + "java", + "jpeg", + "jpg", + "js", + "json", + "md", + "pdf", + "php", + "png", + "pptx", + "py", + "rb", + "tar", + "tex", + "ts", + "txt", + "webp", + "xlsx", + "xml", + "zip", # "csv", # CSV is supported but not actually parsed so we're going to treat it as text } @@ -76,7 +100,7 @@ class ConfigureAssistant: # TO DO: Preprocess Outjob and PcbDoc files into something OpenAI (or future vector DB) can understand excluded_extensions = ["schdoc", "exe", "so", "dll", "outjob", "pcbdoc", "png"] - + if extension in self.SUPPORTED_FORMATS and extension not in excluded_extensions: return os.path.basename(file_path), True else: @@ -108,8 +132,10 @@ class ConfigureAssistant: spoofed_file = io.BytesIO(file_content) spoofed_file.name = new_filename # Spoof the filename # Upload the file to the vector store - file_batch = self.client.beta.vector_stores.file_batches.upload_and_poll( - vector_store_id=self.vector_store.id, files=[spoofed_file] + file_batch = ( + self.client.beta.vector_stores.file_batches.upload_and_poll( + vector_store_id=self.vector_store.id, files=[spoofed_file] + ) ) print(f"Successfully uploaded: {new_filename}") except Exception as e: @@ -144,16 +170,16 @@ if __name__ == "__main__": root_path = os.path.dirname(os.path.dirname(__file__)) # Create an instance of ConfigureAssistant with the root path configurator = ConfigureAssistant(root_path=root_path) - + # Retrieve file paths file_paths = configurator.get_file_paths(excluded_folders=[".git"]) - + # Preprocess and observe the files that will be uploaded print("Files to be uploaded:") for path in file_paths: new_filename, should_upload = configurator.preprocess_file(path) if should_upload: print(f"Original: {path}, New: {new_filename}") - + # Configure the assistant configurator.configure() diff --git a/assistant/OpenAIResourceManager.py b/assistant/OpenAIResourceManager.py index 6f86fa4..fa76638 100644 --- a/assistant/OpenAIResourceManager.py +++ b/assistant/OpenAIResourceManager.py @@ -3,6 +3,7 @@ import logging import pandas as pd from openai import OpenAI + class OpenAIResourceManager: """ A class to manage OpenAI resources such as assistants, vector stores, and files. @@ -112,14 +113,20 @@ class OpenAIResourceManager: :param max_length: The maximum length of the string. :return: The truncated string. """ - return (s[:max_length] + '...') if len(s) > max_length else s + return (s[:max_length] + "...") if len(s) > max_length else s def show_all_assistants(self): """ Display all assistants in a table. """ assistants = self.get_all_assistants() - assistant_data = [{k: self.truncate_string(str(v), max_length=25) for k, v in assistant.dict().items()} for assistant in assistants] + assistant_data = [ + { + k: self.truncate_string(str(v), max_length=25) + for k, v in assistant.dict().items() + } + for assistant in assistants + ] df = pd.DataFrame(assistant_data) print("Assistants:") print(df.to_markdown(index=False)) @@ -129,7 +136,10 @@ class OpenAIResourceManager: Display all vector stores in a table. """ vector_stores = self.get_all_vector_stores() - vector_store_data = [{k: self.truncate_string(str(v)) for k, v in vector_store.dict().items()} for vector_store in vector_stores] + vector_store_data = [ + {k: self.truncate_string(str(v)) for k, v in vector_store.dict().items()} + for vector_store in vector_stores + ] df = pd.DataFrame(vector_store_data) print("Vector Stores:") print(df.to_markdown(index=False)) @@ -139,7 +149,10 @@ class OpenAIResourceManager: Display all files in a table. """ files = self.get_all_files() - file_data = [{k: self.truncate_string(str(v)) for k, v in file.dict().items()} for file in files] + file_data = [ + {k: self.truncate_string(str(v)) for k, v in file.dict().items()} + for file in files + ] df = pd.DataFrame(file_data) print("Files:") print(df.to_markdown(index=False)) diff --git a/assistant/QueryAssistant.py b/assistant/QueryAssistant.py index ef409f7..48afa6f 100644 --- a/assistant/QueryAssistant.py +++ b/assistant/QueryAssistant.py @@ -8,6 +8,7 @@ from typing_extensions import override # Configure logging logging.basicConfig(level=logging.INFO) + class QueryAssistant: """ A class to manage querying an OpenAI assistant. @@ -72,22 +73,23 @@ class QueryAssistant: """ logging.info(f"Fetching response for thread {thread_id}...") run = self.client.beta.threads.runs.create_and_poll( - thread_id=thread_id, - assistant_id=self.assistant_id + thread_id=thread_id, assistant_id=self.assistant_id ) # Poll the run status with a delay to reduce the number of GET requests - while run.status != 'completed' and run.status != 'failed': + while run.status != "completed" and run.status != "failed": time.sleep(2) # Add a 2-second delay between checks - run = self.client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run.id) + run = self.client.beta.threads.runs.retrieve( + thread_id=thread_id, run_id=run.id + ) logging.info(f"Run status: {run.status}") - if run.status == 'completed': + if run.status == "completed": messages = self.client.beta.threads.messages.list(thread_id=thread_id).data for message in messages: - if message.role == 'assistant': + if message.role == "assistant": for content in message.content: - if content.type == 'text': + if content.type == "text": print(content.text.value) else: logging.error(f"Run failed with status: {run.status}") @@ -144,6 +146,7 @@ class QueryAssistant: if output.type == "logs": print(f"\n{output.logs}", flush=True) + def main(query: str, assistant_id: str, context: str, use_streaming: bool): """ The main function to run the assistant query. @@ -169,6 +172,7 @@ def main(query: str, assistant_id: str, context: str, use_streaming: bool): assistant.fetch_response(thread_id=thread.id) print("\n") + if __name__ == "__main__": # Default query and context DEFAULT_QUERY = "What are you capable of as an assistant?" -- 2.40.3 From 669c2c2bfab5cb04a714bd71f59fb5ff7ed7f0c6 Mon Sep 17 00:00:00 2001 From: Ari Mahpour Date: Sun, 4 Aug 2024 16:39:43 -0700 Subject: [PATCH 5/8] Added wrapping. --- assistant/OpenAIResourceManager.py | 36 ++++++++++++++++++------------ 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/assistant/OpenAIResourceManager.py b/assistant/OpenAIResourceManager.py index fa76638..cf77ce2 100644 --- a/assistant/OpenAIResourceManager.py +++ b/assistant/OpenAIResourceManager.py @@ -2,7 +2,7 @@ import os import logging import pandas as pd from openai import OpenAI - +import textwrap class OpenAIResourceManager: """ @@ -105,15 +105,15 @@ class OpenAIResourceManager: self.delete_all_vector_stores() self.delete_all_files() - def truncate_string(self, s, max_length=50): + def wrap_text(self, s, width=30): """ - Truncate a string to a maximum length with ellipsis. + Wrap text to a specific width. - :param s: The string to truncate. - :param max_length: The maximum length of the string. - :return: The truncated string. + :param s: The string to wrap. + :param width: The maximum width of each line. + :return: The wrapped string. """ - return (s[:max_length] + "...") if len(s) > max_length else s + return "\n".join(textwrap.wrap(s, width)) def show_all_assistants(self): """ @@ -122,7 +122,7 @@ class OpenAIResourceManager: assistants = self.get_all_assistants() assistant_data = [ { - k: self.truncate_string(str(v), max_length=25) + k: self.wrap_text(str(v)) for k, v in assistant.dict().items() } for assistant in assistants @@ -137,7 +137,7 @@ class OpenAIResourceManager: """ vector_stores = self.get_all_vector_stores() vector_store_data = [ - {k: self.truncate_string(str(v)) for k, v in vector_store.dict().items()} + {k: self.wrap_text(str(v)) for k, v in vector_store.dict().items()} for vector_store in vector_stores ] df = pd.DataFrame(vector_store_data) @@ -150,13 +150,22 @@ class OpenAIResourceManager: """ files = self.get_all_files() file_data = [ - {k: self.truncate_string(str(v)) for k, v in file.dict().items()} + {k: self.wrap_text(str(v)) for k, v in file.dict().items()} for file in files ] df = pd.DataFrame(file_data) print("Files:") print(df.to_markdown(index=False)) + def show_all_resources(self): + """ + Display all resources in a table. + """ + self.show_all_assistants() + print("") + self.show_all_vector_stores() + print("") + self.show_all_files() if __name__ == "__main__": # Get the OpenAI API key from the environment variables @@ -164,7 +173,6 @@ if __name__ == "__main__": # Create an instance of the OpenAIResourceManager manager = OpenAIResourceManager(api_key=api_key) - # Show all resources in tables - manager.show_all_assistants() - manager.show_all_vector_stores() - manager.show_all_files() + # Show or delete all resources in tables + manager.show_all_resources() + # manager.delete_all_resources() -- 2.40.3 From d12a3a0f4bc21fcdff35944a40b5cab0e71e757a Mon Sep 17 00:00:00 2001 From: Ari Mahpour Date: Mon, 5 Aug 2024 17:50:51 -0700 Subject: [PATCH 6/8] Added more capabilities. --- assistant/ConfigureAssistant.py | 18 +++++++++++------ assistant/OpenAIResourceManager.py | 31 ++++++++++++++++++++++++------ assistant/QueryAssistant.py | 2 +- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/assistant/ConfigureAssistant.py b/assistant/ConfigureAssistant.py index bda66fb..0b62b1c 100644 --- a/assistant/ConfigureAssistant.py +++ b/assistant/ConfigureAssistant.py @@ -69,7 +69,7 @@ class ConfigureAssistant: instructions=AI_ASSISTANT_INSTRUCTIONS, model="gpt-4o", temperature=0.1, - tools=[{"type": "file_search"}, {"type": "code_interpreter"}], + tools=[{"type": "file_search"}, ], # {"type": "code_interpreter"}], # Code interpreter doesn't seem to return a delta properly ) def get_file_paths(self, excluded_folders=None): @@ -101,10 +101,16 @@ class ConfigureAssistant: # TO DO: Preprocess Outjob and PcbDoc files into something OpenAI (or future vector DB) can understand excluded_extensions = ["schdoc", "exe", "so", "dll", "outjob", "pcbdoc", "png"] - if extension in self.SUPPORTED_FORMATS and extension not in excluded_extensions: - return os.path.basename(file_path), True + # If the extension is not in supported formats and not in excluded extensions, rename the file + if extension not in self.SUPPORTED_FORMATS and extension not in excluded_extensions: + new_file_path = f"{file_path}.txt" + os.rename(file_path, new_file_path) + print(f"Renamed unsupported file: {file_path} to {new_file_path}") + return new_file_path, True + elif extension in self.SUPPORTED_FORMATS: + return file_path, True else: - print(f"Skipping unsupported file: {file_path}") + print(f"Skipping excluded file: {file_path}") return file_path, False def create_vector_store(self): @@ -127,10 +133,10 @@ class ConfigureAssistant: if not should_upload: continue try: - with open(path, "rb") as file_stream: + with open(new_filename, "rb") as file_stream: file_content = file_stream.read() spoofed_file = io.BytesIO(file_content) - spoofed_file.name = new_filename # Spoof the filename + spoofed_file.name = os.path.basename(new_filename) # Spoof the filename # Upload the file to the vector store file_batch = ( self.client.beta.vector_stores.file_batches.upload_and_poll( diff --git a/assistant/OpenAIResourceManager.py b/assistant/OpenAIResourceManager.py index cf77ce2..452edfd 100644 --- a/assistant/OpenAIResourceManager.py +++ b/assistant/OpenAIResourceManager.py @@ -1,7 +1,8 @@ -import os -import logging -import pandas as pd from openai import OpenAI +import argparse +import logging +import os +import pandas as pd import textwrap class OpenAIResourceManager: @@ -167,12 +168,30 @@ class OpenAIResourceManager: print("") self.show_all_files() -if __name__ == "__main__": + + +def main(): + """ + Main function that either shows or deletes resources based on the command line arguments. + """ + parser = argparse.ArgumentParser( + description="Displays or deletes all resources associated with the OpenAI account." + ) + parser.add_argument( + "--delete", action="store_true", help="Flag to delete resources instead of showing them." + ) + args = parser.parse_args() + # Get the OpenAI API key from the environment variables api_key = os.getenv("OPENAI_API_KEY") # Create an instance of the OpenAIResourceManager manager = OpenAIResourceManager(api_key=api_key) - # Show or delete all resources in tables + # Delete resources conditionally + if args.delete: + manager.delete_all_resources() + manager.show_all_resources() - # manager.delete_all_resources() + +if __name__ == "__main__": + main() diff --git a/assistant/QueryAssistant.py b/assistant/QueryAssistant.py index 48afa6f..8b8a846 100644 --- a/assistant/QueryAssistant.py +++ b/assistant/QueryAssistant.py @@ -189,7 +189,7 @@ if __name__ == "__main__": parser.add_argument( "--assistant_id", type=str, - default="asst_JUXTF2T6n3RFDjkolNqdtPxj", + default="asst_W20wgEl0ZModBiMWzcgC0E3E", help="The assistant ID to use.", ) parser.add_argument( -- 2.40.3 From 14fecdadf2aefdb4116a99575b24dcb94c2fd700 Mon Sep 17 00:00:00 2001 From: Ari Mahpour Date: Mon, 5 Aug 2024 17:57:43 -0700 Subject: [PATCH 7/8] Fixed spoof. --- assistant/ConfigureAssistant.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/assistant/ConfigureAssistant.py b/assistant/ConfigureAssistant.py index 0b62b1c..0d21893 100644 --- a/assistant/ConfigureAssistant.py +++ b/assistant/ConfigureAssistant.py @@ -94,24 +94,22 @@ class ConfigureAssistant: Preprocess files based on their type. :param file_path: The file path to preprocess. - :return: The new filename and a flag indicating if the file should be uploaded. + :return: The filename to be used during upload and a flag indicating if the file should be uploaded. """ extension = file_path.split(".")[-1].lower() - # TO DO: Preprocess Outjob and PcbDoc files into something OpenAI (or future vector DB) can understand excluded_extensions = ["schdoc", "exe", "so", "dll", "outjob", "pcbdoc", "png"] - # If the extension is not in supported formats and not in excluded extensions, rename the file + # If the extension is not in supported formats and not in excluded extensions, spoof the file name if extension not in self.SUPPORTED_FORMATS and extension not in excluded_extensions: - new_file_path = f"{file_path}.txt" - os.rename(file_path, new_file_path) - print(f"Renamed unsupported file: {file_path} to {new_file_path}") - return new_file_path, True + new_file_name = f"{os.path.basename(file_path)}.txt" + print(f"Spoofing unsupported file: {file_path} as {new_file_name}") + return new_file_name, True elif extension in self.SUPPORTED_FORMATS: - return file_path, True + return os.path.basename(file_path), True else: print(f"Skipping excluded file: {file_path}") - return file_path, False + return os.path.basename(file_path), False def create_vector_store(self): """ @@ -133,10 +131,10 @@ class ConfigureAssistant: if not should_upload: continue try: - with open(new_filename, "rb") as file_stream: + with open(path, "rb") as file_stream: file_content = file_stream.read() spoofed_file = io.BytesIO(file_content) - spoofed_file.name = os.path.basename(new_filename) # Spoof the filename + spoofed_file.name = new_filename # Spoof the filename # Upload the file to the vector store file_batch = ( self.client.beta.vector_stores.file_batches.upload_and_poll( -- 2.40.3 From a02e8fac997b83b81114a67ce92ddaf7139b707b Mon Sep 17 00:00:00 2001 From: Ari Mahpour Date: Mon, 5 Aug 2024 18:07:10 -0700 Subject: [PATCH 8/8] Fixed unsupported types. --- assistant/ConfigureAssistant.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/assistant/ConfigureAssistant.py b/assistant/ConfigureAssistant.py index 0d21893..7957536 100644 --- a/assistant/ConfigureAssistant.py +++ b/assistant/ConfigureAssistant.py @@ -24,11 +24,11 @@ class ConfigureAssistant: "md", "pdf", "php", - "png", + # "png", # PNG is supported but not actually parsed "pptx", "py", "rb", - "tar", + # "tar", # TAR is supported but not actually parsed "tex", "ts", "txt", @@ -98,7 +98,7 @@ class ConfigureAssistant: """ extension = file_path.split(".")[-1].lower() - excluded_extensions = ["schdoc", "exe", "so", "dll", "outjob", "pcbdoc", "png"] + excluded_extensions = ["schdoc", "exe", "so", "dll", "outjob", "pcbdoc", "png", "tar"] # If the extension is not in supported formats and not in excluded extensions, spoof the file name if extension not in self.SUPPORTED_FORMATS and extension not in excluded_extensions: @@ -182,8 +182,6 @@ if __name__ == "__main__": print("Files to be uploaded:") for path in file_paths: new_filename, should_upload = configurator.preprocess_file(path) - if should_upload: - print(f"Original: {path}, New: {new_filename}") # Configure the assistant configurator.configure() -- 2.40.3