add-assistant #2

Merged
Ari merged 8 commits from add-assistant into main 2024-08-06 01:14:25 +00:00
Showing only changes of commit 14fecdadf2 - Show all commits

View File

@@ -94,24 +94,22 @@ class ConfigureAssistant:
Preprocess files based on their type. Preprocess files based on their type.
:param file_path: The file path to preprocess. :param file_path: The file path to preprocess.
:return: The new filename and a flag indicating if the file should be uploaded. :return: The filename to be used during upload and a flag indicating if the file should be uploaded.
""" """
extension = file_path.split(".")[-1].lower() extension = file_path.split(".")[-1].lower()
# TO DO: Preprocess Outjob and PcbDoc files into something OpenAI (or future vector DB) can understand
excluded_extensions = ["schdoc", "exe", "so", "dll", "outjob", "pcbdoc", "png"] excluded_extensions = ["schdoc", "exe", "so", "dll", "outjob", "pcbdoc", "png"]
# If the extension is not in supported formats and not in excluded extensions, rename the file # If the extension is not in supported formats and not in excluded extensions, spoof the file name
if extension not in self.SUPPORTED_FORMATS and extension not in excluded_extensions: if extension not in self.SUPPORTED_FORMATS and extension not in excluded_extensions:
new_file_path = f"{file_path}.txt" new_file_name = f"{os.path.basename(file_path)}.txt"
os.rename(file_path, new_file_path) print(f"Spoofing unsupported file: {file_path} as {new_file_name}")
print(f"Renamed unsupported file: {file_path} to {new_file_path}") return new_file_name, True
return new_file_path, True
elif extension in self.SUPPORTED_FORMATS: elif extension in self.SUPPORTED_FORMATS:
return file_path, True return os.path.basename(file_path), True
else: else:
print(f"Skipping excluded file: {file_path}") print(f"Skipping excluded file: {file_path}")
return file_path, False return os.path.basename(file_path), False
def create_vector_store(self): def create_vector_store(self):
""" """
@@ -133,10 +131,10 @@ class ConfigureAssistant:
if not should_upload: if not should_upload:
continue continue
try: try:
with open(new_filename, "rb") as file_stream: with open(path, "rb") as file_stream:
file_content = file_stream.read() file_content = file_stream.read()
spoofed_file = io.BytesIO(file_content) spoofed_file = io.BytesIO(file_content)
spoofed_file.name = os.path.basename(new_filename) # Spoof the filename spoofed_file.name = new_filename # Spoof the filename
# Upload the file to the vector store # Upload the file to the vector store
file_batch = ( file_batch = (
self.client.beta.vector_stores.file_batches.upload_and_poll( self.client.beta.vector_stores.file_batches.upload_and_poll(