Assistant Tools¶

Give Assistants access to OpenAI-hosted tools like Code Interpreter and Knowledge Retrieval, or build your own tools using Function calling. Using OpenAI-hosted tools incurs an additional fee; see the help center article to learn how these tools are priced.

Code Interpreter¶

Code Interpreter allows the Assistants API to write and run Python code in a sandboxed execution environment. This tool can process files with diverse data and formatting, and can generate files containing data and images of graphs. Code Interpreter lets your Assistant run code iteratively to solve challenging code and math problems: when your Assistant writes code that fails to run, it can iterate by writing and running different code until the code execution succeeds.
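The iterate-until-success behavior can be pictured with a small local analogy. This is purely illustrative: the candidate snippets below are invented, and the real tool runs its attempts in OpenAI's sandbox, not locally.

```python
# Purely illustrative analogy of "try, observe the error, try different code":
# run candidate snippets in order until one executes without raising.
candidates = [
    "result = 1 / 0",            # fails: ZeroDivisionError
    "result = int('forty-two')", # fails: ValueError
    "result = 6 * 7",            # succeeds
]

def run_until_success(snippets):
    """Return (result, code) for the first snippet that executes cleanly."""
    for code in snippets:
        scope = {}
        try:
            exec(code, scope)
            return scope["result"], code
        except Exception as exc:
            print(f"retrying after: {exc!r}")
    raise RuntimeError("all candidates failed")

value, winning_code = run_until_success(candidates)
print(value)  # 42
```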

Code Interpreter is charged at $0.03 per session. If your Assistant calls Code Interpreter simultaneously in two different threads (e.g., one thread per end user), two Code Interpreter sessions are created. Each session is active by default for one hour, which means you only pay for one session if your users interact with Code Interpreter in the same thread for up to one hour.
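Applied to the example above, two simultaneous threads mean two sessions. A back-of-the-envelope sketch (the helper is illustrative, not part of the API):

```python
# Illustrative estimate of Code Interpreter cost: each distinct session
# (one per thread per active hour window) is billed at $0.03.
SESSION_PRICE_USD = 0.03

def estimate_session_cost(sessions: int, price: float = SESSION_PRICE_USD) -> float:
    """Return the total Code Interpreter cost for a number of sessions."""
    return round(sessions * price, 2)

# Two threads calling Code Interpreter simultaneously -> two sessions
print(estimate_session_cost(2))  # 0.06
```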

To enable Code Interpreter, pass code_interpreter in the tools parameter of the Assistant object. The model then decides when to invoke Code Interpreter in a Run based on the nature of the user request. You can promote this behavior through the Assistant's instructions (e.g., “write code to solve this problem”).

Using Files with Code Interpreter¶

Code Interpreter can parse data from files. This is useful when you want to provide a large volume of data to the Assistant or allow your users to upload their own files for analysis. Note that files uploaded for Code Interpreter are not indexed for retrieval. See the Knowledge Retrieval section below for more details on indexing files for retrieval.

Files passed at the Assistant level are accessible by all Runs of that Assistant. Files can also be passed at the individual Message level; those files are only accessible within the specific Thread.

In [1]:
from mlteam_utils import print_object
from openai import OpenAI
client = OpenAI()

# Upload a file with an "assistants" purpose
file = client.files.create(
  file=open("data/AG_news_samples.csv", "rb"),
  purpose='assistants'
)

# Create an assistant with 'code_interpreter' enabled
assistant = client.beta.assistants.create(
  name = "CSV Analyzer",
  instructions="You are an assistant that does exploratory data analysis given a dataset as a CSV file. When asked a question about the data, write and run code to answer the question.",
  model="gpt-4o-mini",
  tools=[{"type": "code_interpreter"}]
)

thread = client.beta.threads.create()

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="How many news are labeled as 'Sports'?",
    attachments= [
        { "file_id": file.id, "tools": [{"type": "code_interpreter"}] }
      ]
)

run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Wait for Run to be completed
import time

while True:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(
      thread_id=thread.id,
      run_id=run.id
    )
    if run.status in ['queued', 'in_progress', 'requires_action', 'cancelling']:
        continue
    if run.status in ["completed", "expired", "failed", "cancelled"]:
        print(f"Run is {run.status}")
        break
    break
Run is completed
In [2]:
# Get the response
messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

for message in reversed(messages.data):
  print(f"_______Role: {message.role}_______\n")
  print(f"{message.content[0].text.value}\n")
_______Role: user_______

How many news are labeled as 'Sports'?

_______Role: assistant_______

First, I will load the dataset to understand its structure and locate the relevant column that identifies the labels (for example, categories like 'Sports'). Let's start by examining the contents of the file.

_______Role: assistant_______

The dataset contains a column named 'label' which specifies various categories for each news item, such as 'World', 'Sci/Tech', and 'Sports'. To answer your question, I will count how many news items are labeled as 'Sports'.

_______Role: assistant_______

There are 491 news items labeled as 'Sports' in the dataset.

Reading images and files generated by Code Interpreter¶

Code Interpreter in the API also outputs files, such as image diagrams, CSVs, and PDFs. Two types of files are generated:

  1. Images
  2. Data files (e.g., a CSV file with data generated by the Assistant)

When Code Interpreter generates an image, you can look up and download this file via the file_id field of the Assistant's Message response.

In [3]:
# Add a new message to the previously created thread
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Can you give me a plot diagram with 'label' in the x axis, and count in the y axis?"
)

run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Wait for Run to be completed
import time

while True:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(
      thread_id=thread.id,
      run_id=run.id
    )
    if run.status in ['queued', 'in_progress', 'requires_action', 'cancelling']:
        continue
    if run.status in ["completed", "expired", "failed", "cancelled"]:
        print(f"Run is {run.status}")
        break
    break
Run is completed
In [4]:
# Get the response
messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

# Print only the last message (which is assistant's response)
message = messages.data[0]
for i, content in enumerate(message.content):
  if content.type == 'text':
    print(f"{content.text.value}\n")
  elif content.type == 'image_file':
    image_data = client.files.content(content.image_file.file_id)
    image_data_bytes = image_data.read()
    # Print the image
    from IPython.display import display, Image
    display(Image(data=image_data_bytes))
    # You can also save to a file
    with open("output/plot-image.png", "wb") as file:
      file.write(image_data_bytes)
      
    # Cleanup the file from OpenAI
    client.files.delete(content.image_file.file_id)
Here is the bar plot that displays the count of news items for each category in the dataset. The x-axis shows the different labels (news categories), and the y-axis shows the count of news items in each category.

Message annotations¶

Messages created by Assistants may contain annotations within the content array of the object. Annotations tell you how to post-process the text in the Message.

There are two types of Annotations:

  1. file_citation: File citations are created by the retrieval tool and define references to a specific quote in a specific file that was uploaded and used by the Assistant to generate the response.
  2. file_path: File path annotations are created by the code_interpreter tool and contain references to the files generated by the tool.

When annotations are present in the Message object, the text contains unreadable model-generated substrings that you should replace using the annotations. These substrings may look something like 【13†source】 or sandbox:/mnt/data/file.csv. Here’s an example Python code snippet that replaces these substrings with information present in the annotations.
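The substitution itself is plain string replacement; here it is on a self-contained sample (the message text, path, and annotation values below are made up for illustration):

```python
# Self-contained illustration of annotation replacement (sample values are invented).
# Each annotation carries the literal substring to replace (annotation.text); you
# choose the replacement: a local path for file_path, a footnote marker for file_citation.
message_text = (
    "Saved the result to [Download](sandbox:/mnt/data/filtered.csv) "
    "as discussed in the report【13†source】."
)
annotations = [
    {"type": "file_path", "text": "sandbox:/mnt/data/filtered.csv", "replacement": "output/filtered.csv"},
    {"type": "file_citation", "text": "【13†source】", "replacement": "[1]"},
]

for ann in annotations:
    message_text = message_text.replace(ann["text"], ann["replacement"])

print(message_text)
```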

In [5]:
# Add a new message to the previously created thread
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="Can you create a new CSV file by removing all news in 'Business' category?"
)

run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Wait for Run to be completed
import time

while True:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(
      thread_id=thread.id,
      run_id=run.id
    )
    if run.status in ['queued', 'in_progress', 'requires_action', 'cancelling']:
        continue
    if run.status in ["completed", "expired", "failed", "cancelled"]:
        print(f"Run is {run.status}")
        break
    break
Run is completed
In [6]:
# Our response will only have `file_path` annotations. But the following code also handles `file_citation` annotations.

# Get the response
messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

import os

# Print only the last message (which is assistant's response)
text = messages.data[0].content[0].text.value
print(f"_______Original text:_______\n{text}")

annotations = messages.data[0].content[0].text.annotations
citations = []
for i, annotation in enumerate(annotations):
    if (file_path := getattr(annotation, 'file_path', None)):
        cited_file = client.files.retrieve(file_path.file_id)
        cited_file_content = client.files.content(file_path.file_id)
        file_name = "output/" + os.path.basename(cited_file.filename)
        with open(file_name, "wb") as file:
          file.write(cited_file_content.read())
        text = text.replace(annotation.text, file_name)
    elif (file_citation := getattr(annotation, 'file_citation', None)):
        text = text.replace(annotation.text, f'[{i}]')
        cited_file = client.files.retrieve(file_citation.file_id)
        citations.append(f'[{i}] {file_citation.quote} from {cited_file.filename}')

# Add footnotes to the end of the message before displaying to user
if len(citations) > 0: 
  text += '\n' + '\n'.join(citations)
print(f"_______Processed text:_______\n{text}")
_______Original text:_______
The news items labeled as 'Business' have been removed, and the filtered dataset has been saved to a new CSV file. You can download it using the following link:

[Download filtered news data](sandbox:/mnt/data/filtered_news_data.csv)
_______Processed text:_______
The news items labeled as 'Business' have been removed, and the filtered dataset has been saved to a new CSV file. You can download it using the following link:

[Download filtered news data](output/filtered_news_data.csv)

Input/Output Logs of Code Interpreter¶

By listing the steps of a Run that called Code Interpreter, you can inspect its code inputs and output logs.

In [7]:
run_steps = client.beta.threads.runs.steps.list(
    thread_id=thread.id,
    run_id=run.id
)

for step in run_steps:
    if (tool_calls := getattr(step.step_details, 'tool_calls', None)):
        print_object(tool_calls)
[
    {
        "id": "call_OKJox0GyLzVISYl54frHvvdJ",
        "code_interpreter": {
            "input": "# Filter out news items labeled as 'Business'\nfiltered_data = data[data['label'] != 'Business']\n\n# Save the filtered data to a new CSV file\nfiltered_file_path = '/mnt/data/filtered_news_data.csv'\nfiltered_data.to_csv(filtered_file_path, index=False)\nfiltered_file_path",
            "outputs": [
                {
                    "logs": "'/mnt/data/filtered_news_data.csv'",
                    "type": "logs"
                }
            ]
        },
        "type": "code_interpreter"
    }
]
In [8]:
# Cleanup
try:
  assistant_files = client.beta.assistants.files.list(assistant_id=assistant.id)
  for file in assistant_files:
      client.files.delete(file_id=file.id)

  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )

  for message in messages: 
    message_files = client.beta.threads.messages.files.list(
      thread_id=thread.id,
      message_id=message.id
    )
    for file in message_files:
      client.files.delete(file_id=file.id)

  client.beta.assistants.delete(assistant_id=assistant.id)
  client.beta.threads.delete(thread_id=thread.id)
except Exception as e:
  print(e)
'Assistants' object has no attribute 'files'

Knowledge Retrieval¶

Retrieval augments the Assistant with knowledge from outside its model, such as proprietary product information or documents provided by your users. Once a file is uploaded and passed to the Assistant, OpenAI will automatically chunk your documents, index and store the embeddings, and implement vector search to retrieve relevant content to answer user queries.

Pass retrieval in the tools parameter of the Assistant to enable Retrieval. The model then decides when to retrieve content based on the user Messages. The Assistants API automatically chooses between two retrieval techniques:

  1. passing the file content in the prompt for short documents, or
  2. performing a vector search for longer documents
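A rough way to picture that choice (the threshold and helper below are invented for illustration; the API's actual heuristic is internal and not published):

```python
# Invented illustration of "short docs in the prompt, long docs via vector search".
# The 4000-token threshold is made up; the real decision is internal to the API.
def choose_retrieval_strategy(document_tokens: int, threshold: int = 4000) -> str:
    """Pick a retrieval technique based on document length."""
    return "prompt_stuffing" if document_tokens <= threshold else "vector_search"

print(choose_retrieval_strategy(1200))   # prompt_stuffing
print(choose_retrieval_strategy(90000))  # vector_search
```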

Using Files with Knowledge Retrieval¶

Similar to Code Interpreter, files can be passed at the Assistant level or at the individual Message level. When a file is attached at the Message level, it is only accessible within the specific Thread the Message belongs to. After uploading a file, you can pass its File ID when creating the Message.

Note that you are not charged for the raw size of the files you upload via the Files API, but for the files you attach to a specific Assistant or Message, since those are the ones that get indexed.

You can attach a maximum of 20 files per Assistant, and they can be at most 512 MB each. The size of all the files uploaded by your organization should not exceed 100 GB. You can request an increase in this storage limit using the help center. In addition to the 512 MB file size limit, each file can only contain 2,000,000 tokens. Assistant or Message creation will fail if any attached files exceed the token limit.
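Those limits can be checked client-side before uploading. A minimal sketch (the helper name is illustrative, and the token estimate must come from your own tokenizer, e.g. tiktoken):

```python
# Illustrative pre-upload check against the documented limits:
# 512 MB per file and 2,000,000 tokens per file.
MAX_FILE_BYTES = 512 * 1024 * 1024
MAX_FILE_TOKENS = 2_000_000

def check_upload_limits(size_bytes: int, estimated_tokens: int) -> list:
    """Return a list of limit violations (empty if the file looks acceptable)."""
    problems = []
    if size_bytes > MAX_FILE_BYTES:
        problems.append(f"file is {size_bytes} bytes, over the 512 MB limit")
    if estimated_tokens > MAX_FILE_TOKENS:
        problems.append(f"~{estimated_tokens} tokens, over the 2,000,000-token limit")
    return problems

print(check_upload_limits(10_000_000, 500_000))     # []
print(check_upload_limits(600 * 1024 * 1024, 100))  # one violation reported
```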

You can also use the AssistantFile object to create, delete, or view associations between Assistant and File objects. Note that deleting an AssistantFile doesn’t delete the original File object; it simply removes the association between that File and the Assistant. To delete a File, use the File delete endpoint instead.

Supported files¶

For text/ MIME types, the encoding must be one of utf-8, utf-16, or ascii.

| File format | MIME type | Code Interpreter | Retrieval |
|---|---|---|---|
| .c | text/x-c | ✓ | ✓ |
| .cpp | text/x-c++ | ✓ | ✓ |
| .csv | application/csv | ✓ | ✓ |
| .docx | application/vnd.openxmlformats-officedocument.wordprocessingml.document | ✓ | ✓ |
| .html | text/html | ✓ | ✓ |
| .java | text/x-java | ✓ | ✓ |
| .json | application/json | ✓ | ✓ |
| .md | text/markdown | ✓ | ✓ |
| .pdf | application/pdf | ✓ | ✓ |
| .php | text/x-php | ✓ | ✓ |
| .pptx | application/vnd.openxmlformats-officedocument.presentationml.presentation | ✓ | ✓ |
| .py | text/x-python | ✓ | ✓ |
| .py | text/x-script.python | ✓ | ✓ |
| .rb | text/x-ruby | ✓ | ✓ |
| .tex | text/x-tex | ✓ | ✓ |
| .txt | text/plain | ✓ | ✓ |
| .css | text/css | ✓ | |
| .jpeg | image/jpeg | ✓ | |
| .jpg | image/jpeg | ✓ | |
| .js | text/javascript | ✓ | |
| .gif | image/gif | ✓ | |
| .png | image/png | ✓ | |
| .tar | application/x-tar | ✓ | |
| .ts | application/typescript | ✓ | |
| .xlsx | application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | ✓ | |
| .xml | application/xml or text/xml | ✓ | |
| .zip | application/zip | ✓ | |
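The table above can double as a client-side lookup when deciding which tool a file supports. A partial sketch (only a few rows are included, and the helper is illustrative):

```python
import os

# Partial, illustrative lookup built from the supported-files table (not exhaustive).
SUPPORTED = {
    ".csv": {"mime": "application/csv", "code_interpreter": True, "retrieval": True},
    ".pdf": {"mime": "application/pdf", "code_interpreter": True, "retrieval": True},
    ".png": {"mime": "image/png",       "code_interpreter": True, "retrieval": False},
    ".zip": {"mime": "application/zip", "code_interpreter": True, "retrieval": False},
}

def tools_for(filename: str) -> list:
    """Return which hosted tools accept this file, based on its extension."""
    ext = os.path.splitext(filename)[1].lower()
    entry = SUPPORTED.get(ext)
    if entry is None:
        return []
    return [t for t in ("code_interpreter", "retrieval") if entry[t]]

print(tools_for("report.pdf"))  # ['code_interpreter', 'retrieval']
print(tools_for("chart.png"))   # ['code_interpreter']
```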

Retrieval pricing¶

Retrieval is priced at 0.20 USD per GB per assistant per day. Attaching a single file ID to multiple assistants will incur the per assistant per day charge when the retrieval tool is enabled. For example, if you attach the same 1 GB file to two different Assistants with the retrieval tool enabled, you’ll be charged twice for this storage fee (2 * 0.20 USD per day). This fee does not vary with the number of end users and threads retrieving knowledge from a given assistant.

In addition, files attached to messages are charged on a per-assistant basis if the messages are part of a run where the retrieval tool is enabled. For example, running an assistant with retrieval enabled on a thread with 10 messages each with 1 unique file (10 total unique files) will incur a per-GB per-day charge on all 10 files (in addition to any files attached to the assistant itself).
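Worked through in code (the numbers mirror the examples above; the helper itself is illustrative, not part of the API):

```python
# Illustrative cost estimate for the documented retrieval pricing:
# 0.20 USD per GB, per assistant, per day.
PRICE_PER_GB_PER_ASSISTANT_PER_DAY = 0.20

def retrieval_storage_cost(total_gb: float, assistants: int, days: int) -> float:
    """Estimated storage fee in USD for indexed files."""
    return round(total_gb * assistants * days * PRICE_PER_GB_PER_ASSISTANT_PER_DAY, 2)

# The same 1 GB file attached to two assistants, for one day:
print(retrieval_storage_cost(1.0, assistants=2, days=1))  # 0.4
```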

Deleting files¶

To remove a file from the assistant, detach it from the assistant. Detaching removes the file from the retrieval index, and you will no longer be charged for storage of the indexed file.

file_deletion_status = client.beta.assistants.files.delete(
  assistant_id=assistant.id,
  file_id=file.id
)
In [9]:
# Create a vector store called "File Search Test"
vector_store = client.beta.vector_stores.create(name="File Search Test")
 
file=open("data/Assistants_tools _OpenAI.pdf", "rb")
 
# Use the upload and poll SDK helper to upload the files, add them to the vector store,
# and poll the status of the file batch for completion.
file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
  vector_store_id=vector_store.id, files=[file]
)
 
# You can print the status and the file counts of the batch to see the result of this operation. 
print(file_batch.status)
print(file_batch.file_counts)
completed
FileCounts(cancelled=0, completed=1, failed=0, in_progress=0, total=1)
In [10]:
# Create an assistant with 'file_search' enabled and the vector store attached
assistant = client.beta.assistants.create(
    name = "OpenAI Assistant Tools",
    instructions="You are an assistant who is specialized in 'Tools that can be used with OpenAI Assistants'. You will retrieve the knowledge from the attached file to answer questions. For questions that are not related to your specialty, politely decline to answer.",
    model="gpt-4o-mini",
    tools=[{"type": "file_search"}],
    tool_resources={
      "file_search": {
        "vector_store_ids": [vector_store.id]
      }
    }
)

thread = client.beta.threads.create()

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="What are the different type of tools that I can use with Assistants API?"
)

run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Wait for Run to be completed
import time

while True:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(
      thread_id=thread.id,
      run_id=run.id
    )
    if run.status in ['queued', 'in_progress', 'requires_action', 'cancelling']:
        continue
    if run.status in ["completed", "expired", "failed", "cancelled"]:
        print(f"Run is {run.status}")
        print(run.last_error)
        break
    break
Run is completed
None
In [11]:
# Ask an irrelevant question and see if the assistant declines. Normally, ChatGPT with GPT-4 would answer this question.
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="What are the alternative solutions that I can use instead of Redis?"
)

run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Wait for Run to be completed
import time

while True:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(
      thread_id=thread.id,
      run_id=run.id
    )
    if run.status in ['queued', 'in_progress', 'requires_action', 'cancelling']:
        continue
    if run.status in ["completed", "expired", "failed", "cancelled"]:
        print(f"Run is {run.status}")
        break
    break
Run is completed
In [13]:
# Get the response
messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

for message in reversed(messages.data):
  print(f"_______Role: {message.role}_______\n")
  # Process annotations before printing the text
  text = message.content[0].text.value
  annotations = message.content[0].text.annotations
  citations = []
  for i, annotation in enumerate(annotations):
      if (file_citation := getattr(annotation, 'file_citation', None)):
          text = text.replace(annotation.text, f'[{i}]')
          cited_file = client.files.retrieve(file_citation.file_id)
          citations.append(f'[{i}]: {cited_file.filename}')
  # Add footnotes to the end of the message before displaying to user
  if len(citations) > 0: 
    text += '\n' + '\n'.join(citations)
  print(f"{text}\n")
_______Role: user_______

What are the different type of tools that I can use with Assistants API?

_______Role: assistant_______

I can help guide you on using tools compatible with the OpenAI Assistants API. Here are some common types and categories of tools you might use:

1. **Data Extraction and Parsing Tools:**
   - These tools help in extracting specific data from files, web pages, or databases.
   
2. **Data Analysis and Visualization:**
   - Tools that assist in analyzing large volumes of data and can create visual representations of data to make it easier to understand.

3. **Natural Language Processing (NLP) Tools:**
   - These enhance the assistant's ability to understand and process human language, making interactions more effective.

4. **APIs for Third-party Services:**
   - Can involve weather information, financial data, or social media interactions, which require interaction with external APIs.

5. **Collaboration and Productivity Tools:**
   - Tools that integrate with email, calendar, and project management software to enhance productivity in workflows.

6. **Automation and Scripting Tools:**
   - Useful for automating repetitive tasks including data entry, system maintenance, or batch file processing.

Each type of tool brings different capabilities that can be leveraged depending on your specific needs or the tasks you want to automate or enhance with the Assistant.

_______Role: user_______

What are the alternative solutions that I can use instead of Redis?

_______Role: assistant_______

I'm specialized in assisting with tools that can be used with OpenAI Assistants. Unfortunately, I cannot provide advice on alternative solutions to Redis or topics outside my area of focus. Please refer to other resources or experts for this information.

In [14]:
# Cleanup
try:
    assistant_files = client.beta.assistants.files.list(assistant_id=assistant.id)
    for file in assistant_files:
        client.files.delete(file_id=file.id)

    client.beta.assistants.delete(assistant_id=assistant.id)
    client.beta.threads.delete(thread_id=thread.id)
except Exception as e:
  print(e)
'Assistants' object has no attribute 'files'

Function Calling¶

Similar to the Chat Completions API, the Assistants API supports function calling. Function calling allows you to describe functions to the Assistant and have it intelligently return the functions that need to be called, along with their arguments. The Assistants API pauses execution during a Run when it invokes functions, and you can supply the results of the function calls back to continue the Run execution.

When using the Function calling tool, the Run moves to a required_action state once the model determines the names and arguments of the functions to be called. You must then run those functions and submit the outputs before the Run proceeds. If the outputs are not provided before the expires_at timestamp passes (roughly 10 minutes after creation), the Run moves to an expired status.
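That deadline can be checked locally. A minimal sketch, assuming expires_at is a Unix timestamp in seconds as returned on the Run object (the helper name is illustrative):

```python
import time

def seconds_until_expiry(expires_at, now=None):
    """Seconds left before a requires_action Run expires
    if tool outputs are not submitted in time."""
    if now is None:
        now = time.time()
    return expires_at - now

# A Run created at `created_at` typically expires roughly 10 minutes later.
created_at = 1_700_000_000
expires_at = created_at + 600
print(seconds_until_expiry(expires_at, now=created_at))  # 600
```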

In [1]:
import json

# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="celsius"):
    """Get the current weather in a given location"""
    if "tokyo" in location.lower():
        return json.dumps({"location": "Tokyo", "temperature": "10", "unit": unit})
    elif "san francisco" in location.lower():
        return json.dumps({"location": "San Francisco", "temperature": "32", "unit": unit})
    elif "paris" in location.lower():
        return json.dumps({"location": "Paris", "temperature": "22", "unit": unit})
    else:
        return json.dumps({"location": location, "temperature": "unknown"})
In [5]:
from mlteam_utils import print_object
from openai import OpenAI
client = OpenAI()

# Create an assistant with the function(s) specified under 'tools' parameter.
assistant = client.beta.assistants.create(
    name = "Weather Bot",
    instructions="You are a weather bot. Use the provided functions to answer questions.",
    model="gpt-4o-mini",
    tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
    ]
)

thread = client.beta.threads.create()

message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="What is the weather like in Paris and Tokyo?"
)

run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Wait for Run to be completed
import time

while True:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(
      thread_id=thread.id,
      run_id=run.id
    )
    if run.status in ['queued', 'in_progress', 'cancelling']:
        continue
    if run.status in ['requires_action']:
        print_object(run.required_action)
        break
    if run.status in ["completed", "expired", "failed", "cancelled"]:
        print(f"Run is {run.status}")
        print(run.last_error)
        break
    break
{
    "submit_tool_outputs": {
        "tool_calls": [
            {
                "id": "call_5Bzj5lVC8doQ56cENYWM1Yr1",
                "function": {
                    "arguments": "{\"location\": \"Paris\"}",
                    "name": "get_current_weather"
                },
                "type": "function"
            },
            {
                "id": "call_gB27Z1CXRWlNS3FSl0DLZSJw",
                "function": {
                    "arguments": "{\"location\": \"Tokyo\"}",
                    "name": "get_current_weather"
                },
                "type": "function"
            }
        ]
    },
    "type": "submit_tool_outputs"
}
In [6]:
# The model can provide multiple functions to call at once using parallel function calling
if run.status in ['requires_action']:
    tool_outputs = []
    for call in run.required_action.submit_tool_outputs.tool_calls:
        # Execute each function that the assistant asked you to call
        function_to_call = globals().get(call.function.name)
        function_args = json.loads(call.function.arguments)
        function_response = function_to_call(
            location=function_args.get("location"),
            unit=function_args.get("unit", "celsius"),
        )

        # Store the tool outputs to pass back to the assistant
        tool_outputs.append({
            "tool_call_id": call.id,
            "output": function_response
        })

# Run the assistant again with the tool_outputs
run = client.beta.threads.runs.submit_tool_outputs(
  thread_id=thread.id,
  run_id=run.id,
  tool_outputs=tool_outputs
)

while True:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(
      thread_id=thread.id,
      run_id=run.id
    )
    if run.status in ['queued', 'in_progress', 'cancelling']:
        continue
    if run.status in ['requires_action']:
        break
    if run.status in ["completed", "expired", "failed", "cancelled"]:
        print(f"Run is {run.status}")
        print(run.last_error)
        break
    break
Run is completed
None
In [7]:
# Get the response
messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

for message in reversed(messages.data):
  print(f"_______Role: {message.role}_______\n")
  print(f"{message.content[0].text.value}\n")
_______Role: user_______

What is the weather like in Paris and Tokyo?

_______Role: assistant_______

The current weather in:

- **Paris**: 22°C
- **Tokyo**: 10°C

In [8]:
# Cleanup
try:
    client.beta.assistants.delete(assistant_id=assistant.id)
    client.beta.threads.delete(thread_id=thread.id)
except Exception as e:
  print(e)