Fine-Tuning with the OpenAI API¶
Introduction¶
Fine-tuning lets you get more out of the models available through the API by providing:
- Higher quality results than prompting
- Ability to train on more examples than can fit in a prompt
- Token savings due to shorter prompts
- Lower latency requests
OpenAI's text generation models have been pre-trained on a vast amount of text. To use the models effectively, we include instructions and sometimes several examples in a prompt. Using demonstrations to show how to perform a task is often called "few-shot learning."
Fine-tuning improves on few-shot learning by training on many more examples than can fit in the prompt, letting you achieve better results on a wide range of tasks. Once a model has been fine-tuned, you won't need to provide as many examples in the prompt. This saves costs and enables lower-latency requests.
At a high level, fine-tuning involves the following steps:
- Prepare and upload training data
- Train a new fine-tuned model
- Evaluate results and go back to step 1 if needed
- Use your fine-tuned model
You can also fine-tune a fine-tuned model which is useful if you acquire additional data and don't want to repeat the previous training steps.
Common Use Cases¶
Some common use cases where fine-tuning can improve results:
- Setting the style, tone, format, or other qualitative aspects
- Improving reliability at producing a desired output
- Correcting failures to follow complex prompts
- Handling many edge cases in specific ways
- Performing a new skill or task that’s hard to articulate in a prompt
One high-level way to think about these cases is when it’s easier to "show, not tell". In the sections to come, we will explore how to set up data for fine-tuning and various examples where fine-tuning improves the performance over the baseline model.
Preparing Your Dataset¶
Example Format¶
Each example in the dataset should be a conversation in the same format as our Chat Completions API, specifically a list of messages where each message has a role, content, and optional name. At least some of the training examples should directly target cases where the prompted model is not behaving as desired, and the provided assistant messages in the data should be the ideal responses you want the model to provide.
In this example, our goal is to create a chatbot that occasionally gives sarcastic responses. Here are three training examples (conversations) we could create for such a dataset:
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}
You can use the openai CLI (bundled with pre-1.0 versions of the openai Python package) to transform data from some common file types into JSONL:
openai tools fine_tunes.prepare_data -f data[.csv, .json, .txt, .xlsx or .tsv]
Example count¶
To fine-tune a model, you are required to provide at least 10 examples. We typically see clear improvements from fine-tuning on 50 to 100 training examples but the right number varies greatly based on the exact use case.
Token limits¶
Token limits depend on the model you select. To be sure that your entire training example fits in context, consider checking that the total token counts in the message contents are under the limit.
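As a quick pre-check, you can estimate whether an example fits. This is a sketch only: `TOKEN_LIMIT` and the ~4-characters-per-token heuristic are assumptions, and exact counts require a real tokenizer (the tiktoken-based utilities later in this notebook).

```python
# Rough pre-check that an example's message contents fit an assumed per-example
# token limit. Uses the common ~4 characters per token rule of thumb; for exact
# counts, use the tiktoken-based utilities defined later in this notebook.
TOKEN_LIMIT = 4096  # assumed limit; check the documentation for your model

def approx_tokens(text):
    """Very rough token estimate: about 4 characters per token."""
    return max(1, len(text) // 4)

def example_fits(messages, limit=TOKEN_LIMIT):
    total = sum(approx_tokens(m["content"]) for m in messages)
    return total <= limit

example = [
    {"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."},
    {"role": "user", "content": "What's the capital of France?"},
]
print(example_fits(example))  # True
```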
Estimate costs¶
Please refer to the pricing page for details on cost per 1k input and output tokens (we do not charge for tokens that are part of the validation data). To estimate the cost of a specific fine-tuning job, use the following formula:
base cost per 1k tokens * number of tokens in the input file * number of epochs trained
For a training file with 100,000 tokens trained over 3 epochs, the expected cost would be ~$2.40 USD.
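As a sanity check, here is the arithmetic for that example. The base rate used below is an assumption for illustration; verify it against the pricing page.

```python
# Worked example of the cost formula above. The base rate is an assumed
# value for illustration; always check the current pricing page.
base_cost_per_1k_tokens = 0.008  # USD, assumed training rate
n_tokens_in_file = 100_000
n_epochs = 3

estimated_cost = base_cost_per_1k_tokens * (n_tokens_in_file / 1000) * n_epochs
print(f"~${estimated_cost:.2f} USD")  # ~$2.40 USD
```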
import json
import numpy as np

data_path = "data/toy_chat_fine_tuning.jsonl"

# Load the dataset
with open(data_path, 'r', encoding='utf-8') as f:
    dataset = [json.loads(line) for line in f]

# Initial dataset stats
print("Num examples:", len(dataset))
print("First example:")
for message in dataset[0]["messages"]:
    print(message)
Format validation¶
We can perform a variety of error checks to validate that each conversation in the dataset adheres to the format expected by the fine-tuning API. Errors are categorized based on their nature for easier debugging.
- Data Type Check: Checks whether each entry in the dataset is a dictionary (dict). Error type: data_type.
- Presence of Message List: Checks if a messages list is present in each entry. Error type: missing_messages_list.
- Message Keys Check: Validates that each message in the messages list contains the keys role and content. Error type: message_missing_key.
- Unrecognized Keys in Messages: Logs if a message has keys other than role, content, and name. Error type: message_unrecognized_key.
- Role Validation: Ensures the role is one of "system", "user", or "assistant". Error type: unrecognized_role.
- Content Validation: Verifies that content has textual data and is a string. Error type: missing_content.
- Assistant Message Presence: Checks that each conversation has at least one message from the assistant. Error type: example_missing_assistant_message.
# Format error checks
from collections import defaultdict

format_errors = defaultdict(int)

for ex in dataset:
    if not isinstance(ex, dict):
        format_errors["data_type"] += 1
        continue

    messages = ex.get("messages", None)
    if not messages:
        format_errors["missing_messages_list"] += 1
        continue

    for message in messages:
        if "role" not in message or "content" not in message:
            format_errors["message_missing_key"] += 1

        if any(k not in ("role", "content", "name", "function_call") for k in message):
            format_errors["message_unrecognized_key"] += 1

        if message.get("role", None) not in ("system", "user", "assistant", "function"):
            format_errors["unrecognized_role"] += 1

        content = message.get("content", None)
        function_call = message.get("function_call", None)
        if (not content and not function_call) or not isinstance(content, str):
            format_errors["missing_content"] += 1

    if not any(message.get("role", None) == "assistant" for message in messages):
        format_errors["example_missing_assistant_message"] += 1

if format_errors:
    print("Found errors:")
    for k, v in format_errors.items():
        print(f"{k}: {v}")
else:
    print("No errors found")
Token Counting Utilities¶
Let's define a few helpful utilities to be used in the rest of the notebook.
import tiktoken  # for token counting

encoding = tiktoken.get_encoding("cl100k_base")

# not exact!
# simplified from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3
    return num_tokens

def num_assistant_tokens_from_messages(messages):
    num_tokens = 0
    for message in messages:
        if message["role"] == "assistant":
            num_tokens += len(encoding.encode(message["content"]))
    return num_tokens

def print_distribution(values, name):
    print(f"\n#### Distribution of {name}:")
    print(f"min / max: {min(values)}, {max(values)}")
    print(f"mean / median: {np.mean(values)}, {np.median(values)}")
    print(f"p5 / p95: {np.quantile(values, 0.05)}, {np.quantile(values, 0.95)}")
Data Warnings and Token Count¶
With some lightweight analysis we can identify potential issues in the dataset, like missing messages, and provide statistical insights into message and token counts.
- Missing System/User Messages: Counts the number of conversations missing a "system" or "user" message. Such messages are critical for defining the assistant's behavior and initiating the conversation.
- Number of Messages Per Example: Summarizes the distribution of the number of messages in each conversation, providing insight into dialogue complexity.
- Total Tokens Per Example: Calculates and summarizes the distribution of the total number of tokens in each conversation. Important for understanding fine-tuning costs.
- Tokens in Assistant's Messages: Calculates the number of tokens in the assistant's messages per conversation and summarizes this distribution. Useful for understanding the assistant's verbosity.
- Token Limit Warnings: Checks if any examples exceed the maximum token limit (4096 tokens), as such examples will be truncated during fine-tuning, potentially resulting in data loss.
# Warnings and token counts
n_missing_system = 0
n_missing_user = 0
n_messages = []
convo_lens = []
assistant_message_lens = []

for ex in dataset:
    messages = ex["messages"]
    if not any(message["role"] == "system" for message in messages):
        n_missing_system += 1
    if not any(message["role"] == "user" for message in messages):
        n_missing_user += 1
    n_messages.append(len(messages))
    convo_lens.append(num_tokens_from_messages(messages))
    assistant_message_lens.append(num_assistant_tokens_from_messages(messages))

print("Num examples missing system message:", n_missing_system)
print("Num examples missing user message:", n_missing_user)
print_distribution(n_messages, "num_messages_per_example")
print_distribution(convo_lens, "num_total_tokens_per_example")
print_distribution(assistant_message_lens, "num_assistant_tokens_per_example")

n_too_long = sum(l > 4096 for l in convo_lens)
print(f"\n{n_too_long} examples may be over the 4096 token limit; they will be truncated during fine-tuning")
Cost Estimation¶
We estimate the total number of tokens that will be used for fine-tuning, which allows us to approximate the cost. It is worth noting that the duration of the fine-tuning jobs will also increase with the token count.
# Pricing and default n_epochs estimate
MAX_TOKENS_PER_EXAMPLE = 4096

TARGET_EPOCHS = 3
MIN_TARGET_EXAMPLES = 100
MAX_TARGET_EXAMPLES = 25000
MIN_DEFAULT_EPOCHS = 1
MAX_DEFAULT_EPOCHS = 25

n_epochs = TARGET_EPOCHS
n_train_examples = len(dataset)
if n_train_examples * TARGET_EPOCHS < MIN_TARGET_EXAMPLES:
    n_epochs = min(MAX_DEFAULT_EPOCHS, MIN_TARGET_EXAMPLES // n_train_examples)
elif n_train_examples * TARGET_EPOCHS > MAX_TARGET_EXAMPLES:
    n_epochs = max(MIN_DEFAULT_EPOCHS, MAX_TARGET_EXAMPLES // n_train_examples)

n_billing_tokens_in_dataset = sum(min(MAX_TOKENS_PER_EXAMPLE, length) for length in convo_lens)
print(f"Dataset has ~{n_billing_tokens_in_dataset} tokens that will be charged for during training")
print(f"By default, you'll train for {n_epochs} epochs on this dataset")
print(f"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens")
Upload the Training File¶
Once your data is validated, the file needs to be uploaded via the Files API in order to be used with a fine-tuning job.
After you upload the file, it may take some time to process. While the file is processing, you can still create a fine-tuning job but it will not start until the file processing has completed.
The maximum file upload size is 1 GB, though we do not suggest fine-tuning with that much data, since you are unlikely to need a dataset that large to see improvements.
from openai import OpenAI
client = OpenAI()
file_object = client.files.create(
    file=open(data_path, "rb"),
    purpose="fine-tune"
)
Creating a Fine-Tuned Model¶
After ensuring you have the right amount and structure for your dataset, and have uploaded the file, the next step is to create a fine-tuning job. We support creating fine-tuning jobs via the fine-tuning UI or programmatically.
To set additional fine-tuning parameters like the validation_file or hyperparameters, please refer to the API specification for fine-tuning.
After you've started a fine-tuning job, it may take some time to complete. Your job may be queued behind other jobs in OpenAI's system, and training a model can take minutes or hours depending on the model and dataset size. After the model training is completed, the user who created the fine-tuning job will receive an email confirmation.
fine_tuning_job = client.fine_tuning.jobs.create(
    training_file=file_object.id,
    model="gpt-4o-mini",
    suffix="for_mlteam",
    hyperparameters={
        "n_epochs": "auto",
        "batch_size": "auto",
        "learning_rate_multiplier": "auto"
    }
)
# In addition to creating a fine-tuning job, you can also list existing jobs, retrieve the status of a job, or cancel a job.
import time
import datetime
# List 10 fine-tuning jobs
client.fine_tuning.jobs.list(limit=10)
# Retrieve the state of a fine-tune
# status can be ['validating_files', 'queued', 'running', 'succeeded', 'failed', 'cancelled']
while True:
    time.sleep(5)
    fine_tuning_job = client.fine_tuning.jobs.retrieve(fine_tuning_job.id)
    if fine_tuning_job.status in ['validating_files', 'queued', 'running']:
        continue
    if fine_tuning_job.status == "succeeded":
        created_at = datetime.datetime.fromtimestamp(fine_tuning_job.created_at)
        print(f"New fine-tuned model '{fine_tuning_job.fine_tuned_model}' created at {created_at} from base model '{fine_tuning_job.model}'.")
        break
    if fine_tuning_job.status == "failed":
        print("Fine-tuning job failed:", fine_tuning_job.error)
        break
    else:
        print("Fine-tuning job is cancelled.")
        break
# List up to 10 events from a fine-tuning job
client.fine_tuning.jobs.list_events(fine_tuning_job_id=fine_tuning_job.id, limit=10)
# Cancel a job
#client.fine_tuning.jobs.cancel(fine_tuning_job.id)
Using your Fine-Tuned Model¶
When a job has succeeded, you will see the fine_tuned_model field populated with the name of the model when you retrieve the job details. You can now specify this model as a parameter in the Chat Completions API.
After your job is completed, the model should be available right away for inference use. In some cases, it may take several minutes for your model to become ready to handle requests. If requests to your model time out or the model name cannot be found, it is likely because your model is still being loaded. If this happens, try again in a few minutes.
response = client.chat.completions.create(
    model=fine_tuning_job.fine_tuned_model,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"}
    ]
)
print(response.choices[0].message.content)
Analysing your Fine-Tuned Model¶
The API provides the following training metrics computed over the course of training: training loss, training token accuracy, test loss, and test token accuracy. These statistics are meant to provide a sanity check that training went smoothly (loss should decrease, token accuracy should increase). While an active fine-tuning job is running, you can view an event object that contains some useful metrics:
{
    "object": "fine_tuning.job.event",
    "id": "ftevent-abc-123",
    "created_at": 1693582679,
    "level": "info",
    "message": "Step 100/100: training loss=0.00",
    "data": {
        "step": 100,
        "train_loss": 1.805623287509661e-5,
        "train_mean_token_accuracy": 1.0
    },
    "type": "metrics"
}
After a fine-tuning job has finished, you can also see metrics around how the training process went by querying the fine-tuning job, extracting a file ID from result_files, and then retrieving that file's content. Each results CSV file has the following columns: step, train_loss, train_accuracy, valid_loss, and valid_mean_token_accuracy.
import pandas as pd

fine_tuning_job = client.fine_tuning.jobs.retrieve(fine_tuning_job.id)
for i, file_id in enumerate(fine_tuning_job.result_files):
    content = client.files.content(file_id)
    # save content to file
    with open(f"output/fine_tuning_result{i}.csv", "wb") as f:
        f.write(content.text.encode("utf-8"))
    df = pd.read_csv(f"output/fine_tuning_result{i}.csv")
    print(df.head(2))
While metrics can be helpful, evaluating samples from the fine-tuned model provides the most relevant sense of model quality. We recommend generating samples from both the base model and the fine-tuned model on a test set and comparing them side by side. The test set should ideally cover the full distribution of inputs you might send to the model in a production use case. If manual evaluation is too time-consuming, consider using the Evals library to automate future evaluations. For a step-by-step tutorial about evals, check the evals notebook.
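A minimal sketch of such a side-by-side comparison follows. The `get_completion` callable is a placeholder: in practice it would wrap `client.chat.completions.create(...)` for each model; here a stub stands in so the structure is clear, and the model names are hypothetical.

```python
def compare_models(test_prompts, get_completion, base_model, fine_tuned_model):
    """Collect outputs from the base and fine-tuned models for the same prompts."""
    rows = []
    for prompt in test_prompts:
        rows.append({
            "prompt": prompt,
            "base": get_completion(base_model, prompt),
            "fine_tuned": get_completion(fine_tuned_model, prompt),
        })
    return rows

# Stub for illustration; replace with a real call to the Chat Completions API.
def fake_completion(model, prompt):
    return f"[{model}] reply to: {prompt}"

for row in compare_models(["What's the capital of France?"], fake_completion,
                          "gpt-4o-mini", "ft:gpt-4o-mini:org::abc123"):
    print(row["base"])
    print(row["fine_tuned"])
```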
# Cleanup
# Delete a fine-tuned model (must be an owner of the org the model was created in)
client.models.delete(fine_tuning_job.fine_tuned_model)
Iterate¶
If the results from a fine-tuning job are not as good as you expected, continue to iterate:
Iterate on data quality
- Check if your model has grammar, logic, or style issues
- Consider the balance and diversity of data
- Make sure your training examples contain all of the information needed for the response
- Look at the consistency in the training examples
- Make sure all of your training examples are in the same format, as expected for inference
Iterate on data quantity
Once you’re satisfied with the quality and distribution of the examples, you can consider scaling up the number of training examples. This tends to help the model learn the task better, especially around possible "edge cases".
Iterate on hyperparameters
We allow you to specify the following hyperparameters:
- epochs: If the model does not follow the training data as closely as expected, increase the number of epochs by 1 or 2. If the model becomes less diverse than expected, decrease the number of epochs by 1 or 2.
- learning rate multiplier: If the model does not appear to be converging, increase the learning rate multiplier.
- batch size: In general, larger batch sizes tend to work better for larger datasets.
Fine-Tuning with Function Calls¶
Including a long list of functions in the completions API can consume a considerable number of prompt tokens and sometimes the model hallucinates or does not provide valid JSON output.
Fine-tuning a model with function calling examples can allow you to:
- Get similarly formatted responses even when the full function definition isn't present
- Get more accurate and consistent outputs
Fine-tuning on function calling can also be used to customize the model's response to function outputs. To do this you can include a function response message and an assistant message interpreting that response.
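For illustration, a single training example combining a function call, the function's response, and the assistant's interpretation might be constructed like this. The function name and values are hypothetical, and the exact accepted format should be checked against the current API reference:

```python
import json

# One (hypothetical) training example: the assistant calls a function, sees the
# function's response, and then interprets it for the user. Dumped as one line
# of a JSONL training file.
example = {
    "messages": [
        {"role": "user", "content": "What is the weather in San Francisco?"},
        {"role": "assistant", "function_call": {
            "name": "get_current_weather",
            "arguments": "{\"location\": \"San Francisco, CA\"}",
        }},
        {"role": "function", "name": "get_current_weather",
         "content": "{\"temperature\": \"72\", \"unit\": \"fahrenheit\"}"},
        {"role": "assistant",
         "content": "It's currently 72°F in San Francisco."},
    ]
}
print(json.dumps(example))
```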
NOTE: The remaining part of the tutorial will not run, because uploading the training data ('data/weather_chat_fine_tuning.jsonl') fails the Files API validation rules. The file content is actually valid and compatible with the latest function calling API, but as of January 2024 the file validation rules are not up to date and do not support the latest function calling standard.
# NBVAL_SKIP
# Upload the training data
from openai import OpenAI
client = OpenAI()
file_object = client.files.create(
    file=open("data/weather_chat_fine_tuning.jsonl", "rb"),
    purpose="fine-tune"
)
# NBVAL_SKIP
# Start a fine tuning job
fine_tuning_job = client.fine_tuning.jobs.create(
    training_file=file_object.id,
    model="gpt-4o-mini",
    suffix="weather"
)
# NBVAL_SKIP
# Wait till the new model is created
while True:
    time.sleep(5)
    fine_tuning_job = client.fine_tuning.jobs.retrieve(fine_tuning_job.id)
    if fine_tuning_job.status in ['validating_files', 'queued', 'running']:
        continue
    if fine_tuning_job.status == "succeeded":
        break
    if fine_tuning_job.status == "failed":
        print("Fine-tuning job failed:", fine_tuning_job.error)
        break
    else:
        print("Fine-tuning job is cancelled.")
        break
# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
import json
def get_current_weather(location, unit="fahrenheit"):
    """Get the current weather in a given location"""
    if "tokyo" in location.lower():
        return json.dumps({"location": "Tokyo", "temperature": "10", "unit": unit})
    elif "san francisco" in location.lower():
        return json.dumps({"location": "San Francisco", "temperature": "72", "unit": unit})
    elif "paris" in location.lower():
        return json.dumps({"location": "Paris", "temperature": "22", "unit": unit})
    else:
        return json.dumps({"location": location, "temperature": "unknown"})
# NBVAL_SKIP
# Step 1: send the conversation and available functions to the model
messages = [{"role": "user", "content": "What's the weather like in Istanbul?"}]
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]
# Use the fine-tuned model
response = client.chat.completions.create(
    model=fine_tuning_job.fine_tuned_model,
    messages=messages,
    tools=tools
)
response_message = response.choices[0].message
tool_calls = response_message.tool_calls

# Step 2: check if the model wanted to call a function
if tool_calls:
    # Step 3: call the function
    # Note: the JSON response may not always be valid; be sure to handle errors
    available_functions = {
        "get_current_weather": get_current_weather,
    }  # only one function in this example, but you can have multiple
    messages.append(response_message)  # extend conversation with assistant's reply

    # Step 4: send the info for each function call and function response to the model
    for tool_call in tool_calls:
        function_name = tool_call.function.name
        function_to_call = available_functions[function_name]
        function_args = json.loads(tool_call.function.arguments)
        function_response = function_to_call(
            location=function_args.get("location"),
            unit=function_args.get("unit"),
        )
        messages.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": function_response,
            }
        )  # extend conversation with function response

    # Again use the fine-tuned model
    second_response = client.chat.completions.create(
        model=fine_tuning_job.fine_tuned_model,
        messages=messages,
    )  # get a new response from the model where it can see the function response
    print(second_response.choices[0].message.content)
# NBVAL_SKIP
# Cleanup
client.models.delete(fine_tuning_job.fine_tuned_model)