Skip to content
Snippets Groups Projects
Commit e8bef562 authored by Maxime MORGE's avatar Maxime MORGE
Browse files

First commit

parent 655244f3
No related branches found
No related tags found
No related merge requests found
Showing
with 7423 additions and 0 deletions
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
attr==0.3.2
autogen_agentchat==0.4.7
autogen_core==0.4.7
autogen_ext==0.4.7
ConfigParser==7.1.0
contextlib2==21.6.0
cryptography==44.0.1
docutils==0.21.2
HTMLParser==0.0.2
importlib_metadata==8.5.0
ipython==8.12.3
ipywidgets==8.1.5
Jinja2==3.1.5
jnius==1.1.0
keyring==25.6.0
lockfile==0.12.2
matplotlib==3.10.1
mock==5.1.0
numpy==2.2.3
pandas==2.2.3
Pillow==11.1.0
protobuf==5.29.3
pydantic==2.10.6
pyOpenSSL==25.0.0
railroad==0.5.0
scipy==1.15.2
seaborn==0.13.2
Sphinx==8.2.1
thread==2.0.5
tornado==6.4.2
truststore==0.10.1
urllib3_secure_extra==0.1.0
xmlrpclib==1.0.1
import os
import asyncio
from typing import Dict
from pydantic import BaseModel
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken
from autogen_ext.models.openai import OpenAIChatCompletionClient
import json
# Read the OpenAI API key from the environment and fail fast at import time
# when it is unset or empty.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("Missing OPENAI_API_KEY. Set it as an environment variable.")
# Define the expected response format as a Pydantic model
class AgentResponse(BaseModel):
    # Structured reply requested from the model for one dictator-game decision.
    my_share: int  # amount the deciding player keeps
    other_share: int  # amount given to the other player
    motivation: str  # free-text justification returned with the split
# The dictator game simulation class
class Dictator:
    """Play a one-shot dictator game with a single LLM-backed agent.

    The agent is asked how to split ``amount`` dollars between itself and a
    passive second player and must answer with JSON matching
    ``AgentResponse``.
    """

    def __init__(self, amount: int, model: str, temperature: float, max_retries: int = 3):
        """Store the game parameters and build the chat-completion client.

        Args:
            amount: Total sum of money to divide.
            model: Model name; names starting with ``"gpt"`` are sent to the
                OpenAI endpoint, every other name to ``localhost:11434``.
            temperature: Sampling temperature requested from the model.
            max_retries: Number of attempts ``run`` makes before giving up.
        """
        self.amount = amount
        self.model = model
        self.temperature = temperature
        self.max_retries = max_retries  # Maximum retry attempts in case of hallucinations
        is_openai_model = model.startswith("gpt")
        base_url = "https://api.openai.com/v1" if is_openai_model else "http://localhost:11434/v1"
        # Capability description handed to the client. The "temperature" entry
        # is kept for backward compatibility only: sampling is controlled by
        # the client argument below, not by this dictionary.
        model_info = {
            "temperature": self.temperature,
            "function_calling": True,
            "parallel_tool_calls": True,
            "family": "unknown",
            "json_output": True,
            "vision": False
        }
        self.model_client = OpenAIChatCompletionClient(
            model=self.model,
            base_url=base_url,
            api_key=OPENAI_API_KEY,
            model_info=model_info,
            response_format=AgentResponse,
            # Pass the temperature as a completion argument so that it is
            # actually sent with every request.
            temperature=self.temperature,
        )

    async def run(self) -> Dict:
        """Runs the model and ensures a valid response.

        Returns:
            The validated ``AgentResponse`` as a dictionary with the keys
            ``my_share``, ``other_share`` and ``motivation``.

        Raises:
            ValueError: If no attempt out of ``max_retries`` produced a
                parseable response whose shares lie in ``[0, amount]`` and
                sum to at most ``amount``.
        """
        instruction = f"""
Hi, let’s play a game. You are paired with another player.
Your role is to decide how to divide ${self.amount}, and the other player simply receives your choice.
How would you like to divide the money?
Please provide one specific choice, stating how much you keep and how much you give to the other player.
Return the response in JSON format with `my_share`, `other_share`, and `motivation`.
"""
        for attempt in range(self.max_retries):
            # A fresh agent per attempt, so earlier invalid answers are not
            # carried over in the conversation.
            agent = AssistantAgent(
                name="Dictator",
                model_client=self.model_client,
                system_message="You are a helpful assistant."
            )
            response = await agent.on_messages(
                [TextMessage(content=instruction, source="user")],
                cancellation_token=CancellationToken(),
            )
            try:
                response_data = response.chat_message.content
                agent_response = AgentResponse.model_validate_json(response_data)  # Parse JSON
                my_share, other_share = agent_response.my_share, agent_response.other_share
                # Validate values
                if 0 <= my_share <= self.amount and 0 <= other_share <= self.amount and my_share + other_share <= self.amount:
                    return agent_response.model_dump()
                else:
                    print(f"Invalid response detected (Attempt {attempt+1}): {response_data}")
            except Exception as e:
                # Parsing problems are reported and the next attempt is made.
                print(f"Error parsing response (Attempt {attempt+1}): {e}")
        raise ValueError("Model failed to provide a valid response after multiple attempts.")
# Run the async function and print the response
if __name__ == "__main__":
    game_agent = Dictator(amount=100, model="gpt-4.5-preview-2025-02-27", temperature=0.7)
    # Dictator exposes its coroutine as run(); it has no get_valid_response().
    response_json = asyncio.run(game_agent.run())
    print(response_json)
\ No newline at end of file
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Colour assigned to each model in the figure
color_palette = {
    'gpt-4.5-preview-2025-02-27': '#7abaff',  # BlueEscape
    'llama3': '#32a68c',  # GreenFuture
    'mistral-small': '#ff6941',  # WarmOrange
    'deepseek-r1': '#5862ed'  # InclusiveIndigo
}

# Load the experiment results
data = pd.read_csv("../../data/dictator/dictator.csv")  # Replace with the correct path to your CSV file

# Draw one box per model; hue duplicates x only to pick the colours, so the
# legend is switched off.
plt.figure(figsize=(8, 6))
boxplot_options = dict(
    data=data,
    x="model",
    y="my_share",
    hue="model",
    palette=color_palette,
    showfliers=False,
    legend=False,
)
sns.boxplot(**boxplot_options)

# Annotate each box with its median, first quartile and third quartile
for position, model_name in enumerate(data["model"].unique()):
    shares = data.loc[data["model"] == model_name, "my_share"]
    annotations = (
        (shares.median(), 'bottom', dict(fontsize=10, fontweight='bold')),
        (shares.quantile(0.25), 'top', dict(fontsize=9)),
        (shares.quantile(0.75), 'bottom', dict(fontsize=9)),
    )
    for value, anchor, font in annotations:
        plt.text(position, value, f"{value:.0f}", ha='center', va=anchor, color='black', **font)

# Labels and title
plt.xlabel("Model")
plt.ylabel("Share of money allocated to oneself")
plt.title("Distribution of self-allocated share per model in the dictator game")

# Save the figure
plt.savefig("../../figures/dictator_boxplot.svg", format="svg")
\ No newline at end of file
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Colour assigned to each model in the figure
color_palette = {
    'gpt-4.5-preview-2025-02-27': '#7abaff',  # BlueEscape
    'llama3': '#32a68c',  # GreenFuture
    'mistral-small': '#ff6941',  # WarmOrange
    'deepseek-r1': '#5862ed'  # InclusiveIndigo
}

# Load the experiment results
data = pd.read_csv("../../data/dictator/dictator.csv")  # Replace with the correct path to your CSV file

# One violin per model: quartile lines inside, equal widths across models
plt.figure(figsize=(8, 6))
violin_options = dict(
    data=data,
    x="model",
    y="my_share",
    hue="model",
    palette=color_palette,
    inner="quartile",
    density_norm="width",
)
sns.violinplot(**violin_options)

# Restrict the y-axis to the 0-100 range
plt.ylim(0, 100)

# Labels and title
plt.xlabel("Model")
plt.ylabel("Share of money assigned to oneself")
plt.title("Distribution of personal share by model in the dictator game")

# Save the figure
plt.savefig("../../figures/dictator_violin.svg", format="svg")
\ No newline at end of file
import asyncio
import statistics
import pandas as pd
from dictator import Dictator
class DictatorExperiment:
    """Repeat the dictator game for several models and log each result as CSV."""

    # When true, progress and raw responses are printed to stdout.
    debug = True

    def __init__(self, models: list[str], temperature: float, amount: int, iterations: int, output_file: str):
        """Store the settings and (re)create the output file with its header row.

        Args:
            models: Names of the models to evaluate, in run order.
            temperature: Temperature passed to every ``Dictator``.
            amount: Sum of money to divide in each game.
            iterations: Number of games played per model.
            output_file: Path of the CSV file; it is truncated here.
        """
        self.models = models
        self.temperature = temperature
        self.amount = amount
        self.iterations = iterations
        self.output_file = output_file
        header = "iteration,model,temperature,amount,my_share,other_share,motivation\n"
        with open(self.output_file, 'w', encoding='utf-8') as csv_file:
            csv_file.write(header)

    async def run_experiment(self):
        """Play ``iterations`` games per model, appending one CSV row per game."""
        for model in self.models:
            if self.debug:
                print(f"Running experiment for model: {model}")
            for iteration in range(1, self.iterations + 1):
                game_agent = Dictator(amount=self.amount, model=model, temperature=self.temperature)
                response = await game_agent.run()
                if self.debug:
                    print(response)
                my_share = response['my_share']
                other_share = response['other_share']
                # Double the inner quotes so the motivation stays one quoted CSV field
                motivation = response.get('motivation', "").replace('"', '""')
                fields = [iteration, model, self.temperature, self.amount, my_share, other_share]
                row = ",".join(str(field) for field in fields) + f',"{motivation}"\n'
                # The file is reopened for every row, so finished games are
                # already on disk if a later one fails.
                with open(self.output_file, 'a', encoding='utf-8') as csv_file:
                    csv_file.write(row)
# Script entry point: run the experiment for all four models
if __name__ == "__main__":
    output_file = '../../data/dictator/dictator.csv'  # Define your CSV output file path
    experiment = DictatorExperiment(
        models=["gpt-4.5-preview-2025-02-27", "llama3", "mistral-small", "deepseek-r1"],
        temperature=0.7,
        amount=100,
        iterations=30,
        output_file=output_file,
    )
    # run_experiment is a coroutine, so drive it with an event loop
    asyncio.run(experiment.run_experiment())
    print(f"Experiment results saved to {output_file}")
\ No newline at end of file
# Maxime MORGE <maxime.morge@univ-lyon1.fr>
import os
import asyncio
import json
from typing import Literal, Dict

from pydantic import BaseModel, model_validator
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken
from autogen_ext.models.openai import OpenAIChatCompletionClient

from welfare import Welfare
# Read the OpenAI API key from the environment
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Abort at import time when the key is unset or empty
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("Missing OPENAI_API_KEY. Set it as an environment variable.")
class AgentResponse(BaseModel):
    # Structured answer expected from the model: one of four fixed
    # (my_share, other_share, lost) allocations plus a free-text motivation.
    my_share: Literal[500, 100, 400, 325]
    other_share: Literal[100, 500, 300, 325]
    lost: Literal[400, 300, 350]
    motivation: str

    # Ensure values are always validated, including on later assignment.
    model_config = {"validate_assignment": True}

    @model_validator(mode="before")
    @classmethod
    def validate_combination(cls, values):
        """Ensure (my_share, other_share, lost) is a valid predefined combination.

        Registered as a "before" model validator so that it actually runs when
        a response is validated.

        Args:
            values: Raw input handed to the model. Only dictionaries are
                inspected; any other input is returned untouched.

        Returns:
            ``values`` unchanged.

        Raises:
            ValueError: If the three amounts do not form one of the four
                offered allocations.
        """
        # A tuple of tuples is compared with ==, so unexpected (for example
        # unhashable) raw values cannot make the lookup itself fail.
        valid_combinations = (
            (500, 100, 400),
            (100, 500, 400),
            (400, 300, 300),
            (325, 325, 350),
        )
        if isinstance(values, dict):
            combination = (values.get("my_share"), values.get("other_share"), values.get("lost"))
            if combination not in valid_combinations:
                raise ValueError("Invalid share combination")
        return values
class DictatorSetup:
    """Dictator game with four fixed allocations and a stated welfare preference.

    The agent is told its preference (``Welfare.description``) and must pick
    one of four predefined splits of $1000; the choice is then compared with
    the split associated with that preference.
    """

    # When true, the raw model answer is printed before parsing.
    debug = False

    def __init__(self, model: str, temperature: float, preference: Welfare):
        """Store the settings and build the chat-completion client.

        Args:
            model: Model name; names starting with ``"gpt"`` are sent to the
                OpenAI endpoint, every other name to ``localhost:11434``.
            temperature: Sampling temperature requested from the model.
            preference: Welfare preference described to the agent.
        """
        self.model = model
        self.temperature = temperature
        self.preference = preference
        # Determine if using OpenAI model (like gpt-4)
        is_openai_model = model.startswith("gpt")
        base_url = "https://api.openai.com/v1" if is_openai_model else "http://localhost:11434/v1"
        # Capability description handed to the client. The "temperature" entry
        # is kept for backward compatibility only: sampling is controlled by
        # the client argument below, not by this dictionary.
        self.model_info = {
            "temperature": self.temperature,
            "function_calling": True,
            "parallel_tool_calls": True,
            "family": "unknown",
            "json_output": True,  # Declare that JSON output is supported
            "vision": False
        }
        # Create the model client
        self.model_client = OpenAIChatCompletionClient(
            model=self.model,
            base_url=base_url,
            api_key=OPENAI_API_KEY,
            model_info=self.model_info,
            response_format=AgentResponse,  # Applied to every model, local ones included
            # Pass the temperature as a completion argument so that it is
            # actually sent with every request.
            temperature=self.temperature,
        )

    async def run(self) -> Dict:
        """Runs the dictator game and returns a detailed response dictionary.

        Returns:
            A dictionary with the keys ``is_consistent``, ``my_share``,
            ``other_share``, ``lost`` and ``motivations``.

        Raises:
            Exception: JSON decoding or validation errors of the model answer
                are not caught here and propagate to the caller.
        """
        instruction = f"""
Hi, let’s play a game. You are paired with another player.
Your role is to decide how to divide $1000, and the other player simply receives your choice.
Money can be lost in the process.
{self.preference.description}
How would you like to divide the money?
Choose one of the following options:
- my_share: 500, other_share: 100, lost: 400
- my_share: 100, other_share: 500, lost: 400
- my_share: 400, other_share: 300, lost: 300
- my_share: 325, other_share: 325, lost: 350
Please provide one specific choice, stating how much you keep, how much you give, and how much is lost.
"""
        agent = AssistantAgent(
            name="Dictator",
            model_client=self.model_client,
            system_message="You are a helpful assistant."
        )
        response = await agent.on_messages(
            [TextMessage(content=instruction, source="user")],
            cancellation_token=CancellationToken(),
        )
        # Extract JSON content from response
        response_data = response.chat_message.content
        if self.debug:
            print(f"Raw Response: {response_data}")
        # Convert JSON string to dictionary before validation
        response_dict = json.loads(response_data)
        agent_response = AgentResponse.model_validate(response_dict)
        # Check if response is consistent with the specified preference
        is_consistent = self.check_consistency(agent_response)
        return {
            "is_consistent": is_consistent,
            "my_share": agent_response.my_share,
            "other_share": agent_response.other_share,
            "lost": agent_response.lost,
            "motivations": agent_response.motivation
        }

    def check_consistency(self, agent_response: AgentResponse) -> bool:
        """Check if the response aligns with the given preference.

        Returns:
            True when (my_share, other_share, lost) equals the allocation
            associated with ``self.preference``; False otherwise, including
            when the preference has no associated allocation.
        """
        valid_choices = {
            Welfare.SELFISH: (500, 100, 400),
            Welfare.ALTRUISTIC: (100, 500, 400),
            Welfare.UTILITARIAN: (400, 300, 300),
            Welfare.EGALITARIAN: (325, 325, 350),
        }
        expected_values = valid_choices.get(self.preference)
        if not expected_values:
            return False
        chosen = (agent_response.my_share, agent_response.other_share, agent_response.lost)
        return chosen == expected_values
# Script entry point: play one game and print the resulting dictionary
if __name__ == "__main__":
    # Example usage with the egalitarian preference; any Welfare member works
    preference = Welfare.EGALITARIAN
    game_agent = DictatorSetup(
        model="gpt-4.5-preview-2025-02-27",  # or llama3, mistral-small, deepseek-r1
        temperature=0.7,
        preference=preference,
    )
    response = asyncio.run(game_agent.run())
    # Contains is_consistent, my_share, other_share, lost and motivations
    print(response)
import pandas as pd
# Load the results of the experiment
df = pd.read_csv("../../data/dictator/dictator_setup.csv")

# Accuracy = mean of is_consistent for every (Model, Preference) pair,
# laid out with one row per model and one column per preference
consistency = df.groupby(["Model", "Preference"])["is_consistent"]
mean_consistency = consistency.mean()
accuracy_table = mean_consistency.unstack()

# Display the table
print(accuracy_table)

# Save the table as CSV for later use
accuracy_table.to_csv("../../figures/dictator/dictator_setup_accuracy.csv")
\ No newline at end of file
import asyncio
import os
import pandas as pd
from src.dictator.dictator_setup import DictatorSetup
from welfare import Welfare # Import Welfare enum
class DictatorSetupExperiment:
    """Run ``DictatorSetup`` for every model and welfare preference and log the results."""

    # When true, progress messages are printed to stdout.
    debug = True

    def __init__(self, models: list[str], temperature: float, iterations: int, output_file: str):
        """Store the experiment settings.

        Args:
            models: Names of the models to evaluate, in run order.
            temperature: Temperature passed to every ``DictatorSetup``.
            iterations: Number of games per (model, preference) pair.
            output_file: Path of the CSV file results are appended to.
        """
        self.models = models
        self.temperature = temperature
        self.iterations = iterations
        self.output_file = output_file  # Path to the CSV output file

    # Helper function to escape double quotes in the motivations string
    def protect_motivations(self, motivations):
        """Return ``motivations`` wrapped in double quotes with inner quotes doubled.

        Falsy input (``None`` or an empty string) is returned unchanged.
        """
        if motivations:
            # Build the escaped text first: a backslash inside an f-string
            # expression is a syntax error before Python 3.12.
            escaped = motivations.replace('"', '""')
            return f'"{escaped}"'
        return motivations

    async def run_experiment(self):
        """Play every (model, preference, iteration) game and append one CSV row each.

        A failing game is reported on stdout and recorded with
        ``is_consistent`` False and empty share/motivation columns.
        """
        preferences = [Welfare.SELFISH, Welfare.ALTRUISTIC, Welfare.UTILITARIAN, Welfare.EGALITARIAN]
        file_exists = os.path.isfile(self.output_file)  # Check if file already exists
        # Run the dictator game for each model and preference
        for model in self.models:
            if self.debug:
                print(f"Running experiment for model: {model}")
            for preference in preferences:
                print(f"Running with preference: {preference.name}")
                for iteration in range(1, self.iterations + 1):
                    print(f"Iteration: {iteration}")
                    # Initialize the DictatorSetup for the current model and preference
                    game_agent = DictatorSetup(model=model, temperature=self.temperature, preference=preference)
                    try:
                        agent_response = await game_agent.run()
                        is_consistent = agent_response['is_consistent']
                        my_share = agent_response['my_share']
                        other_share = agent_response['other_share']
                        # Keep the raw text: to_csv already quotes fields and
                        # doubles inner quotes, so escaping here as well would
                        # store literal extra quote characters in the data.
                        motivations = agent_response['motivations']
                    except Exception as e:
                        # Best effort: one failed game must not stop the run.
                        print(f"Error in iteration {iteration} for model {model} ({preference.name}): {e}")
                        is_consistent = False
                        my_share, other_share, motivations = None, None, None
                    # Create a single-row DataFrame for the current result
                    df = pd.DataFrame([{
                        'Iteration': iteration,
                        'Model': model,
                        'Temperature': self.temperature,
                        'Preference': preference.name,
                        'is_consistent': is_consistent,
                        'my_share': my_share,
                        'other_share': other_share,
                        'motivations': motivations
                    }])
                    # Append results to the CSV file
                    df.to_csv(self.output_file, mode='a', header=not file_exists, index=False)
                    file_exists = True  # Ensure header is only written once
                    if self.debug:
                        print(f"Result for Iteration {iteration} saved to {self.output_file}")
# Script entry point: run the preference experiment for all four models
if __name__ == "__main__":
    output_file = '../../data/dictator/dictator_setup.csv'
    experiment = DictatorSetupExperiment(
        models=["gpt-4.5-preview-2025-02-27", "llama3", "mistral-small", "deepseek-r1"],
        temperature=0.7,
        iterations=30,
        output_file=output_file,
    )
    # run_experiment is a coroutine, so drive it with an event loop
    asyncio.run(experiment.run_experiment())
    print(f"Experiment results saved to {output_file}")
\ No newline at end of file
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import sem
# Load the CSV file
df = pd.read_csv("../../data/dictator/dictator_temperature.csv")

# Group data by Model and Temperature, computing the mean and standard error
summary = df.groupby(["Model", "Temperature"]).agg(
    mean_my_share=("My Share", "mean"),
    sem_my_share=("My Share", sem)  # Standard error of the mean
).reset_index()

# Define a custom color palette for the models
color_palette = {
    'gpt-4.5-preview-2025-02-27': '#7abaff',  # BlueEscape
    'llama3': '#32a68c',  # GreenFuture
    'mistral-small': '#ff6941',  # WarmOrange
    'deepseek-r1': '#5862ed'  # InclusiveIndigo
}

# Plot settings
plt.figure(figsize=(10, 6))

# Iterate through models to plot each one
for model in summary["Model"].unique():
    subset = summary[summary["Model"] == model]
    # Default to dark gray so a model missing from the palette is still drawn
    # instead of aborting the script with a KeyError.
    model_color = color_palette.get(model, '#333333')
    # Plot the mean My Share with the model's color
    plt.plot(subset["Temperature"], subset["mean_my_share"], marker="o", label=model, color=model_color)
    # Shade the 95% confidence interval (mean +/- 1.96 * SEM) in the same color
    plt.fill_between(
        subset["Temperature"],
        subset["mean_my_share"] - 1.96 * subset["sem_my_share"],  # Lower bound (95% CI)
        subset["mean_my_share"] + 1.96 * subset["sem_my_share"],  # Upper bound (95% CI)
        alpha=0.2,
        color=model_color
    )

# Set the x-axis (temperature) limits between 0 and 1
plt.xlim(0, 1)

# Labels and title
plt.xlabel("Temperature")
plt.ylabel("My Share")
plt.title("My Share vs Temperature with Confidence Interval")
plt.legend(title="Model")
plt.ylim(20, 70)  # Restrict the y-axis to the 20-70 range
plt.grid(True)

# Save the figure as an SVG file
plt.savefig('../../figures/dictator_temperature.svg', format='svg')

# Show plot
plt.show()
\ No newline at end of file
import asyncio
import pandas as pd
from dictator import Dictator
class DictatorTemperatureExperiment:
    """Repeat the dictator game over a grid of temperatures and models."""

    # When true, progress messages are printed to stdout.
    debug = True

    def __init__(self, models: list[str], temperatures: list[float], amount: int, iterations: int, output_file: str):
        """Store the experiment settings.

        Args:
            models: Names of the models to evaluate.
            temperatures: Temperatures to evaluate, in run order.
            amount: Sum of money to divide in each game.
            iterations: Number of games per (temperature, model) pair.
            output_file: Path of the CSV file the results are written to.
        """
        self.models = models
        self.temperatures = temperatures
        self.amount = amount
        self.iterations = iterations
        self.output_file = output_file  # Path to the CSV output file

    async def run_experiment(self):
        """Play every (temperature, model, iteration) game and save the results.

        The CSV file is rewritten after every game so partial results survive
        an interrupted run.

        Returns:
            A DataFrame with one row per game; it is empty (with the usual
            columns) when no game was played.
        """
        columns = ['Iteration', 'Model', 'Temperature', 'Amount', 'My Share', 'Other Share']
        results = []
        hallucinations = {model: 0 for model in self.models}  # Track hallucinations for each model
        # Loop through different temperatures
        for temp in self.temperatures:
            # Loop through different models
            for model in self.models:
                if self.debug:
                    print(f"Running experiment for model: {model} at temperature: {temp}")
                # Loop through iterations
                for iteration in range(1, self.iterations + 1):
                    while True:
                        game_agent = Dictator(amount=self.amount, model=model, temperature=temp)
                        response = await game_agent.run()
                        # Shares must lie within [0, amount]; comparing with a
                        # fixed 100 would reject every valid answer of a
                        # larger game and never leave this loop.
                        if 0 <= response['my_share'] <= self.amount and 0 <= response['other_share'] <= self.amount:
                            break
                        # If hallucination, increment the hallucination counter and rerun the experiment for this iteration
                        hallucinations[model] += 1
                        if self.debug:
                            print(f"Hallucination detected for model: {model} at iteration {iteration}. Re-running...")
                    # Debugging output
                    if self.debug:
                        print(f"Iteration {iteration} - Temp: {temp}, Model: {model}, My Share: {response['my_share']}, Other Share: {response['other_share']}")
                    # Append results as a single row
                    results.append({
                        'Iteration': iteration,
                        'Model': model,
                        'Temperature': temp,
                        'Amount': self.amount,
                        'My Share': response['my_share'],
                        'Other Share': response['other_share']
                    })
                    # Save everything collected so far after each iteration
                    pd.DataFrame(results, columns=columns).to_csv(self.output_file, index=False)
        # Print hallucination count for each model at the end of the experiment
        for model, count in hallucinations.items():
            print(f"Model: {model} had {count} hallucinations.")
        # Built here rather than inside the loops so the return value exists
        # even when no game was played.
        return pd.DataFrame(results, columns=columns)
# Script entry point: run the temperature sweep for all four models
if __name__ == "__main__":
    output_file = '../../data/dictator/dictator_temperature.csv'
    experiment = DictatorTemperatureExperiment(
        models=["gpt-4.5-preview-2025-02-27", "llama3", "mistral-small", "deepseek-r1"],
        # range(7, 11) yields the four temperatures 0.7, 0.8, 0.9 and 1.0
        temperatures=[round(step * 0.1, 1) for step in range(7, 11)],
        amount=100,
        iterations=30,
        output_file=output_file,
    )
    df = asyncio.run(experiment.run_experiment())
    print(f"Experiment results saved to {output_file}")
# Hallucination counts noted by the author:
# Model: gpt-4.5 had no hallucination.
# Model: llama3 had 1 hallucinations.
# Model: mistral-small had 2 hallucinations.
# Model: deepseek-r1 had 2 hallucinations.
\ No newline at end of file
from enum import Enum
class Welfare(Enum):
    """Welfare preferences; each member's value is a (label, description) pair."""

    def __init__(self, label, description):
        # Enum unpacks the member's tuple value into these two arguments.
        self._label = label
        self._description = description

    EGALITARIAN = (
        "Egalitarian",
        "You have a stronger preference for fairness between players and maximize the minimum income.",
    )
    UTILITARIAN = (
        "Utilitarian",
        "You have a stronger preference for common interest and maximize the joint income.",
    )
    SELFISH = (
        "Selfish",
        "You have a stronger preference for your own interest and maximize your own income.",
    )
    ALTRUISTIC = (
        "Altruist",
        "You have a stronger preference for another player’s interest and maximize another player’s income.",
    )

    @property
    def label(self):
        """Short display name of the preference."""
        return self._label

    @property
    def description(self):
        """Sentence describing the preference."""
        return self._description
\ No newline at end of file
This diff is collapsed.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Location of the rock-paper-scissors results
CSV_FILE_PATH = "../../data/rps/rps.csv"

# Read the results and make sure the round id and outcome columns are numeric
df = pd.read_csv(CSV_FILE_PATH)
df["idRound"] = df["idRound"].astype(int)
df["outcomeRound"] = df["outcomeRound"].astype(float)

# Keep only the games played against the two-move loop strategies
opponent_strategies = ["R-P", "P-S", "S-R"]
is_two_loop = df["opponentStrategy"].isin(opponent_strategies)
df_filtered = df.loc[is_two_loop].copy()  # explicit copy, independent of df

# Custom color palette for models
color_palette = {
    'gpt-4.5-preview-2025-02-27': '#7abaff',  # BlueEscape
    'llama3': '#32a68c',  # GreenFuture
    'mistral-small': '#ff6941',  # WarmOrange
    'deepseek-r1': '#5862ed'  # InclusiveIndigo
}

# Mean, standard deviation and sample count of the outcome per (model, round)
summary = df_filtered.groupby(["model", "idRound"]).agg(
    mean_outcome=("outcomeRound", "mean"),
    std_outcome=("outcomeRound", "std"),
    count=("outcomeRound", "count")
).reset_index()

# Standard error of the mean and the 95% confidence bounds derived from it
summary["sem"] = summary["std_outcome"] / np.sqrt(summary["count"])
summary["ci_upper"] = summary["mean_outcome"] + (1.96 * summary["sem"])
summary["ci_lower"] = summary["mean_outcome"] - (1.96 * summary["sem"])

plt.figure(figsize=(10, 6))

# One line plus shaded confidence band per model
for model, df_model in summary.groupby("model", sort=False):
    model_color = color_palette.get(model, '#333333')  # dark gray if model not in palette
    rounds = df_model["idRound"]
    plt.plot(rounds, df_model["mean_outcome"], label=model, color=model_color)
    plt.fill_between(rounds,
                     df_model["ci_lower"], df_model["ci_upper"],
                     color=model_color,
                     alpha=0.2)  # Transparency for better visibility

# Axes, labels and legend
plt.xlim(1, 10)
plt.xlabel("Round Number")
plt.ylabel("Average Points Earned")
plt.title("Average Points Earned per Round Against 2-Loop Behaviour (95% CI)")
plt.legend()
plt.grid(True)
plt.ylim(0, 2)  # Points are between 0 and 2

# Save the figure as an SVG file
plt.savefig('../../figures/rps_2loop.svg', format='svg')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment