Skip to content
Snippets Groups Projects
Commit f99d5280 authored by Maxime MORGE's avatar Maxime MORGE
Browse files

Test strategy for dictator game

parent fcfb4d62
No related branches found
No related tags found
No related merge requests found
......@@ -19,6 +19,9 @@ The figure below presents a violin plot depicting the share of the total amount
that the dictator allocates to themselves for each model.
The temperature is fixed at 0.7, and each experiment was conducted 30 times.
The median share taken by GPT-4.5, Llama3, Mistral-Small, and DeepSeek-R1 is 50.
When we prompt the models to generate a strategy in the form of an algorithm implemented
in the Python programming language, rather than generating an action, all models divide
the amount fairly except for GPT-4.5, which takes approximately 70% of the total amount for itself.
It is worth noting that, under these standard conditions, humans typically keep an average of around \$80
(Forsythe et al. 1994). It is interesting to note that the variability observed between different executions
in the responses of the same LLM is comparable to the diversity of behaviors observed in humans. In other words,
......
This diff is collapsed.
......@@ -21,34 +21,39 @@ class AgentResponse(BaseModel):
# The dictator game simulation class
class Dictator:
# NOTE(review): this span is a commit-diff view, so __init__ appears twice —
# once without and once with the new `strategy` parameter — and the client
# set-up lines are likewise duplicated (pre- and post-commit versions).
# Comments below describe the post-commit behaviour.
def __init__(self, amount: int, model: str, temperature: float, max_retries: int = 3):
def __init__(self, amount: int, model: str, temperature: float, strategy=False, max_retries: int = 3):
# Total sum of money the dictator divides.
self.amount = amount
# Model identifier; names starting with "gpt" are routed to the OpenAI API,
# anything else to a local endpoint on port 11434 (presumably Ollama — TODO confirm).
self.model = model
# Sampling temperature forwarded to the chat client.
self.temperature = temperature
# strategy=True bypasses the LLM entirely and replays a hard-coded rule
# (see apply_strategy); no chat client is created in that case.
self.strategy = strategy
self.max_retries = max_retries # Maximum retry attempts in case of hallucinations
is_openai_model = model.startswith("gpt")
base_url = "https://api.openai.com/v1" if is_openai_model else "http://localhost:11434/v1"
# Only build the (possibly remote) chat client when the model will actually be queried.
if not strategy:
is_openai_model = model.startswith("gpt")
base_url = "https://api.openai.com/v1" if is_openai_model else "http://localhost:11434/v1"
# Capability/metadata record handed to the client for models it does not know about.
model_info = {
"temperature": self.temperature,
"function_calling": True,
"parallel_tool_calls": True,
"family": "unknown",
"json_output": True,
"vision": False
}
model_info = {
"temperature": self.temperature,
"function_calling": True,
"parallel_tool_calls": True,
"family": "unknown",
"json_output": True,
"vision": False
}
# Chat client; response_format=AgentResponse requests structured JSON output
# matching the AgentResponse schema.
self.model_client = OpenAIChatCompletionClient(
model=self.model,
base_url=base_url,
api_key=OPENAI_API_KEY,
model_info=model_info,
response_format=AgentResponse
)
self.model_client = OpenAIChatCompletionClient(
model=self.model,
base_url=base_url,
api_key=OPENAI_API_KEY,
model_info=model_info,
response_format=AgentResponse
)
async def run(self) -> Dict:
"""Runs the model and ensures a valid response."""
"""Runs the model if strategy is False, otherwise uses a classical method."""
if self.strategy:
return self.apply_strategy()
instruction = f"""
Hi, let’s play a game. You are paired with another player.
Your role is to decide how to divide ${self.amount}, and the other player simply receives your choice.
......@@ -84,8 +89,39 @@ class Dictator:
raise ValueError("Model failed to provide a valid response after multiple attempts.")
def apply_strategy(self) -> Dict:
"""Generates a response."""
if (self.model == "gpt-4.5-preview-2025-02-27") :
my_share = int(0.7 * self.amount) # Example rule: keep 70%
other_share = self.amount - my_share
motivation = f"Using strategy from {self.model}, I chose to keep {my_share} and give {other_share}."
return {"my_share": my_share, "other_share": other_share, "motivation": motivation}
if (self.model == "llama3") :
my_share = self.amount / 2
other_share = self.amount - my_share
motivation = "I'm being fair and generous!"
agent_response = AgentResponse(my_share=my_share, other_share=other_share, motivation=motivation)
return agent_response.model_dump()
if (self.model == "mistral-small") :
my_share = self.amount // 2
other_share = self.amount - my_share
motivation = "The decision is to divide the money equally."
agent_response = AgentResponse(
my_share=my_share,
other_share=other_share,
motivation=motivation
)
return agent_response
if (self.model == "deepseek-r1") :
half_amount = self.amount // 2
return {
"my_share": half_amount,
"other_share": half_amount,
"motivation": "Split equally between both players."
}
# Run the async function and return the response
if __name__ == "__main__":
    # strategy=True replays the hard-coded per-model rule instead of querying
    # the model (see Dictator.apply_strategy). Model name fixed: it was
    # misspelled "laama3", which matched no strategy branch and yielded None.
    game_agent = Dictator(amount=100, model="llama3", temperature=0.7, strategy=True)  # Toggle strategy here
    response_json = asyncio.run(game_agent.run())
    print(response_json)
......@@ -25,12 +25,24 @@ sns.violinplot(
density_norm="width" # Normalizes the width of the violins for comparison
)
# Dashed horizontal reference lines marking each model's strategy-mode share.
strategy_values = {
    'gpt-4.5-preview-2025-02-27': 70,
    'llama3': 50,
    'mistral-small': 50,
    'deepseek-r1': 50,
}
for model_name, share in strategy_values.items():
    plt.axhline(
        y=share,
        color=color_palette[model_name],
        linestyle="dashed",
        linewidth=2,
        label=f"{model_name} strategy",
    )

# Shares are percentages of the pot, so clamp the y-axis to [0, 100].
plt.ylim(0, 100)

# Axis labels, title and legend.
plt.xlabel("Model")
plt.ylabel("Share of money assigned to oneself")
plt.title("Distribution of personal share by model in the dictator game")
plt.legend()

# Write the figure out as SVG.
plt.savefig("../../figures/dictator/dictator_violin.svg", format="svg")
\ No newline at end of file
......@@ -41,7 +41,7 @@ class DictatorExperiment:
# Running the experiment
if __name__ == "__main__":
models = ["gpt-4.5-preview-2025-02-27", "llama3", "mistral-small", "deepseek-r1"]
models = ["llama3", "mistral-small", "deepseek-r1"] # "gpt-4.5-preview-2025-02-27"
temperature = 0.7
amount = 100
iterations = 30
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment