Skip to content
Snippets Groups Projects
Commit 65adc84f authored by Maxime Morge's avatar Maxime Morge :construction_worker:
Browse files

PyGAAMAS: XPs about the Battle of the Sexes

parent 35babfbe
No related branches found
No related tags found
No related merge requests found
...@@ -580,7 +580,7 @@ Most lean toward either rigid rationality, indiscriminate cooperation, or unstab ...@@ -580,7 +580,7 @@ Most lean toward either rigid rationality, indiscriminate cooperation, or unstab
| **Mixtral:8x7b** | actions | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | | **Mixtral:8x7b** | actions | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 |
| | actions + ano | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | | | actions + ano | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 |
| **Mistral-Small** | actions | 0.00 | 0.90 | 1.00 | 0.00 | 0.77 | 1.00 | 0.03 | 0.97 | 1.00 | 0.07 | 0.90 | 1.00 | | **Mistral-Small** | actions | 0.00 | 0.90 | 1.00 | 0.00 | 0.77 | 1.00 | 0.03 | 0.97 | 1.00 | 0.07 | 0.90 | 1.00 |
| | actions + ano | 0.10 | 0.77 | 0.97 | 0.17 | 0.77 | 1.00 | N/A | N/A | N/A | 0.43 | 0.43 | 0.90 | | | actions + ano | 0.10 | 0.77 | 0.97 | 0.17 | 0.77 | 1.00 | 0.40 | 0.63 | 1.00 | 0.43 | 0.43 | 0.90 |
| **Deepseek-R1:7b** | actions | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | | **Deepseek-R1:7b** | actions | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
| | actions + ano | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | | | actions + ano | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
| **Deepseek-R1** | actions | 0.87 | 0.97 | 0.93 | 0.83 | 0.83 | 0.93 | 0.87 | 0.97 | 0.90 | 0.87 | 1.00 | 0.93 | | **Deepseek-R1** | actions | 0.87 | 0.97 | 0.93 | 0.83 | 0.83 | 0.93 | 0.87 | 0.97 | 0.90 | 0.87 | 1.00 | 0.93 |
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
annotated-types==0.7.0
anyio==4.9.0
appnope==0.1.4
asitop==0.0.24
asttokens==3.0.0
attr==0.3.2 attr==0.3.2
autogen_agentchat==0.4.7 autogen-agentchat==0.4.7
autogen_core==0.4.7 autogen-core==0.4.7
autogen_ext==0.4.7 autogen-ext==0.4.7
ConfigParser==7.1.0 backcall==0.2.0
blessed==1.21.0
certifi==2025.1.31
cffi==1.17.1
charset-normalizer==3.4.1
comm==0.2.2
configparser==7.1.0
contextlib2==21.6.0 contextlib2==21.6.0
contourpy==1.3.1
cryptography==44.0.1 cryptography==44.0.1
cycler==0.12.1
dashing==0.1.0
decorator==5.2.1
Deprecated==1.2.18
distro==1.9.0
docutils==0.21.2 docutils==0.21.2
executing==2.2.0
filelock==3.18.0
fire==0.7.0
fonttools==4.56.0
fsspec==2025.5.0
h11==0.14.0
HTMLParser==0.0.2 HTMLParser==0.0.2
httpcore==1.0.7
httpx==0.28.1
huggingface-hub==0.31.4
idna==3.10
imageio==2.37.0
importlib_metadata==8.5.0 importlib_metadata==8.5.0
importlib_metadata==8.0.0
ipython==8.12.3 ipython==8.12.3
ipywidgets==8.1.5 ipywidgets==8.1.5
jedi==0.19.2
Jinja2==3.1.5 Jinja2==3.1.5
jnius==1.1.0 jiter==0.9.0
keyring==25.6.0 jsonref==1.1.0
lockfile==0.12.2 jupyterlab_widgets==3.0.15
kiwisolver==1.4.8
MarkupSafe==3.0.2
matplotlib==3.10.1 matplotlib==3.10.1
mock==5.1.0 matplotlib-inline==0.1.7
numpy~=2.2.4 networkx==3.4.2
numpy==2.2.4
openai==1.69.0
opentelemetry-api==1.31.1
packaging==24.2
pandas==2.2.3 pandas==2.2.3
Pillow==11.1.0 parso==0.8.4
protobuf~=5.29.4 pexpect==4.9.0
pydantic~=2.11.1 pickleshare==0.7.5
pyOpenSSL==25.0.0 pillow==11.1.0
railroad==0.5.0 prompt_toolkit==3.0.51
protobuf==5.29.4
psutil==7.0.0
psycopg2==2.9.10
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
pydantic==2.11.1
pydantic_core==2.33.0
pygame==2.6.1
Pygments==2.19.1
pykka==4.2.0
pyparsing==3.2.3
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.2
randomname==0.2.1
regex==2024.11.6
requests==2.32.3
safetensors==0.5.3
scipy==1.15.2 scipy==1.15.2
seaborn==0.13.2 seaborn==0.13.2
Sphinx==8.2.1 six==1.17.0
thread==2.0.5 sniffio==1.3.1
tornado==6.4.2 squarify==0.4.4
truststore==0.10.1 stack-data==0.6.3
urllib3_secure_extra==0.1.0 termcolor==3.1.0
xmlrpclib==1.0.1 tiktoken==0.9.0
tk==0.1.0
requests~=2.32.3 tokenizers==0.21.1
httpx~=0.28.1 tqdm==4.67.1
pip~=25.0.1 traitlets==5.14.3
distro~=1.9.0 transformers==4.52.3
\ No newline at end of file typing-inspection==0.4.0
typing_extensions==4.13.0
tzdata==2025.2
urllib3==2.3.0
wcwidth==0.2.13
widgetsnbextension==4.0.14
wrapt==1.17.2
zipp==3.21.0
import os
import asyncio
import json
import re
import requests
from typing import Dict, Literal, List, Callable
from pydantic import BaseModel, ValidationError
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken
from autogen_ext.models.openai import OpenAIChatCompletionClient
# Load API keys from the environment; both are required at import time
# because a single run may talk to OpenAI and to the Pagoda endpoint.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PAGODA_API_KEY = os.getenv("PAGODA_API_KEY")
# Fail fast with an explicit message rather than erroring mid-experiment.
if not OPENAI_API_KEY:
    raise ValueError("Missing OPENAI_API_KEY. Set it as an environment variable.")
if not PAGODA_API_KEY:
    raise ValueError("Missing PAGODA_API_KEY. Set it as an environment variable.")
# Response format
class AgentResponse(BaseModel):
    """Structured reply the model must return for one round of play."""
    move: Literal["Opera", "Football"]                # the player's chosen action
    prediction: Literal["Opera", "Football", "None"]  # predicted opponent action; "None" when prediction mode is off
    reasoning: str                                    # free-text justification for the choice
class BoS:
    """One Battle-of-the-Sexes match between a player (LLM-driven or heuristic)
    and a scripted opponent supplied as ``opponent_strategy_fn``.

    Tracks per-round history, the player's cumulative payoff, and — when
    ``prediction`` is enabled — how often the player predicted the opponent's
    move correctly.
    """

    def __init__(
        self,
        model: str,
        role: str,
        prediction: bool,
        version: str,
        temperature: float,
        game_id: int,
        opponent_strategy_fn: Callable[[List[Dict]], str],
        strategy: bool = False,
        total_rounds: int = 10,
        max_retries: int = 5
    ):
        """Configure the game.

        Args:
            model: model name; "gpt*" → OpenAI, names containing ":" → Pagoda,
                anything else → local Ollama-compatible endpoint.
            role: "man" or "woman" (case-insensitive).
            prediction: if True, the prompt also asks for an opponent-move prediction.
            version: "classic" selects the 3/2/0 payoff matrix; anything else yields NaN payoffs.
            temperature: sampling temperature forwarded to the backend.
            game_id: identifier for logging purposes.
            opponent_strategy_fn: maps the shared history to the opponent's next move.
            strategy: if True, play the fixed heuristic instead of querying the model.
            total_rounds: number of rounds in the game.
            max_retries: attempts before giving up on an unparseable model reply.
        """
        self.debug = False
        self.model = model
        self.role = role.lower()
        self.prediction = prediction
        self.version = version
        self.temperature = temperature
        self.game_id = game_id
        self.strategy = strategy
        self.total_rounds = total_rounds
        self.max_retries = max_retries
        self.history: List[Dict] = []
        self.player_score_game = 0
        self.prediction_score = 0
        self.opponent_strategy_fn = opponent_strategy_fn
        # Payoff matrix: A = preferred-event payoff, B = partner's-event payoff,
        # C = mismatch payoff. Non-"classic" versions are not defined here.
        if self.version == "classic":
            self.A, self.B, self.C = 3, 2, 0
        else:
            self.A, self.B, self.C = float('nan'), float('nan'), float('nan')
        # Route by model name: gpt* → OpenAI, ":" tag → Pagoda generate API,
        # otherwise a local Ollama OpenAI-compatible server.
        self.base_url = (
            "https://api.openai.com/v1"
            if model.startswith("gpt")
            else "https://ollama-ui.pagoda.liris.cnrs.fr/ollama/api/generate"
            if ":" in model
            else "http://localhost:11434/v1"
        )
        # NOTE(review): this client is built even for Pagoda models (which are
        # queried over raw HTTP in run_pagoda) and always uses OPENAI_API_KEY;
        # also "temperature" inside model_info may not be honored by the
        # client — TODO confirm against autogen_ext documentation.
        self.model_client = OpenAIChatCompletionClient(
            timeout=60,
            model=self.model,
            base_url=self.base_url,
            api_key=OPENAI_API_KEY,
            model_info={
                "temperature": self.temperature,
                "function_calling": True,
                "parallel_tool_calls": True,
                "family": "unknown",
                "json_output": True,
                "vision": False
            },
            response_format=AgentResponse
        )

    def debug_print(self, msg: str):
        """Print *msg* only when self.debug is set."""
        if self.debug:
            print(f"[DEBUG] {msg}")

    async def play_round(self, round_id: int) -> Dict:
        """Play one round: obtain both moves, score them, append to history.

        Returns the per-round result dict that is also stored in ``self.history``.
        """
        # The scripted opponent sees only the shared history, never our move.
        opponent_move = self.opponent_strategy_fn(self.history)
        if self.strategy:
            move, prediction, reasoning = self.apply_strategy(opponent_move)
        else:
            move, prediction, reasoning = await self.model_based_decision()
        outcome = self.determine_outcome(move, opponent_move)
        self.update_score(outcome)
        prediction_current_score = 0.0
        # Only score predictions when prediction mode is on and a concrete
        # move (not "None") was predicted.
        if self.prediction and prediction in ["Opera", "Football"]:
            if prediction == opponent_move:
                prediction_current_score = 1.0
                self.prediction_score += 1
        round_result = {
            "Agent Move": move,
            "Prediction": prediction,
            "Opponent Move": opponent_move,
            "Reasoning": reasoning,
            "Outcome": outcome,
            "Current Player Prediction Score Game": self.prediction_score,
            "PredictionOutcome": prediction_current_score
        }
        self.history.append(round_result)
        self.debug_print(f"Round {round_id}: {round_result}")
        return round_result

    def generate_instruction(self, history_summary: str, round_id: int) -> str:
        """Build the full prompt for the current round.

        The tail of the prompt differs only in whether a prediction field is
        requested in the JSON reply.
        """
        base = f"""
You are playing the Battle of the Sexes.
Game Description:
A couple is deciding how to spend the evening. The Woman prefers the Opera, while the Man prefers Football.
Both would rather go to the same event together than go alone, but each has a different preferred activity.
Payoffs:
- Opera/Opera: Woman gets {self.A}, Man gets {self.B}
- Football/Football: Woman gets {self.B}, Man gets {self.A}
- Mismatch: Both get {self.C}
Your Role: {self.role.capitalize()}
This is round {round_id} of {self.total_rounds}.
Game History:
{history_summary}
"""
        if not self.prediction:
            return base + """
Your Goal: Maximize your own score by anticipating the opponent's move — whether they are random, pattern-based, or strategic.
Respond ONLY in the following JSON format:
{
"move": "Opera" | "Football",
"prediction": "None",
"reasoning": "Explain your decision based on your preferences and the past moves"
}
"""
        return base + """
Your Goal: Maximize your own score by anticipating the opponent's move — whether they are random, pattern-based, or strategic.
Respond ONLY in the following JSON format:
{
"move": "Opera" | "Football",
"prediction": "Opera" | "Football",
"reasoning": "Explain how you predicted the opponent's move and how you chose your response"
}
"""

    async def model_based_decision(self) -> tuple[str, str, str]:
        """Ask the configured model for (move, prediction, reasoning).

        Retries up to ``max_retries`` times on unparseable replies, then
        raises ValueError. Pagoda-hosted models are delegated to run_pagoda.
        """
        history_summary = self.get_history_summary()
        instruction = self.generate_instruction(history_summary, len(self.history) + 1)
        # Models tagged with ":" (e.g. "mixtral:8x7b") are served by Pagoda.
        if ":" in self.model:
            return await self.run_pagoda(instruction)
        for attempt in range(1, self.max_retries + 1):
            try:
                # A fresh agent per attempt: no conversation state carries over.
                agent = AssistantAgent(
                    name="Player",
                    model_client=self.model_client,
                    system_message="You are a helpful assistant."
                )
                response = await agent.on_messages(
                    [TextMessage(content=instruction, source="user")],
                    cancellation_token=CancellationToken()
                )
                content = response.chat_message.content
                agent_response = AgentResponse.model_validate_json(content)
                return agent_response.move, agent_response.prediction, agent_response.reasoning
            except (ValidationError, json.JSONDecodeError) as e:
                self.debug_print(f"Attempt {attempt}: Parse error - {e}")
        raise ValueError("Model failed to provide a valid response after multiple attempts.")

    async def run_pagoda(self, instruction: str) -> tuple[str, str, str]:
        """Query the Pagoda Ollama /generate endpoint over raw HTTP.

        Retries up to ``max_retries`` times, then raises ValueError.
        """
        headers = {
            "Authorization": f"Bearer {PAGODA_API_KEY}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.model,
            "temperature": self.temperature,
            "prompt": instruction,
            "stream": False
        }
        for attempt in range(1, self.max_retries + 1):
            try:
                # NOTE(review): requests.post is synchronous and blocks the
                # event loop inside this async method — consider
                # asyncio.to_thread if games ever run concurrently.
                response = requests.post(self.base_url, headers=headers, json=payload)
                response.raise_for_status()
                raw_response = response.json().get("response", "")
                parsed_json = self.extract_json_from_response(raw_response)
                if parsed_json:
                    agent_response = AgentResponse(**parsed_json)
                    return agent_response.move, agent_response.prediction, agent_response.reasoning
                self.debug_print(f"Attempt {attempt}: Could not parse JSON from: {raw_response}")
            except Exception as e:
                self.debug_print(f"Attempt {attempt}: Pagoda error - {e}")
        raise ValueError("Pagoda API failed after multiple attempts.")

    def extract_json_from_response(self, text: str) -> dict:
        """Extract the first {...} span from free-form model output; {} on failure."""
        try:
            # Greedy match: from the first '{' to the last '}' (DOTALL spans newlines).
            match = re.search(r"\{.*\}", text, re.DOTALL)
            if match:
                return json.loads(match.group())
        except Exception as e:
            self.debug_print(f"JSON extract error: {e}")
        return {}

    def determine_outcome(self, player_move: str, opponent_move: str) -> int:
        """Return the player's payoff for this pair of moves.

        On a match: A when the player is at their preferred event
        (man+Football or woman+Opera), B otherwise. Mismatch pays C.
        """
        if player_move == opponent_move:
            return self.A if (player_move == "Football") == (self.role == "man") else self.B
        return self.C

    def apply_strategy(self, opponent_move: str) -> tuple[str, str, str]:
        """Fixed heuristic: always pick the role's preferred event.

        The opponent's move is accepted for interface parity but ignored,
        and no prediction is made.
        """
        move = "Opera" if self.role == "woman" else "Football"
        reasoning = f"Heuristic strategy: As a {self.role}, I always choose my preferred option ({move})."
        return move, "None", reasoning

    def update_score(self, outcome: int):
        """Add the round payoff to the running game score."""
        self.player_score_game += outcome

    def get_history_summary(self) -> str:
        """Render the history as prompt-ready text, with running totals appended."""
        if not self.history:
            return "No previous rounds."
        lines = [
            f"Round {i + 1}: You chose {r['Agent Move']}, Opponent chose {r['Opponent Move']}. Score: {r['Outcome']}"
            for i, r in enumerate(self.history)
        ]
        return "\n".join(lines) + f"\nTotal Playing Score: {self.player_score_game}\nCorrect Predictions: {self.prediction_score}/{len(self.history)}"
# Runner
async def main():
    """Play one demo game (man vs. an always-Opera opponent) and print the totals."""
    total_rounds = 10
    # Gather the configuration in one place, then construct the game from it.
    config = dict(
        model="mistral-small",
        role="man",
        prediction=True,
        version="classic",
        temperature=0.7,
        game_id=1,
        opponent_strategy_fn=lambda history: "Opera",
        strategy=False,
        total_rounds=total_rounds,
    )
    game = BoS(**config)
    for current_round in range(1, total_rounds + 1):
        await game.play_round(current_round)
    # Report final payoff and prediction quality.
    print(f"Final Score: {game.player_score_game}")
    print(f"Correct Predictions: {game.prediction_score}/{total_rounds}")
    accuracy = game.prediction_score / total_rounds * 100
    print(f"Prediction Accuracy: {accuracy:.1f}%")


if __name__ == "__main__":
    asyncio.run(main())
\ No newline at end of file
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Path to the BoS CSV file
CSV_FILE_PATH = "../../data/bos/bos.csv"
FIGURE_DIR = "../../figures/bos"
os.makedirs(FIGURE_DIR, exist_ok=True)

# Load and clean data
df = pd.read_csv(CSV_FILE_PATH)
# Keep only rounds that actually recorded an outcome.
df = df[df["outcomeRound"].notnull()]
df["idRound"] = df["idRound"].astype(int)
df["outcomeRound"] = df["outcomeRound"].astype(float)
# NOTE(review): if the "predictionRound" column is missing entirely,
# df.get(...) returns the scalar 0 and .fillna would raise — TODO confirm
# all logs contain this column.
df["predictionRound"] = df.get("predictionRound", 0).fillna(0).astype(float)

# Filter opponent strategies: keep only the two alternating scripted opponents.
strategies_of_interest = ["opera_football", "football_opera"]
df_filtered = df[df["opponentStrategy"].isin(strategies_of_interest)].copy()

# Plot settings: one color per base model; "<model> strategy" variants share
# the color and linestyle of their base model.
color_palette = {
    'qwen3': '#c02942', 'qwen3 strategy': '#c02942',
    'llama3': '#32a68c', 'llama3 strategy': '#32a68c',
    'mistral-small': '#ff6941', 'mistral-small strategy': '#ff6941',
    'deepseek-r1': '#5862ed', 'deepseek-r1 strategy': '#5862ed',
}
linestyle_dict = {
    'qwen3': 'dotted', 'qwen3 strategy': 'dotted',
    'llama3': 'dashed', 'llama3 strategy': 'dashed',
    'mistral-small': 'solid', 'mistral-small strategy': 'solid',
    'deepseek-r1': 'dashdot', 'deepseek-r1 strategy': 'dashdot',
}
# Function to plot
def plot_metric(metric: str, ylabel: str, title: str, filename: str, ylim: tuple):
    """Plot the per-round mean of *metric* for every model with a 95% CI band.

    Reads the module-level ``df_filtered`` / ``color_palette`` /
    ``linestyle_dict``; saves an SVG into ``FIGURE_DIR`` and shows the figure.
    """
    # Mean and standard error of the metric per (model, round).
    per_round = df_filtered.groupby(["model", "idRound"]).agg(
        mean_val=(metric, "mean"),
        sem_val=(metric, lambda x: np.std(x, ddof=1) / np.sqrt(len(x))),
    ).reset_index()
    # Normal-approximation 95% confidence interval half-width.
    per_round["ci95"] = 1.96 * per_round["sem_val"]

    plt.figure(figsize=(12, 7))
    for model_name, rows in per_round.groupby("model"):
        line_color = color_palette.get(model_name, '#63656a')
        line_style = linestyle_dict.get(model_name, 'solid')
        plt.plot(rows["idRound"], rows["mean_val"], label=model_name,
                 color=line_color, linestyle=line_style)
        # Shaded CI band around the mean line.
        plt.fill_between(rows["idRound"],
                         rows["mean_val"] - rows["ci95"],
                         rows["mean_val"] + rows["ci95"],
                         color=line_color, alpha=0.2)
    plt.xlim(1, 30)
    plt.ylim(*ylim)
    plt.xlabel("Round Number")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend(loc="upper right")
    plt.grid(True)
    plt.savefig(os.path.join(FIGURE_DIR, filename), format="svg")
    plt.show()
# Plot Payoff: average points earned per round, y-axis capped at the max payoff range.
plot_metric(
    metric="outcomeRound",
    ylabel="Average Points Earned",
    title="BoS: Average Points Earned per Round by Model and Role (95% CI)",
    filename="bos_payoff.svg",
    ylim=(0, 2)
)
# Plot Prediction Score: per-round prediction accuracy (0/1 averaged over games).
plot_metric(
    metric="predictionRound",
    ylabel="Prediction Accuracy",
    title="BoS: Prediction Accuracy per Round by Model and Role (95% CI)",
    filename="bos_prediction.svg",
    ylim=(0, 1.05)
)
\ No newline at end of file
import os
import csv
import asyncio
from bos import BoS
from typing import Callable
# Output location for per-round experiment logs (header written on first run).
CSV_FILE_PATH = "../../data/bos/bos.csv"
class BoSExperiment:
    """Grid runner: plays repeated BoS games for every (model, role, opponent
    strategy) combination and appends per-round results to a CSV file."""

    def __init__(self):
        self.debug = False
        # When True, the agent plays its fixed heuristic instead of querying the model.
        self.strategy = False
        self.models = ["mistral-small", "qwen3", "llama3", "deepseek-r1"]  # "gpt-4.5-preview-2025-02-27", "mixtral:8x7b", "llama3.3:latest", "deepseek-r1:7b"
        self.roles = ["man", "woman"]
        # Scripted opponents that alternate their move every round.
        self.opponent_strategies = {
            "football_opera": self.loop_football_opera,
            "opera_football": self.loop_opera_football
        }
        self.temperature = 0.7
        self.rounds = 30
        self.num_games_per_config = 10
        self.initialize_csv()

    def initialize_csv(self):
        """Create the output CSV (and its directory) with a header row, once."""
        if not os.path.exists(CSV_FILE_PATH):
            os.makedirs(os.path.dirname(CSV_FILE_PATH), exist_ok=True)
            with open(CSV_FILE_PATH, mode="w", newline="") as file:
                writer = csv.writer(file)
                writer.writerow([
                    "idGame", "model", "role", "opponentStrategy", "idRound",
                    "playerMove", "prediction", "opponentMove", "outcomeRound",
                    "currentPlayerScoreGame", "predictionRound", "currentPlayerPredictionScoreGame", "reasoning"
                ])

    def sanitize_reasoning(self, reasoning: str) -> str:
        """Flatten newlines and guard against spreadsheet formula injection.

        Fix: the previous version doubled inner quotes and wrapped the whole
        value in literal quotes, but ``csv.writer`` already performs CSV
        quoting, so the stored field ended up polluted with stray quote
        characters. Quoting is now left entirely to the csv module.
        """
        sanitized = reasoning.replace('\n', ' ').replace('\r', '')
        # Prefix values that Excel/Sheets would otherwise execute as formulas.
        if sanitized and sanitized[0] in ('=', '+', '-', '@'):
            sanitized = "'" + sanitized
        return sanitized

    def log_to_csv(self, game_id, model, role, opponent_strategy, round_id,
                   agent_move, prediction, opponent_move, outcome,
                   player_score_game, prediction_round_score, prediction_total_score, reasoning):
        """Append one round's record to the CSV."""
        sanitized_reasoning = self.sanitize_reasoning(reasoning)
        # Tag heuristic runs so they plot as separate series.
        model_type = model + " strategy" if self.strategy else model
        with open(CSV_FILE_PATH, mode="a", newline="") as file:
            writer = csv.writer(file)
            writer.writerow([
                game_id, model_type, role, opponent_strategy, round_id,
                agent_move, prediction, opponent_move, outcome,
                player_score_game, prediction_round_score, prediction_total_score, sanitized_reasoning
            ])

    async def run_experiment(self):
        """Iterate the full configuration grid, playing num_games_per_config games each."""
        game_id = 1
        for model in self.models:
            for role in self.roles:
                for strategy_name, strategy_fn in self.opponent_strategies.items():
                    for _ in range(self.num_games_per_config):
                        await self.run_game(model, role, strategy_name, strategy_fn, game_id)
                        game_id += 1

    async def run_game(self, model, role, opponent_strategy_name, opponent_strategy_fn, game_id):
        """Play one game of ``self.rounds`` rounds and log every round."""
        game = BoS(
            model=model,
            role=role,
            prediction=True,
            version="classic",
            temperature=self.temperature,
            game_id=game_id,
            opponent_strategy_fn=opponent_strategy_fn,
            strategy=self.strategy,
            total_rounds=self.rounds
        )
        for i in range(1, self.rounds + 1):
            round_data = await game.play_round(i)
            # 1.0 when the player's prediction matched the opponent's actual move.
            prediction_round_score = 1.0 if round_data.get("Prediction") == round_data.get("Opponent Move") else 0.0
            prediction_total_score = game.prediction_score
            self.log_to_csv(
                game_id, model, role, opponent_strategy_name, i,
                round_data["Agent Move"], round_data["Prediction"],
                round_data["Opponent Move"], round_data["Outcome"],
                game.player_score_game, prediction_round_score, prediction_total_score,
                round_data["Reasoning"]
            )

    def loop_football_opera(self, history):
        """Opponent plays Football on even-indexed rounds (0-based), Opera otherwise."""
        return "Football" if len(history) % 2 == 0 else "Opera"

    def loop_opera_football(self, history):
        """Opponent plays Opera on even-indexed rounds (0-based), Football otherwise."""
        return "Opera" if len(history) % 2 == 0 else "Football"
# Script entry point: run the full experiment grid and report the output file.
if __name__ == "__main__":
    experiment = BoSExperiment()
    asyncio.run(experiment.run_experiment())
    print("BoS experiment completed. Results saved in", CSV_FILE_PATH)
\ No newline at end of file
...@@ -2,7 +2,6 @@ import os ...@@ -2,7 +2,6 @@ import os
import csv import csv
import asyncio import asyncio
import random import random
from http.cookiejar import debug
from rps import RPS from rps import RPS
from typing import Callable from typing import Callable
...@@ -79,7 +78,7 @@ class RPSExperiment: ...@@ -79,7 +78,7 @@ class RPSExperiment:
if self.debug: if self.debug:
print(f"Running strategy {strategy_name}") print(f"Running strategy {strategy_name}")
for _ in range(self.num_games_per_config): for _ in range(self.num_games_per_config):
if debug: if self.debug:
print(f"Running game {game_id}") print(f"Running game {game_id}")
await self.run_game(model, strategy_name, strategy_fn, game_id) await self.run_game(model, strategy_name, strategy_fn, game_id)
game_id += 1 game_id += 1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment