Skip to content
Snippets Groups Projects
Commit 65adc84f authored by Maxime Morge's avatar Maxime Morge :construction_worker:
Browse files

PyGAAMAS: XPs about the Battle of the Sexes

parent 35babfbe
No related branches found
No related tags found
No related merge requests found
...@@ -580,7 +580,7 @@ Most lean toward either rigid rationality, indiscriminate cooperation, or unstab ...@@ -580,7 +580,7 @@ Most lean toward either rigid rationality, indiscriminate cooperation, or unstab
| **Mixtral:8x7b** | actions | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | | **Mixtral:8x7b** | actions | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 |
| | actions + ano | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | | | actions + ano | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 | 1.00 |
| **Mistral-Small** | actions | 0.00 | 0.90 | 1.00 | 0.00 | 0.77 | 1.00 | 0.03 | 0.97 | 1.00 | 0.07 | 0.90 | 1.00 | | **Mistral-Small** | actions | 0.00 | 0.90 | 1.00 | 0.00 | 0.77 | 1.00 | 0.03 | 0.97 | 1.00 | 0.07 | 0.90 | 1.00 |
| | actions + ano | 0.10 | 0.77 | 0.97 | 0.17 | 0.77 | 1.00 | N/A | N/A | N/A | 0.43 | 0.43 | 0.90 | | | actions + ano | 0.10 | 0.77 | 0.97 | 0.17 | 0.77 | 1.00 | 0.40 | 0.63 | 1.00 | 0.43 | 0.43 | 0.90 |
| **Deepseek-R1:7b** | actions | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | | **Deepseek-R1:7b** | actions | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
| | actions + ano | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | | | actions + ano | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A | N/A |
| **Deepseek-R1** | actions | 0.87 | 0.97 | 0.93 | 0.83 | 0.83 | 0.93 | 0.87 | 0.97 | 0.90 | 0.87 | 1.00 | 0.93 | | **Deepseek-R1** | actions | 0.87 | 0.97 | 0.93 | 0.83 | 0.83 | 0.93 | 0.87 | 0.97 | 0.90 | 0.87 | 1.00 | 0.93 |
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
annotated-types==0.7.0
anyio==4.9.0
appnope==0.1.4
asitop==0.0.24
asttokens==3.0.0
attr==0.3.2 attr==0.3.2
autogen_agentchat==0.4.7 autogen-agentchat==0.4.7
autogen_core==0.4.7 autogen-core==0.4.7
autogen_ext==0.4.7 autogen-ext==0.4.7
ConfigParser==7.1.0 backcall==0.2.0
blessed==1.21.0
certifi==2025.1.31
cffi==1.17.1
charset-normalizer==3.4.1
comm==0.2.2
configparser==7.1.0
contextlib2==21.6.0 contextlib2==21.6.0
contourpy==1.3.1
cryptography==44.0.1 cryptography==44.0.1
cycler==0.12.1
dashing==0.1.0
decorator==5.2.1
Deprecated==1.2.18
distro==1.9.0
docutils==0.21.2 docutils==0.21.2
executing==2.2.0
filelock==3.18.0
fire==0.7.0
fonttools==4.56.0
fsspec==2025.5.0
h11==0.14.0
HTMLParser==0.0.2 HTMLParser==0.0.2
httpcore==1.0.7
httpx==0.28.1
huggingface-hub==0.31.4
idna==3.10
imageio==2.37.0
importlib_metadata==8.5.0 importlib_metadata==8.5.0
importlib_metadata==8.0.0
ipython==8.12.3 ipython==8.12.3
ipywidgets==8.1.5 ipywidgets==8.1.5
jedi==0.19.2
Jinja2==3.1.5 Jinja2==3.1.5
jnius==1.1.0 jiter==0.9.0
keyring==25.6.0 jsonref==1.1.0
lockfile==0.12.2 jupyterlab_widgets==3.0.15
kiwisolver==1.4.8
MarkupSafe==3.0.2
matplotlib==3.10.1 matplotlib==3.10.1
mock==5.1.0 matplotlib-inline==0.1.7
numpy~=2.2.4 networkx==3.4.2
numpy==2.2.4
openai==1.69.0
opentelemetry-api==1.31.1
packaging==24.2
pandas==2.2.3 pandas==2.2.3
Pillow==11.1.0 parso==0.8.4
protobuf~=5.29.4 pexpect==4.9.0
pydantic~=2.11.1 pickleshare==0.7.5
pyOpenSSL==25.0.0 pillow==11.1.0
railroad==0.5.0 prompt_toolkit==3.0.51
protobuf==5.29.4
psutil==7.0.0
psycopg2==2.9.10
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
pydantic==2.11.1
pydantic_core==2.33.0
pygame==2.6.1
Pygments==2.19.1
pykka==4.2.0
pyparsing==3.2.3
python-dateutil==2.9.0.post0
pytz==2025.2
PyYAML==6.0.2
randomname==0.2.1
regex==2024.11.6
requests==2.32.3
safetensors==0.5.3
scipy==1.15.2 scipy==1.15.2
seaborn==0.13.2 seaborn==0.13.2
Sphinx==8.2.1 six==1.17.0
thread==2.0.5 sniffio==1.3.1
tornado==6.4.2 squarify==0.4.4
truststore==0.10.1 stack-data==0.6.3
urllib3_secure_extra==0.1.0 termcolor==3.1.0
xmlrpclib==1.0.1 tiktoken==0.9.0
tk==0.1.0
requests~=2.32.3 tokenizers==0.21.1
httpx~=0.28.1 tqdm==4.67.1
pip~=25.0.1 traitlets==5.14.3
distro~=1.9.0 transformers==4.52.3
\ No newline at end of file typing-inspection==0.4.0
typing_extensions==4.13.0
tzdata==2025.2
urllib3==2.3.0
wcwidth==0.2.13
widgetsnbextension==4.0.14
wrapt==1.17.2
zipp==3.21.0
import os
import asyncio
import json
import re
import requests
from typing import Dict, Literal, List, Callable
from pydantic import BaseModel, ValidationError
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken
from autogen_ext.models.openai import OpenAIChatCompletionClient
# Load API keys from the environment; both are required at import time
# because a single run may talk to OpenAI and to the Pagoda endpoint.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PAGODA_API_KEY = os.getenv("PAGODA_API_KEY")
# Fail fast with an explicit message rather than erroring mid-experiment.
if not OPENAI_API_KEY:
    raise ValueError("Missing OPENAI_API_KEY. Set it as an environment variable.")
if not PAGODA_API_KEY:
    raise ValueError("Missing PAGODA_API_KEY. Set it as an environment variable.")
# Response format
class AgentResponse(BaseModel):
    """Structured reply the model must return for one round of play."""
    move: Literal["Opera", "Football"]                # the player's chosen action
    prediction: Literal["Opera", "Football", "None"]  # predicted opponent action; "None" when prediction mode is off
    reasoning: str                                    # free-text justification for the choice
class BoS:
    """One Battle-of-the-Sexes match between a player (LLM-driven or heuristic)
    and a scripted opponent supplied as ``opponent_strategy_fn``.

    Tracks per-round history, the player's cumulative payoff, and — when
    ``prediction`` is enabled — how often the player predicted the opponent's
    move correctly.
    """

    def __init__(
        self,
        model: str,
        role: str,
        prediction: bool,
        version: str,
        temperature: float,
        game_id: int,
        opponent_strategy_fn: Callable[[List[Dict]], str],
        strategy: bool = False,
        total_rounds: int = 10,
        max_retries: int = 5
    ):
        """Configure the game.

        Args:
            model: model name; "gpt*" → OpenAI, names containing ":" → Pagoda,
                anything else → local Ollama-compatible endpoint.
            role: "man" or "woman" (case-insensitive).
            prediction: if True, the prompt also asks for an opponent-move prediction.
            version: "classic" selects the 3/2/0 payoff matrix; anything else yields NaN payoffs.
            temperature: sampling temperature forwarded to the backend.
            game_id: identifier for logging purposes.
            opponent_strategy_fn: maps the shared history to the opponent's next move.
            strategy: if True, play the fixed heuristic instead of querying the model.
            total_rounds: number of rounds in the game.
            max_retries: attempts before giving up on an unparseable model reply.
        """
        self.debug = False
        self.model = model
        self.role = role.lower()
        self.prediction = prediction
        self.version = version
        self.temperature = temperature
        self.game_id = game_id
        self.strategy = strategy
        self.total_rounds = total_rounds
        self.max_retries = max_retries
        self.history: List[Dict] = []
        self.player_score_game = 0
        self.prediction_score = 0
        self.opponent_strategy_fn = opponent_strategy_fn
        # Payoff matrix: A = preferred-event payoff, B = partner's-event payoff,
        # C = mismatch payoff. Non-"classic" versions are not defined here.
        if self.version == "classic":
            self.A, self.B, self.C = 3, 2, 0
        else:
            self.A, self.B, self.C = float('nan'), float('nan'), float('nan')
        # Route by model name: gpt* → OpenAI, ":" tag → Pagoda generate API,
        # otherwise a local Ollama OpenAI-compatible server.
        self.base_url = (
            "https://api.openai.com/v1"
            if model.startswith("gpt")
            else "https://ollama-ui.pagoda.liris.cnrs.fr/ollama/api/generate"
            if ":" in model
            else "http://localhost:11434/v1"
        )
        # NOTE(review): this client is built even for Pagoda models (which are
        # queried over raw HTTP in run_pagoda) and always uses OPENAI_API_KEY;
        # also "temperature" inside model_info may not be honored by the
        # client — TODO confirm against autogen_ext documentation.
        self.model_client = OpenAIChatCompletionClient(
            timeout=60,
            model=self.model,
            base_url=self.base_url,
            api_key=OPENAI_API_KEY,
            model_info={
                "temperature": self.temperature,
                "function_calling": True,
                "parallel_tool_calls": True,
                "family": "unknown",
                "json_output": True,
                "vision": False
            },
            response_format=AgentResponse
        )

    def debug_print(self, msg: str):
        """Print *msg* only when self.debug is set."""
        if self.debug:
            print(f"[DEBUG] {msg}")

    async def play_round(self, round_id: int) -> Dict:
        """Play one round: obtain both moves, score them, append to history.

        Returns the per-round result dict that is also stored in ``self.history``.
        """
        # The scripted opponent sees only the shared history, never our move.
        opponent_move = self.opponent_strategy_fn(self.history)
        if self.strategy:
            move, prediction, reasoning = self.apply_strategy(opponent_move)
        else:
            move, prediction, reasoning = await self.model_based_decision()
        outcome = self.determine_outcome(move, opponent_move)
        self.update_score(outcome)
        prediction_current_score = 0.0
        # Only score predictions when prediction mode is on and a concrete
        # move (not "None") was predicted.
        if self.prediction and prediction in ["Opera", "Football"]:
            if prediction == opponent_move:
                prediction_current_score = 1.0
                self.prediction_score += 1
        round_result = {
            "Agent Move": move,
            "Prediction": prediction,
            "Opponent Move": opponent_move,
            "Reasoning": reasoning,
            "Outcome": outcome,
            "Current Player Prediction Score Game": self.prediction_score,
            "PredictionOutcome": prediction_current_score
        }
        self.history.append(round_result)
        self.debug_print(f"Round {round_id}: {round_result}")
        return round_result

    def generate_instruction(self, history_summary: str, round_id: int) -> str:
        """Build the full prompt for the current round.

        The tail of the prompt differs only in whether a prediction field is
        requested in the JSON reply.
        """
        base = f"""
You are playing the Battle of the Sexes.
Game Description:
A couple is deciding how to spend the evening. The Woman prefers the Opera, while the Man prefers Football.
Both would rather go to the same event together than go alone, but each has a different preferred activity.
Payoffs:
- Opera/Opera: Woman gets {self.A}, Man gets {self.B}
- Football/Football: Woman gets {self.B}, Man gets {self.A}
- Mismatch: Both get {self.C}
Your Role: {self.role.capitalize()}
This is round {round_id} of {self.total_rounds}.
Game History:
{history_summary}
"""
        if not self.prediction:
            return base + """
Your Goal: Maximize your own score by anticipating the opponent's move — whether they are random, pattern-based, or strategic.
Respond ONLY in the following JSON format:
{
"move": "Opera" | "Football",
"prediction": "None",
"reasoning": "Explain your decision based on your preferences and the past moves"
}
"""
        return base + """
Your Goal: Maximize your own score by anticipating the opponent's move — whether they are random, pattern-based, or strategic.
Respond ONLY in the following JSON format:
{
"move": "Opera" | "Football",
"prediction": "Opera" | "Football",
"reasoning": "Explain how you predicted the opponent's move and how you chose your response"
}
"""

    async def model_based_decision(self) -> tuple[str, str, str]:
        """Ask the configured model for (move, prediction, reasoning).

        Retries up to ``max_retries`` times on unparseable replies, then
        raises ValueError. Pagoda-hosted models are delegated to run_pagoda.
        """
        history_summary = self.get_history_summary()
        instruction = self.generate_instruction(history_summary, len(self.history) + 1)
        # Models tagged with ":" (e.g. "mixtral:8x7b") are served by Pagoda.
        if ":" in self.model:
            return await self.run_pagoda(instruction)
        for attempt in range(1, self.max_retries + 1):
            try:
                # A fresh agent per attempt: no conversation state carries over.
                agent = AssistantAgent(
                    name="Player",
                    model_client=self.model_client,
                    system_message="You are a helpful assistant."
                )
                response = await agent.on_messages(
                    [TextMessage(content=instruction, source="user")],
                    cancellation_token=CancellationToken()
                )
                content = response.chat_message.content
                agent_response = AgentResponse.model_validate_json(content)
                return agent_response.move, agent_response.prediction, agent_response.reasoning
            except (ValidationError, json.JSONDecodeError) as e:
                self.debug_print(f"Attempt {attempt}: Parse error - {e}")
        raise ValueError("Model failed to provide a valid response after multiple attempts.")

    async def run_pagoda(self, instruction: str) -> tuple[str, str, str]:
        """Query the Pagoda Ollama /generate endpoint over raw HTTP.

        Retries up to ``max_retries`` times, then raises ValueError.
        """
        headers = {
            "Authorization": f"Bearer {PAGODA_API_KEY}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.model,
            "temperature": self.temperature,
            "prompt": instruction,
            "stream": False
        }
        for attempt in range(1, self.max_retries + 1):
            try:
                # NOTE(review): requests.post is synchronous and blocks the
                # event loop inside this async method — consider
                # asyncio.to_thread if games ever run concurrently.
                response = requests.post(self.base_url, headers=headers, json=payload)
                response.raise_for_status()
                raw_response = response.json().get("response", "")
                parsed_json = self.extract_json_from_response(raw_response)
                if parsed_json:
                    agent_response = AgentResponse(**parsed_json)
                    return agent_response.move, agent_response.prediction, agent_response.reasoning
                self.debug_print(f"Attempt {attempt}: Could not parse JSON from: {raw_response}")
            except Exception as e:
                self.debug_print(f"Attempt {attempt}: Pagoda error - {e}")
        raise ValueError("Pagoda API failed after multiple attempts.")

    def extract_json_from_response(self, text: str) -> dict:
        """Extract the first {...} span from free-form model output; {} on failure."""
        try:
            # Greedy match: from the first '{' to the last '}' (DOTALL spans newlines).
            match = re.search(r"\{.*\}", text, re.DOTALL)
            if match:
                return json.loads(match.group())
        except Exception as e:
            self.debug_print(f"JSON extract error: {e}")
        return {}

    def determine_outcome(self, player_move: str, opponent_move: str) -> int:
        """Return the player's payoff for this pair of moves.

        On a match: A when the player is at their preferred event
        (man+Football or woman+Opera), B otherwise. Mismatch pays C.
        """
        if player_move == opponent_move:
            return self.A if (player_move == "Football") == (self.role == "man") else self.B
        return self.C

    def apply_strategy(self, opponent_move: str) -> tuple[str, str, str]:
        """Fixed heuristic: always pick the role's preferred event.

        The opponent's move is accepted for interface parity but ignored,
        and no prediction is made.
        """
        move = "Opera" if self.role == "woman" else "Football"
        reasoning = f"Heuristic strategy: As a {self.role}, I always choose my preferred option ({move})."
        return move, "None", reasoning

    def update_score(self, outcome: int):
        """Add the round payoff to the running game score."""
        self.player_score_game += outcome

    def get_history_summary(self) -> str:
        """Render the history as prompt-ready text, with running totals appended."""
        if not self.history:
            return "No previous rounds."
        lines = [
            f"Round {i + 1}: You chose {r['Agent Move']}, Opponent chose {r['Opponent Move']}. Score: {r['Outcome']}"
            for i, r in enumerate(self.history)
        ]
        return "\n".join(lines) + f"\nTotal Playing Score: {self.player_score_game}\nCorrect Predictions: {self.prediction_score}/{len(self.history)}"
# Runner
async def main():
    """Play one demo game (man vs. an always-Opera opponent) and print the totals."""
    total_rounds = 10
    # Gather the configuration in one place, then construct the game from it.
    config = dict(
        model="mistral-small",
        role="man",
        prediction=True,
        version="classic",
        temperature=0.7,
        game_id=1,
        opponent_strategy_fn=lambda history: "Opera",
        strategy=False,
        total_rounds=total_rounds,
    )
    game = BoS(**config)
    for current_round in range(1, total_rounds + 1):
        await game.play_round(current_round)
    # Report final payoff and prediction quality.
    print(f"Final Score: {game.player_score_game}")
    print(f"Correct Predictions: {game.prediction_score}/{total_rounds}")
    accuracy = game.prediction_score / total_rounds * 100
    print(f"Prediction Accuracy: {accuracy:.1f}%")


if __name__ == "__main__":
    asyncio.run(main())
\ No newline at end of file
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Path to the BoS CSV file
CSV_FILE_PATH = "../../data/bos/bos.csv"
FIGURE_DIR = "../../figures/bos"
os.makedirs(FIGURE_DIR, exist_ok=True)

# Load and clean data
df = pd.read_csv(CSV_FILE_PATH)
# Keep only rounds that actually recorded an outcome.
df = df[df["outcomeRound"].notnull()]
df["idRound"] = df["idRound"].astype(int)
df["outcomeRound"] = df["outcomeRound"].astype(float)
# NOTE(review): if the "predictionRound" column is missing entirely,
# df.get(...) returns the scalar 0 and .fillna would raise — TODO confirm
# all logs contain this column.
df["predictionRound"] = df.get("predictionRound", 0).fillna(0).astype(float)

# Filter opponent strategies: keep only the two alternating scripted opponents.
strategies_of_interest = ["opera_football", "football_opera"]
df_filtered = df[df["opponentStrategy"].isin(strategies_of_interest)].copy()

# Plot settings: one color per base model; "<model> strategy" variants share
# the color and linestyle of their base model.
color_palette = {
    'qwen3': '#c02942', 'qwen3 strategy': '#c02942',
    'llama3': '#32a68c', 'llama3 strategy': '#32a68c',
    'mistral-small': '#ff6941', 'mistral-small strategy': '#ff6941',
    'deepseek-r1': '#5862ed', 'deepseek-r1 strategy': '#5862ed',
}
linestyle_dict = {
    'qwen3': 'dotted', 'qwen3 strategy': 'dotted',
    'llama3': 'dashed', 'llama3 strategy': 'dashed',
    'mistral-small': 'solid', 'mistral-small strategy': 'solid',
    'deepseek-r1': 'dashdot', 'deepseek-r1 strategy': 'dashdot',
}
# Function to plot
def plot_metric(metric: str, ylabel: str, title: str, filename: str, ylim: tuple):
    """Plot the per-round mean of *metric* for every model with a 95% CI band.

    Reads the module-level ``df_filtered`` / ``color_palette`` /
    ``linestyle_dict``; saves an SVG into ``FIGURE_DIR`` and shows the figure.
    """
    # Mean and standard error of the metric per (model, round).
    per_round = df_filtered.groupby(["model", "idRound"]).agg(
        mean_val=(metric, "mean"),
        sem_val=(metric, lambda x: np.std(x, ddof=1) / np.sqrt(len(x))),
    ).reset_index()
    # Normal-approximation 95% confidence interval half-width.
    per_round["ci95"] = 1.96 * per_round["sem_val"]

    plt.figure(figsize=(12, 7))
    for model_name, rows in per_round.groupby("model"):
        line_color = color_palette.get(model_name, '#63656a')
        line_style = linestyle_dict.get(model_name, 'solid')
        plt.plot(rows["idRound"], rows["mean_val"], label=model_name,
                 color=line_color, linestyle=line_style)
        # Shaded CI band around the mean line.
        plt.fill_between(rows["idRound"],
                         rows["mean_val"] - rows["ci95"],
                         rows["mean_val"] + rows["ci95"],
                         color=line_color, alpha=0.2)
    plt.xlim(1, 30)
    plt.ylim(*ylim)
    plt.xlabel("Round Number")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend(loc="upper right")
    plt.grid(True)
    plt.savefig(os.path.join(FIGURE_DIR, filename), format="svg")
    plt.show()
# Plot Payoff: average points earned per round, y-axis capped at the max payoff range.
plot_metric(
    metric="outcomeRound",
    ylabel="Average Points Earned",
    title="BoS: Average Points Earned per Round by Model and Role (95% CI)",
    filename="bos_payoff.svg",
    ylim=(0, 2)
)
# Plot Prediction Score: per-round prediction accuracy (0/1 averaged over games).
plot_metric(
    metric="predictionRound",
    ylabel="Prediction Accuracy",
    title="BoS: Prediction Accuracy per Round by Model and Role (95% CI)",
    filename="bos_prediction.svg",
    ylim=(0, 1.05)
)
\ No newline at end of file
import os
import csv
import asyncio
from bos import BoS
from typing import Callable
# Output location for per-round experiment logs (header written on first run).
CSV_FILE_PATH = "../../data/bos/bos.csv"
class BoSExperiment:
    """Grid runner: plays repeated BoS games for every (model, role, opponent
    strategy) combination and appends per-round results to a CSV file."""

    def __init__(self):
        self.debug = False
        # When True, the agent plays its fixed heuristic instead of querying the model.
        self.strategy = False
        self.models = ["mistral-small", "qwen3", "llama3", "deepseek-r1"]  # "gpt-4.5-preview-2025-02-27", "mixtral:8x7b", "llama3.3:latest", "deepseek-r1:7b"
        self.roles = ["man", "woman"]
        # Scripted opponents that alternate their move every round.
        self.opponent_strategies = {
            "football_opera": self.loop_football_opera,
            "opera_football": self.loop_opera_football
        }
        self.temperature = 0.7
        self.rounds = 30
        self.num_games_per_config = 10
        self.initialize_csv()

    def initialize_csv(self):
        """Create the output CSV (and its directory) with a header row, once."""
        if not os.path.exists(CSV_FILE_PATH):
            os.makedirs(os.path.dirname(CSV_FILE_PATH), exist_ok=True)
            with open(CSV_FILE_PATH, mode="w", newline="") as file:
                writer = csv.writer(file)
                writer.writerow([
                    "idGame", "model", "role", "opponentStrategy", "idRound",
                    "playerMove", "prediction", "opponentMove", "outcomeRound",
                    "currentPlayerScoreGame", "predictionRound", "currentPlayerPredictionScoreGame", "reasoning"
                ])

    def sanitize_reasoning(self, reasoning: str) -> str:
        """Flatten newlines and guard against spreadsheet formula injection.

        Fix: the previous version doubled inner quotes and wrapped the whole
        value in literal quotes, but ``csv.writer`` already performs CSV
        quoting, so the stored field ended up polluted with stray quote
        characters. Quoting is now left entirely to the csv module.
        """
        sanitized = reasoning.replace('\n', ' ').replace('\r', '')
        # Prefix values that Excel/Sheets would otherwise execute as formulas.
        if sanitized and sanitized[0] in ('=', '+', '-', '@'):
            sanitized = "'" + sanitized
        return sanitized

    def log_to_csv(self, game_id, model, role, opponent_strategy, round_id,
                   agent_move, prediction, opponent_move, outcome,
                   player_score_game, prediction_round_score, prediction_total_score, reasoning):
        """Append one round's record to the CSV."""
        sanitized_reasoning = self.sanitize_reasoning(reasoning)
        # Tag heuristic runs so they plot as separate series.
        model_type = model + " strategy" if self.strategy else model
        with open(CSV_FILE_PATH, mode="a", newline="") as file:
            writer = csv.writer(file)
            writer.writerow([
                game_id, model_type, role, opponent_strategy, round_id,
                agent_move, prediction, opponent_move, outcome,
                player_score_game, prediction_round_score, prediction_total_score, sanitized_reasoning
            ])

    async def run_experiment(self):
        """Iterate the full configuration grid, playing num_games_per_config games each."""
        game_id = 1
        for model in self.models:
            for role in self.roles:
                for strategy_name, strategy_fn in self.opponent_strategies.items():
                    for _ in range(self.num_games_per_config):
                        await self.run_game(model, role, strategy_name, strategy_fn, game_id)
                        game_id += 1

    async def run_game(self, model, role, opponent_strategy_name, opponent_strategy_fn, game_id):
        """Play one game of ``self.rounds`` rounds and log every round."""
        game = BoS(
            model=model,
            role=role,
            prediction=True,
            version="classic",
            temperature=self.temperature,
            game_id=game_id,
            opponent_strategy_fn=opponent_strategy_fn,
            strategy=self.strategy,
            total_rounds=self.rounds
        )
        for i in range(1, self.rounds + 1):
            round_data = await game.play_round(i)
            # 1.0 when the player's prediction matched the opponent's actual move.
            prediction_round_score = 1.0 if round_data.get("Prediction") == round_data.get("Opponent Move") else 0.0
            prediction_total_score = game.prediction_score
            self.log_to_csv(
                game_id, model, role, opponent_strategy_name, i,
                round_data["Agent Move"], round_data["Prediction"],
                round_data["Opponent Move"], round_data["Outcome"],
                game.player_score_game, prediction_round_score, prediction_total_score,
                round_data["Reasoning"]
            )

    def loop_football_opera(self, history):
        """Opponent plays Football on even-indexed rounds (0-based), Opera otherwise."""
        return "Football" if len(history) % 2 == 0 else "Opera"

    def loop_opera_football(self, history):
        """Opponent plays Opera on even-indexed rounds (0-based), Football otherwise."""
        return "Opera" if len(history) % 2 == 0 else "Football"
# Script entry point: run the full experiment grid and report the output file.
if __name__ == "__main__":
    experiment = BoSExperiment()
    asyncio.run(experiment.run_experiment())
    print("BoS experiment completed. Results saved in", CSV_FILE_PATH)
\ No newline at end of file
...@@ -2,7 +2,6 @@ import os ...@@ -2,7 +2,6 @@ import os
import csv import csv
import asyncio import asyncio
import random import random
from http.cookiejar import debug
from rps import RPS from rps import RPS
from typing import Callable from typing import Callable
...@@ -79,7 +78,7 @@ class RPSExperiment: ...@@ -79,7 +78,7 @@ class RPSExperiment:
if self.debug: if self.debug:
print(f"Running strategy {strategy_name}") print(f"Running strategy {strategy_name}")
for _ in range(self.num_games_per_config): for _ in range(self.num_games_per_config):
if debug: if self.debug:
print(f"Running game {game_id}") print(f"Running game {game_id}")
await self.run_game(model, strategy_name, strategy_fn, game_id) await self.run_game(model, strategy_name, strategy_fn, game_id)
game_id += 1 game_id += 1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment