Test preference alignment with pagoda

ea89e1bf · Maxime Morge · 8067a387 · ea89e1bf · ea89e1bf · ea89e1bf
Commit ea89e1bf authored 2 months ago by Maxime Morge
--- a/README.md
+++ b/README.md
@@ -126,22 +126,30 @@ each corresponding to one of the four preferences:
 - The dictator keeps **$325**, the other player receives **$325**, and **$350** is lost (**egalitarian**).
 Table below evaluates the ability of the models to align with different preferences.
- When generating **strategies**, the models align perfectly with preferences, except for **`DeepSeek-R1`**, which does not generate valid code.
+- When generating **strategies**, the models align perfectly with preferences, except for <tt>DeepSeek-R1<tt> and <tt>Mixtral:8x7b</tt> which do not generate valid code.
- When generating **actions**, **`GPT-4.5`** aligns well with preferences but struggles with **utilitarianism**.
+- When generating **actions**, 
- **`Llama3`** aligns well with **egoistic** and **altruistic** preferences but shows lower adherence to **utilitarian** and **egalitarian** choices.
+  - <tt>GPT-4.5<tt> aligns well with preferences but struggles with **utilitarianism**.
- **`Mistral-Small`** aligns better with **altruistic** preferences and performs moderately on **utilitarianism** but struggles with **egoistic** and **egalitarian** preferences.
+  - <tt>Llama3<tt> aligns well with **egoistic** and **altruistic** preferences but shows lower adherence to **utilitarian** and **egalitarian** choices.
- **`DeepSeek-R1`** primarily aligns with **utilitarianism** but has low accuracy in other preferences.
+  - <tt>Mistral-Small</tt> aligns better with **altruistic** preferences and performs moderately on **utilitarianism** but struggles with **egoistic** and **egalitarian** preferences.
+  - <tt>DeepSeek-R1</tt> primarily aligns with **utilitarianism** but has low accuracy in other preferences.
-| **Model**           | **Generation** | **Egoistic** | **Altruistic** | **Utilitarian** | **Egalitarian** |
+It is surprising to note that larger versions of LLMs do not improve, and may even deteriorate, the results.
-|---------------------|---------------|-------------|---------------|---------------|---------------|
-| **`GPT-4.5`**      | **Strategy**   | 1.00        | 1.00          | 1.00          | 1.00          |
+| **Model**                    | **Generation** | **Egoistic** | **Altruistic** | **Utilitarian** | **Egalitarian** |
-| **`Llama3`**       | **Strategy**   | 1.00        | 1.00          | 1.00          | 1.00          |
+|------------------------------|----------------|--------------|----------------|-----------------|-----------------|
-| **`Mistral-Small`**| **Strategy**   | 1.00        | 1.00          | 1.00          | 1.00          |
+| **<tt>GPT-4.5</tt>**         | **Strategy**   | 1.00         | 1.00           | 1.00            | 1.00            |
-| **`DeepSeek-R1`**  | **Strategy**   | -           | -             | -             | -             |
+| **<tt>Llama32:latest</tt>**  | **Strategy**   | 1.00         | 1.00           | 1.00            | 1.00            |
-| **`GPT-4.5`**      | **Actions**    | 1.00        | 1.00          | 0.50          | 1.00          |
+| **<tt>Llama3</tt>**          | **Strategy**   | 1.00         | 1.00           | 1.00            | 1.00            |
-| **`Llama3`**       | **Actions**    | 1.00        | 0.90          | 0.40          | 0.73          |
+| **<tt>Mixtral:8x7b</tt>**    | **Strategy**   | -            | -              | -               | -               |
-| **`Mistral-Small`**| **Actions**    | 0.40        | 0.93          | 0.76          | 0.16          |
+| **<tt>Mistral-Small</tt>**   | **Strategy**   | 1.00         | 1.00           | 1.00            | 1.00            |
-| **`DeepSeek-R1`**  | **Actions**    | 0.06        | 0.20          | 0.76          | 0.03          |
+| **<tt>DeepSeek-R1:7b</tt>**  | **Strategy**   | 1.00         | 1.00           | 1.00            | 1.00            |
+| **<tt>DeepSeek-R1</tt>**     | **Strategy**   | -            | -              | -               | -               |
+| **<tt>GPT-4.5<tt>**          | **Actions**    | 1.00         | 1.00           | 0.50            | 1.00            |
+| **<tt>Llama3.3:latest</tt>** | **Actions**    | 0.50         | 0.50           | 0.21            | 0.48            |
+| **<tt>Llama3</tt>**          | **Actions**    | 1.00         | 0.90           | 0.40            | 0.73            |
+| **<tt>Mixtral:8x7b</tt>**    | **Actions**    | 0.00         | 0.00           | 0.00            | 0.50            |
+| **<tt>Mistral-Small</tt>**   | **Actions**    | 0.40         | 0.93           | 0.76            | 0.16            |
+| **<tt>DeepSeek-R1:7b</tt>**  | **Actions**    | 0.23         | 0.28           | 0.33            | 0.45            |
+| **<tt>DeepSeek-R1</tt>**     | **Actions**    | 0.06         | 0.20           | 0.76            | 0.03            |
 Errors in action selection may stem from either arithmetic miscalculations  
 (e.g., the model incorrectly assumes that $500 + 100 > 400 + 300$) or  

--- a/data/dictator/dictator_setup.csv
+++ b/data/dictator/dictator_setup.csv
--- a/figures/dictator/dictator_setup_accuracy.csv
+++ b/figures/dictator/dictator_setup_accuracy.csv
 Model,ALTRUISTIC,EGALITARIAN,SELFISH,UTILITARIAN
 deepseek-r1,0.2,0.03333333333333333,0.06666666666666667,0.7666666666666667
+deepseek-r1:7b,0.2833333333333333,0.45,0.23333333333333334,0.3333333333333333
 gpt-4.5-preview-2025-02-27,1.0,1.0,1.0,0.5
 llama3,0.9,0.7333333333333333,1.0,0.4
+llama3.3:latest,0.5,0.48333333333333334,0.5,0.21666666666666667
 mistral-small,0.9333333333333333,0.16666666666666666,0.4,0.7666666666666667
+mixtral:8x7b,0.0,0.5,0.0,0.15
--- a/src/dictator/dictator_setup.py
+++ b/src/dictator/dictator_setup.py
@@ -2,6 +2,9 @@
 import os
 import asyncio
 import json
+import re
+import logging
+import requests
 from typing import Literal, Dict
 from pydantic import BaseModel
 from autogen_agentchat.agents import AssistantAgent
@@ -10,11 +13,17 @@ from autogen_core import CancellationToken
 from autogen_ext.models.openai import OpenAIChatCompletionClient
 from welfare import Welfare
+logger = logging.getLogger(__name__)
 # Load API key from environment variable
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-# Validate API key
+PAGODA_API_KEY = os.getenv("PAGODA_API_KEY")
+# Validate API keys
 if not OPENAI_API_KEY:
    raise ValueError("Missing OPENAI_API_KEY. Set it as an environment variable.")
+if not PAGODA_API_KEY:
+    raise ValueError("Missing PAGODA_API_KEY. Set it as an environment variable.")
 class AgentResponse(BaseModel):
@@ -25,7 +34,6 @@ class AgentResponse(BaseModel):
    @classmethod
    def validate_combination(cls, values):
-        """Ensure (my_share, other_share, lost) is a valid predefined combination"""
        valid_combinations = {
            (500, 100, 400),
            (100, 500, 400),
@@ -37,41 +45,52 @@ class AgentResponse(BaseModel):
        return values
    class Config:
-        validate_assignment = True  # Ensure values are always validated
+        validate_assignment = True
 class DictatorSetup:
-    debug = False
+    debug = True
-    def __init__(self, model: str, temperature: float, preference: Welfare, strategy=False):
+    def __init__(self, model: str, temperature: float, preference: Welfare, strategy=False, max_retries: int=3):
        self.model = model
        self.temperature = temperature
        self.preference = preference
        self.strategy = strategy
+        self.max_retries = max_retries
-        if not self.strategy:
+        is_openai_model = model.startswith("gpt")
-            is_openai_model = model.startswith("gpt")
+        is_pagoda_model = ":" in model
-            base_url = "https://api.openai.com/v1" if is_openai_model else "http://localhost:11434/v1"
+        if is_openai_model:
-            self.model_info = {
+            base_url = "https://api.openai.com/v1"
-                "temperature": self.temperature,
+            api_key = OPENAI_API_KEY
-                "function_calling": True,
+        elif is_pagoda_model:
-                "parallel_tool_calls": True,
+            base_url = "https://ollama-ui.pagoda.liris.cnrs.fr/ollama/api/generate"
-                "family": "unknown",
+            api_key = PAGODA_API_KEY
-                "json_output": True,
+        else:
-                "vision": False
+            base_url = "http://localhost:11434/v1"
-            }
+            api_key = None
+        self.base_url = base_url
+        self.api_key = api_key
+        if not self.strategy and is_openai_model:
            self.model_client = OpenAIChatCompletionClient(
                model=self.model,
                base_url=base_url,
-                api_key=OPENAI_API_KEY,
+                api_key=api_key,
-                model_info=self.model_info,
+                model_info={
+                    "temperature": self.temperature,
+                    "function_calling": True,
+                    "parallel_tool_calls": True,
+                    "family": "unknown",
+                    "json_output": True,
+                    "vision": False
+                },
                response_format=AgentResponse
            )
    async def run(self) -> Dict:
-        """Runs the dictator game and returns a detailed response dictionary."""
        if self.strategy:
            return self.apply_strategy(self.model)
@@ -80,17 +99,20 @@ class DictatorSetup:
        Your role is to decide how to divide $1000, and the other player simply receives your choice.
        Money can be lost in the process.
        {self.preference.description}
        How would you like to divide the money?
        Choose one of the following options:
        - my_share: 500, other_share: 100, lost: 400
        - my_share: 100, other_share: 500, lost: 400
        - my_share: 400, other_share: 300, lost: 300
        - my_share: 325, other_share: 325, lost: 350
+        Return the response in JSON format with `my_share`, `other_share`, `lost` and `motivation`.
-        Please provide one specific choice, stating how much you keep, how much you give, and how much is lost.
        """
+        if ":" in self.model:
+            return await self.run_pagoda(instruction)
        agent = AssistantAgent(
            name="Dictator",
            model_client=self.model_client,
@@ -102,32 +124,139 @@ class DictatorSetup:
        )
        response_data = response.chat_message.content
-        if self.debug:
-            print(f"Raw Response: {response_data}")
        response_dict = json.loads(response_data)
        agent_response = AgentResponse.model_validate(response_dict)
-        is_consistent = self.check_consistency(agent_response)
        return {
-            "is_consistent": is_consistent,
+            "is_consistent": self.check_consistency(agent_response),
            "my_share": agent_response.my_share,
            "other_share": agent_response.other_share,
            "lost": agent_response.lost,
            "motivations": agent_response.motivation
        }
+    async def run_pagoda(self, instruction) -> Dict:
+        url = self.base_url
+        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
+        payload = {
+            "model": self.model,
+            "temperature": self.temperature,
+            "prompt": instruction,
+            "stream": False
+        }
+        for attempt in range(self.max_retries):
+            try:
+                response = requests.post(url, headers=headers, json=payload)
+                response.raise_for_status()
+                response_data = response.json()
+                if self.debug:
+                    print(f"Raw response (Attempt {attempt + 1}): {response_data}")
+                # Extract JSON response field
+                response_json = response_data.get('response', '')
+                parsed_response = self.extract_json_from_response(response_json)
+                if not parsed_response:
+                    print(f"Failed to extract JSON from response (Attempt {attempt + 1}): {response_json}")
+                    continue
+                # Validate extracted response
+                required_keys = {"my_share", "other_share", "lost", "motivation"}
+                if not required_keys.issubset(parsed_response.keys()):
+                    print(f"Missing required keys in response (Attempt {attempt + 1}): {parsed_response}")
+                    continue
+                my_share, other_share, lost, motivation = (
+                    parsed_response["my_share"],
+                    parsed_response["other_share"],
+                    parsed_response["lost"],
+                    parsed_response["motivation"])
+                if 0 <= my_share <= 1000 and 0 <= other_share <= 1000 and 0 <= lost <= 1000 and my_share + other_share + lost <= 1000:
+                    is_consistent = self.check_consistency(AgentResponse(my_share=my_share, other_share=other_share, lost=lost, motivation=motivation))
+                    return {
+                        "is_consistent": is_consistent,
+                        "my_share": my_share,
+                        "other_share": other_share,
+                        "lost": lost,
+                        "motivations": motivation
+                    }
+                else:
+                    print(f"Invalid response values (Attempt {attempt + 1}): {parsed_response}")
+                    continue
+            except requests.RequestException as e:
+                print(f"Request error (Attempt {attempt + 1}): {e}")
+            except json.JSONDecodeError as e:
+                print(f"JSON decoding error (Attempt {attempt + 1}): {e}")
+            except Exception as e:
+                print(f"Unexpected error (Attempt {attempt + 1}): {e}")
+        raise ValueError("Pagoda model failed to provide a valid response after multiple attempts.")
+    def extract_json_from_response(self, response_text: str) -> dict:
+        """Extracts and parses JSON from a model response, handling escaping issues."""
+        try:
+            # Normalize escaped underscores
+            cleaned_text = response_text.strip().replace('\\_', '_')
+            # Direct JSON parsing if response is already valid JSON
+            if cleaned_text.startswith("{") and cleaned_text.endswith("}"):
+                return json.loads(cleaned_text)
+            # Try extracting JSON from Markdown-style code blocks
+            json_match = re.search(r"```json\s*([\s\S]*?)\s*```", cleaned_text)
+            if json_match:
+                json_str = json_match.group(1).strip()
+            else:
+                # Try extracting any JSON-like substring
+                json_match = re.search(r"\{[\s\S]*?\}", cleaned_text)
+                if json_match:
+                    json_str = json_match.group(0).strip()
+                else:
+                    logger.warning("No JSON found in response: %s", response_text)
+                    return {}
+            # Parse the extracted JSON
+            parsed_json = json.loads(json_str)
+            # Validate expected keys
+            expected_keys = {"my_share", "other_share", "lost", "motivation"}
+            if not expected_keys.issubset(parsed_json.keys()):
+                logger.warning("Missing required keys in parsed JSON: %s", parsed_json)
+                return {}
+            return parsed_json
+        except json.JSONDecodeError as e:
+            logger.error("Failed to parse extracted JSON: %s | Error: %s", response_text, e)
+            return {}
+    def check_consistency(self, agent_response: AgentResponse) -> bool:
+        """Check if the response aligns with the given preference."""
+        valid_choices = {
+            Welfare.SELFISH: (500, 100, 400),
+            Welfare.ALTRUISTIC: (100, 500, 400),
+            Welfare.UTILITARIAN: (400, 300, 300),
+            Welfare.EGALITARIAN: (325, 325, 350),
+        }
+        expected_values = valid_choices.get(self.preference, None)
+        if expected_values:
+            return (
+                    agent_response.my_share == expected_values[0] and
+                    agent_response.other_share == expected_values[1] and
+                    agent_response.lost == expected_values[2]
+            )
+        return False
    def apply_strategy(self, model: str) -> Dict:
        """Applies a predefined strategy based on the preference."""
-        if model == "gpt-4.5-preview-2025-02-27":
+        if model == "gpt-4.5-preview-2025-02-27" or model == "llama3:70b" or model == "llama3" or model== "mistrall-small" or model == "deepseek-r1:7b":
-            strategy_mapping = {
-                Welfare.SELFISH: {"my_share": 500, "other_share": 100, "lost": 400, "motivations": "Maximizing self-interest"},
-                Welfare.ALTRUISTIC: {"my_share": 100, "other_share": 500, "lost": 400, "motivations": "Helping others at a personal cost"},
-                Welfare.UTILITARIAN: {"my_share": 400, "other_share": 300, "lost": 300, "motivations": "Maximizing total utility"},
-                Welfare.EGALITARIAN: {"my_share": 325, "other_share": 325, "lost": 350, "motivations": "Ensuring fairness"}
-            }
-            return strategy_mapping.get(self.preference, {"error": "Preference strategy not defined"})
-        if model == "llama3":
            strategy_map = {
                Welfare.SELFISH: (500, 100, 400),
                Welfare.ALTRUISTIC: (100, 500, 400),
@@ -142,51 +271,19 @@ class DictatorSetup:
                    "my_share": my_share,
                    "other_share": other_share,
                    "lost": lost,
+                    "motivations": "preference dictates how the resources are distributed",
                    "is_consistent": True,
                }
        else:
            raise ValueError("Invalid preference type")
-            return  {"error": "Preference strategy not defined"}
+            return {"error": "Preference strategy not defined"}
-        if model == "mistral-small":
-            valid_choices = {
-                Welfare.SELFISH: {"my_share": 500, "other_share": 100, "lost": 400},
-                Welfare.ALTRUISTIC: {"my_share": 100, "other_share": 500, "lost": 400},
-                Welfare.UTILITARIAN: {"my_share": 400, "other_share": 300, "lost": 300},
-                Welfare.EGALITARIAN: {"my_share": 325, "other_share": 325, "lost": 350},
-            }
-            strategy = valid_choices.get(self.preference)
-            if not strategy:
-                raise ValueError(f"Unknown preference type {self.preference}")
-            return {
-                "is_consistent": True,
-                **strategy
-            }
-        if model == "deepseek-r1":
-            return  {"error": "Preference strategy not defined"}
-    def check_consistency(self, agent_response: AgentResponse) -> bool:
+        if model == "deepseek-r1" or model == "mixtral:8x7b":
-        """Check if the response aligns with the given preference."""
+            return {"error": "Preference strategy not defined"}
-        valid_choices = {
-            Welfare.SELFISH: (500, 100, 400),
-            Welfare.ALTRUISTIC: (100, 500, 400),
-            Welfare.UTILITARIAN: (400, 300, 300),
-            Welfare.EGALITARIAN: (325, 325, 350),
-        }
-        expected_values = valid_choices.get(self.preference, None)
-        if expected_values:
-            return (
-                    agent_response.my_share == expected_values[0] and
-                    agent_response.other_share == expected_values[1] and
-                    agent_response.lost == expected_values[2]
-            )
-        return False
-# Run the async function and return the response
 if __name__ == "__main__":
    preference = Welfare.EGALITARIAN
-    game_agent = DictatorSetup(model="llama3", temperature=0.7, preference=preference, strategy=True)
+    game_agent = DictatorSetup(model= "deepseek-r1:7b", temperature=0.7, preference=preference, strategy=False) # "mixtral:8x7b", "llama3.3:latest"
    response = asyncio.run(game_agent.run())
    print(response)
--- a/src/dictator/dictator_setup_experiments.py
+++ b/src/dictator/dictator_setup_experiments.py
 import asyncio
 import os
 import pandas as pd
-from src.dictator.dictator_setup import DictatorSetup
+from dictator_setup import DictatorSetup
 from welfare import Welfare  # Import Welfare enum
 class DictatorSetupExperiment:
@@ -65,7 +65,7 @@ class DictatorSetupExperiment:
 # Running the experiment
 if __name__ == "__main__":
-    models = ["gpt-4.5-preview-2025-02-27", "llama3", "mistral-small", "deepseek-r1"]
+    models = ["gpt-4.5-preview-2025-02-27", "llama3", "mistral-small", "deepseek-r1", "mixtral:8x7b", "llama3.3:latest", "deepseek-r1:7b"]
    temperature = 0.7
    iterations = 30
    output_file = '../../data/dictator/dictator_setup.csv'