From 08ba9ea5ed2f312e879db2273deb8f07a849df55 Mon Sep 17 00:00:00 2001
From: Maxime MORGE <maxime.morge@univ-lille.fr>
Date: Mon, 3 Mar 2025 14:00:48 +0100
Subject: [PATCH] Test strategy for dictator game with preference alignment

---
 README.md                      |  46 ++++++++++----
 src/dictator/dictator_setup.py | 106 ++++++++++++++++++++++++---------
 2 files changed, 113 insertions(+), 39 deletions(-)

diff --git a/README.md b/README.md
index 6b1d288..1b14b1b 100644
--- a/README.md
+++ b/README.md
@@ -57,19 +57,37 @@ We consider 4 allocation options where money can be lost in the division, each c
 3. The dictator keeps 400, the other player receives 300, resulting in a 300 loss (utilitarian)
 4. The dictator keeps 325, the other player also receives 325, and 350 is lost in the division (egalitarian)
 
-The following table shows the accuracy of the dictator's decision for each model and preference.
-The temperature is fixed at 0.7, and each experiment was conducted 30 times.
+The following table presents the accuracy of the dictator's decision for each model and preference,
+depending on whether the models were prompted to generate a strategy or specific actions.
+The temperature is set to 0.7, and each experiment involving action generation was repeated 30 times.
+
+| *Model*         | *Generation* | *SELFISH* | *ALTRUISTIC* | *UTILITARIAN* | *EGALITARIAN* |
+|-----------------|--------------|-----------|--------------|---------------|---------------|
+| *gpt-4.5*       | *actions*    | 1.00      | 1.00         | 0.50          | 1.00          |
+| *llama3*        | *actions*    | 1.00      | 0.90         | 0.40          | 0.73          |
+| *mistral-small* | *actions*    | 0.40      | 0.93         | 0.76          | 0.16          |
+| *deepseek-r1*   | *actions*    | 0.06      | 0.20         | 0.76          | 0.03          |
+| *gpt-4.5*       | *strategy*   | 1.00      | 1.00         | 1.00          | 1.00          |
+| *llama3*        | *strategy*   | 1.00      | 1.00         | 1.00          | 1.00          |
+| *mistral-small* | *strategy*   | 1.00      | 1.00         | 1.00          | 1.00          |
+| *deepseek-r1*   | *strategy*   | -         | -            | -             | -             |
+
+This table helps assess the models’ ability to align with different preferences.
+When models are explicitly prompted to generate strategies, they exhibit perfect alignment with the
+predefined preferences, except for DeepSeek-R1, which does not generate valid code.
+When models are prompted to generate actions, alignment is weaker and varies across models.
+GPT-4.5 aligns well with the selfish, altruistic, and egalitarian preferences but struggles with
+utilitarianism when generating actions.
+Llama3 performs well for selfish and altruistic preferences but shows weaker alignment for utilitarian
+and egalitarian choices.
+Mistral-small aligns best with altruistic preferences and maintains moderate performance on utilitarianism,
+but struggles with selfish and egalitarian preferences.
+DeepSeek-R1 performs best for utilitarianism but has poor accuracy in the other categories.
-| Model           | SELFISH   | ALTRUISTIC   | UTILITARIAN   | EGALITARIAN    |
-|-----------------|-----------|--------------|---------------|----------------|
-| gpt-4.5         | 1.0       | 1.0          | 0.5           | 1.0            |
-| llama3          | 1.0       | 0.9          | 0.4           | 0.73           |
-| mistral-small   | 0.4       | 0.93         | 0.76          | 0.16           |
-| deepseek-r1     | 0.06      | 0.2          | 0.76          | 0.03           |
-Bad decisions can be explained either by arithmetic errors (e.g., it is not the case that 500 + 100 > 400 + 300)
-or by misinterpretations of preferences (e.g., ‘I’m choosing to prioritize the common interest by keeping a
-relatively equal split with the other player’).
 This table can be used to evaluate the models based on their ability to align with different preferences.
 GPT-4.5 exhibits strong alignment across all preferences except for utilitarianism, where its
 performance is moderate.
@@ -79,6 +97,12 @@ Mistral-small shows the best alignment with altruistic preferences, while maintaining moderate
 performance across the other preferences.
 Deepseek-r1 is most capable of aligning with utilitarian preferences, but performs poorly in aligning
 with other preferences.
+Bad action selections can be explained either by arithmetic errors (e.g., it is not the case that 500 + 100 > 400 + 300)
+or by misinterpretations of preferences (e.g., ‘I’m choosing to prioritize the common interest by keeping a
+relatively equal split with the other player’).
+
+
+
 ## Ring-network game
 
 A player is rational if she plays a best response to her beliefs.
diff --git a/src/dictator/dictator_setup.py b/src/dictator/dictator_setup.py
index 493cd9a..8eb9eb9 100644
--- a/src/dictator/dictator_setup.py
+++ b/src/dictator/dictator_setup.py
@@ -43,34 +43,38 @@ class AgentResponse(BaseModel):
 class DictatorSetup:
     debug = False
 
-    def __init__(self, model: str, temperature: float, preference: Welfare):
+    def __init__(self, model: str, temperature: float, preference: Welfare, strategy: bool = False):
         self.model = model
         self.temperature = temperature
         self.preference = preference
-        # Determine if using OpenAI model (like gpt-4)
-        is_openai_model = model.startswith("gpt")
-        base_url = "https://api.openai.com/v1" if is_openai_model else "http://localhost:11434/v1"
-        # Define model information
-        self.model_info = {
-            "temperature": self.temperature,
-            "function_calling": True,
-            "parallel_tool_calls": True,
-            "family": "unknown",
-            "json_output": True , # Add this line to ensure JSON output is supported
-            "vision": False
-        }
-        # Create the model client
-        self.model_client = OpenAIChatCompletionClient(
-            model=self.model,
-            base_url=base_url,
-            api_key=OPENAI_API_KEY,
-            model_info=self.model_info,
-            response_format = AgentResponse # Only set for OpenAI models that support it
-        )
+        self.strategy = strategy
+
+        # A model client is only needed when the model generates actions;
+        # in strategy mode the predefined strategy is applied locally.
+        if not self.strategy:
+            is_openai_model = model.startswith("gpt")
+            base_url = "https://api.openai.com/v1" if is_openai_model else "http://localhost:11434/v1"
+            self.model_info = {
+                "temperature": self.temperature,
+                "function_calling": True,
+                "parallel_tool_calls": True,
+                "family": "unknown",
+                "json_output": True,
+                "vision": False
+            }
+
+            self.model_client = OpenAIChatCompletionClient(
+                model=self.model,
+                base_url=base_url,
+                api_key=OPENAI_API_KEY,
+                model_info=self.model_info,
+                response_format=AgentResponse
+            )
 
     async def run(self) -> Dict:
         """Runs the dictator game and returns a detailed response dictionary."""
+        if self.strategy:
+            return self.apply_strategy(self.model)
+
         instruction = f"""
         Hi, let’s play a game. You are paired with another player.
         Your role is to decide how to divide $1000, and the other player simply receives your choice.
@@ -96,14 +100,14 @@ class DictatorSetup:
             [TextMessage(content=instruction, source="user")],
             cancellation_token=CancellationToken(),
         )
-        # Extract JSON content from response
+
         response_data = response.chat_message.content
         if self.debug:
             print(f"Raw Response: {response_data}")
-        # Convert JSON string to dictionary before validation
+
         response_dict = json.loads(response_data)
         agent_response = AgentResponse.model_validate(response_dict)
-        # Check if response is consistent with the specified preference
+
         is_consistent = self.check_consistency(agent_response)
         return {
             "is_consistent": is_consistent,
@@ -113,6 +117,53 @@ class DictatorSetup:
             "motivations": agent_response.motivation
         }
 
+    def apply_strategy(self, model: str) -> Dict:
+        """Applies the predefined strategy of the given model for the current preference."""
+        # Each branch reproduces the strategy generated by the corresponding model
+        # (DeepSeek-R1 did not produce valid strategy code, see README).
+        if model == "gpt-4.5-preview-2025-02-27":
+            strategy_mapping = {
+                Welfare.SELFISH: {"my_share": 500, "other_share": 100, "lost": 400, "motivations": "Maximizing self-interest"},
+                Welfare.ALTRUISTIC: {"my_share": 100, "other_share": 500, "lost": 400, "motivations": "Helping others at a personal cost"},
+                Welfare.UTILITARIAN: {"my_share": 400, "other_share": 300, "lost": 300, "motivations": "Maximizing total utility"},
+                Welfare.EGALITARIAN: {"my_share": 325, "other_share": 325, "lost": 350, "motivations": "Ensuring fairness"}
+            }
+            return strategy_mapping.get(self.preference, {"error": "Preference strategy not defined"})
+        if model == "llama3":
+            strategy_map = {
+                Welfare.SELFISH: (500, 100, 400),
+                Welfare.ALTRUISTIC: (100, 500, 400),
+                Welfare.UTILITARIAN: (400, 300, 300),
+                Welfare.EGALITARIAN: (325, 325, 350)
+            }
+            if self.preference in strategy_map:
+                my_share, other_share, lost = strategy_map[self.preference]
+                return {
+                    "my_share": my_share,
+                    "other_share": other_share,
+                    "lost": lost,
+                    "is_consistent": True,
+                }
+            raise ValueError("Invalid preference type")
+        if model == "mistral-small":
+            valid_choices = {
+                Welfare.SELFISH: {"my_share": 500, "other_share": 100, "lost": 400},
+                Welfare.ALTRUISTIC: {"my_share": 100, "other_share": 500, "lost": 400},
+                Welfare.UTILITARIAN: {"my_share": 400, "other_share": 300, "lost": 300},
+                Welfare.EGALITARIAN: {"my_share": 325, "other_share": 325, "lost": 350},
+            }
+            strategy = valid_choices.get(self.preference)
+            if not strategy:
+                raise ValueError(f"Unknown preference type {self.preference}")
+            return {
+                "is_consistent": True,
+                **strategy
+            }
+        if model == "deepseek-r1":
+            return {"error": "Preference strategy not defined"}
+        return {"error": f"No strategy defined for model {model}"}
+
     def check_consistency(self, agent_response: AgentResponse) -> bool:
         """Check if the response aligns with the given preference."""
@@ -135,8 +186,7 @@
 # Run the async function and return the response
 if __name__ == "__main__":
-    # Example usage: setting preference to 'Egalitarian'
-    preference = Welfare.EGALITARIAN # Can be Selfish, Altruistic, etc.
-    game_agent = DictatorSetup(model="gpt-4.5-preview-2025-02-27", temperature=0.7, preference=preference) # or llama3, mistral-small, deepseek-r1
+    # Example: apply llama3's generated strategy for the egalitarian preference
+    preference = Welfare.EGALITARIAN
+    game_agent = DictatorSetup(model="llama3", temperature=0.7, preference=preference, strategy=True)
     response = asyncio.run(game_agent.run())
-    print(response) # Prints detailed response with is_consistent, my_share, other_share, lost, motivations
+    print(response)
--
GitLab
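For reference, the accuracy figures reported in the README table (temperature 0.7, 30 repetitions per model/preference pair for action generation) can be reproduced by repeating the action-generation run and averaging `is_consistent`. The sketch below is illustrative only: it assumes the `DictatorSetup` and `Welfare` interfaces introduced in this patch, the import path and the driver loop are not part of the repository, and each run issues a real model call.

```python
# Minimal sketch, assuming the DictatorSetup/Welfare API from this patch.
# The import path below is an assumption, not the repository's actual layout.
import asyncio

from dictator.dictator_setup import DictatorSetup, Welfare

N_RUNS = 30  # repetitions per model/preference pair, as stated in the README


async def accuracy(model: str, preference: Welfare) -> float:
    """Fraction of runs whose allocation is consistent with the target preference."""
    consistent = 0
    for _ in range(N_RUNS):
        # strategy defaults to False, so the model generates actions on every run
        agent = DictatorSetup(model=model, temperature=0.7, preference=preference)
        response = await agent.run()
        consistent += int(response.get("is_consistent", False))
    return consistent / N_RUNS


if __name__ == "__main__":
    for pref in (Welfare.SELFISH, Welfare.ALTRUISTIC, Welfare.UTILITARIAN, Welfare.EGALITARIAN):
        score = asyncio.run(accuracy("llama3", pref))
        print(f"llama3 / {pref}: {score:.2f}")
```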