interlab.environment.experimental.negotiation

 1from pydantic.dataclasses import Field, dataclass
 2
 3from interlab.actor import BaseActor
 4from interlab.environment.base import BaseEnvironment
 5
 6
 7class PriceNegotiation(BaseEnvironment):
 8    def __init__(
 9        self,
10        minimizer: BaseActor,
11        maximizer: BaseActor,
12        max_steps: int,
13        time_push_rounds: int | None = None,
14        minimizer_starts_first: bool = True,
15    ):
16        super().__init__()
17        self.actors = [minimizer, maximizer]
18
19        @dataclass
20        class Action:
21            email_text: str = Field(
22                description="Email message to send to the other person"
23            )
24            acceptable_price: int | None = Field(
25                description="If you have both agreed on the sale price, what price is acceptable to you? Otherwise leave this empty. This is not communicated to the other person.",
26                default=None,
27            )
28            walk_away_stop_trading: bool | None = Field(
29                description="Only set this to true if you want to irrevocably walk away from the negotiation. This cannot be taken back!",
30                default=False,
31            )
32
33        self.n_rounds = max_steps
34        self.time_push_rounds = time_push_rounds
35        self.minimizer_starts_first = minimizer_starts_first
36        self.action = Action
37
38        # Last acceptable price by the other player
39        self.other_acceptable_price = None
40
41        self.result = None
42
43    @property
44    def minimizer(self):
45        return self.actors[0]
46
47    @property
48    def maximizer(self):
49        return self.actors[1]
50
51    def _step(self):
52        current = self.steps
53
54        if not self.minimizer_starts_first:
55            me = self.actors[current % 2]
56            other = self.actors[(current + 1) % 2]
57        else:
58            me = self.actors[(current + 1) % 2]
59            other = self.actors[current % 2]
60
61        if (
62            self.time_push_rounds is not None
63            and current >= self.n_rounds - self.time_push_rounds
64        ):
65            limit = max(1, (self.n_rounds - current) // 2)
66            time_push = f" Please wrap up this conversation without sending more than {limit} more emails."
67        else:
68            time_push = ""
69
70        action_result = me.query(
71            f"What message should I send to {other.name}, and what else do I think or should do?{time_push}",
72            expected_type=self.action,
73        )
74
75        me.observe(
76            f"## Message from me ({me.name}) to {other.name}\n\n {action_result.email_text}"
77        )
78        other.observe(
79            f"## Message from {me.name} to me ({other.name})\n\n{action_result.email_text}"
80        )
81
82        my_ap = action_result.acceptable_price
83        other_ap = self.other_acceptable_price
84
85        if action_result.walk_away_stop_trading:
86            self.result = "NO DEAL"
87            self.set_finished()
88        if my_ap is not None and other_ap is not None:
89            if me == self.minimizer and my_ap >= other_ap:
90                self.result = (other_ap, my_ap)
91                self.set_finished()
92            if me == self.maximizer and my_ap <= other_ap:
93                self.result = (my_ap, other_ap)
94                self.set_finished()
95        if current >= self.n_rounds:
96            self.result = "TIMEOUT"
97            self.set_finished()
98        self.other_acceptable_price = my_ap
class PriceNegotiation(interlab.environment.base.BaseEnvironment):
 8class PriceNegotiation(BaseEnvironment):
 9    def __init__(
10        self,
11        minimizer: BaseActor,
12        maximizer: BaseActor,
13        max_steps: int,
14        time_push_rounds: int | None = None,
15        minimizer_starts_first: bool = True,
16    ):
17        super().__init__()
18        self.actors = [minimizer, maximizer]
19
20        @dataclass
21        class Action:
22            email_text: str = Field(
23                description="Email message to send to the other person"
24            )
25            acceptable_price: int | None = Field(
26                description="If you have both agreed on the sale price, what price is acceptable to you? Otherwise leave this empty. This is not communicated to the other person.",
27                default=None,
28            )
29            walk_away_stop_trading: bool | None = Field(
30                description="Only set this to true if you want to irrevocably walk away from the negotiation. This cannot be taken back!",
31                default=False,
32            )
33
34        self.n_rounds = max_steps
35        self.time_push_rounds = time_push_rounds
36        self.minimizer_starts_first = minimizer_starts_first
37        self.action = Action
38
39        # Last acceptable price by the other player
40        self.other_acceptable_price = None
41
42        self.result = None
43
44    @property
45    def minimizer(self):
46        return self.actors[0]
47
48    @property
49    def maximizer(self):
50        return self.actors[1]
51
52    def _step(self):
53        current = self.steps
54
55        if not self.minimizer_starts_first:
56            me = self.actors[current % 2]
57            other = self.actors[(current + 1) % 2]
58        else:
59            me = self.actors[(current + 1) % 2]
60            other = self.actors[current % 2]
61
62        if (
63            self.time_push_rounds is not None
64            and current >= self.n_rounds - self.time_push_rounds
65        ):
66            limit = max(1, (self.n_rounds - current) // 2)
67            time_push = f" Please wrap up this conversation without sending more than {limit} more emails."
68        else:
69            time_push = ""
70
71        action_result = me.query(
72            f"What message should I send to {other.name}, and what else do I think or should do?{time_push}",
73            expected_type=self.action,
74        )
75
76        me.observe(
77            f"## Message from me ({me.name}) to {other.name}\n\n {action_result.email_text}"
78        )
79        other.observe(
80            f"## Message from {me.name} to me ({other.name})\n\n{action_result.email_text}"
81        )
82
83        my_ap = action_result.acceptable_price
84        other_ap = self.other_acceptable_price
85
86        if action_result.walk_away_stop_trading:
87            self.result = "NO DEAL"
88            self.set_finished()
89        if my_ap is not None and other_ap is not None:
90            if me == self.minimizer and my_ap >= other_ap:
91                self.result = (other_ap, my_ap)
92                self.set_finished()
93            if me == self.maximizer and my_ap <= other_ap:
94                self.result = (my_ap, other_ap)
95                self.set_finished()
96        if current >= self.n_rounds:
97            self.result = "TIMEOUT"
98            self.set_finished()
99        self.other_acceptable_price = my_ap

This is the base class for environments.

When subclassing, you have to override the `_step` method.

E.g.:

class MyEnv(BaseEnvironment):
    def __init__(self, actor1, actor2):
        super().__init__()
        self.actors = [actor1, actor2]
        ...

    def _step(self, ...):
        active = self.actors[self.steps % 2]
        other = self.actors[(self.steps + 1) % 2]

        action = active.query(...)
        ...
        active.observe("You did X, resulting in Y.")
        other.observe("The other did X, you did not see the result.")

        if ...:
            self.set_finished()
        return action # 'step' may return value