interlab.environment.experimental.negotiation
from pydantic.dataclasses import Field, dataclass

from interlab.actor import BaseActor
from interlab.environment.base import BaseEnvironment


class PriceNegotiation(BaseEnvironment):
    """E-mail negotiation about a single price between two actors.

    On every step exactly one actor is queried for an ``Action`` (an e-mail
    plus two private fields).  The e-mail text is shown to both actors; the
    acceptable price and the walk-away flag are only used by the environment
    to decide whether the negotiation is over.

    After the environment finishes, ``self.result`` is one of:

    * ``"NO DEAL"`` -- the acting actor walked away,
    * ``(maximizer_price, minimizer_price)`` -- the acceptable prices of the
      two actors became compatible (minimizer's price >= maximizer's price),
    * ``"TIMEOUT"`` -- ``max_steps`` was reached without either of the above.
    """

    def __init__(
        self,
        minimizer: BaseActor,
        maximizer: BaseActor,
        max_steps: int,
        time_push_rounds: int | None = None,
        minimizer_starts_first: bool = True,
    ):
        """Create the environment.

        Args:
            minimizer: Actor stored as ``self.actors[0]``; a deal requires its
                acceptable price to be >= the maximizer's.
            maximizer: Actor stored as ``self.actors[1]``.
            max_steps: Step count at which the negotiation ends with
                ``"TIMEOUT"`` if no other outcome was reached.
            time_push_rounds: If not ``None``, the acting actor is asked to
                wrap up during the last ``time_push_rounds`` steps.
            minimizer_starts_first: Selects which parity of ``self.steps``
                belongs to which actor (see ``_step``).
        """
        super().__init__()
        self.actors = [minimizer, maximizer]

        # Schema of the structured answer requested from the acting actor.
        @dataclass
        class Action:
            email_text: str = Field(
                description="Email message to send to the other person"
            )
            acceptable_price: int | None = Field(
                description="If you have both agreed on the sale price, what price is acceptable to you? Otherwise leave this empty. This is not communicated to the other person.",
                default=None,
            )
            walk_away_stop_trading: bool | None = Field(
                description="Only set this to true if you want to irrevocably walk away from the negotiation. This cannot be taken back!",
                default=False,
            )

        self.n_rounds = max_steps
        self.time_push_rounds = time_push_rounds
        self.minimizer_starts_first = minimizer_starts_first
        self.action = Action

        # Last acceptable price by the other player
        self.other_acceptable_price = None

        self.result = None

    @property
    def minimizer(self) -> BaseActor:
        """The actor stored first in ``self.actors``."""
        return self.actors[0]

    @property
    def maximizer(self) -> BaseActor:
        """The actor stored second in ``self.actors``."""
        return self.actors[1]

    def _step(self):
        """Let one actor send an e-mail and decide whether the game is over.

        The outcome of a step is chosen with a fixed priority so that a later
        check can never overwrite an earlier one:

        1. the acting actor walks away       -> ``"NO DEAL"``
        2. the acceptable prices are compatible -> ``(maximizer, minimizer)``
        3. the step limit has been reached   -> ``"TIMEOUT"``

        ``set_finished()`` is called at most once per step.
        """
        current = self.steps

        # The acting actor alternates with the parity of the step counter.
        # NOTE(review): whether the first call sees steps == 0 or 1 is decided
        # by BaseEnvironment, which is not visible here -- confirm that
        # ``minimizer_starts_first`` really selects the minimizer first.
        if self.minimizer_starts_first:
            me = self.actors[(current + 1) % 2]
            other = self.actors[current % 2]
        else:
            me = self.actors[current % 2]
            other = self.actors[(current + 1) % 2]

        # Near the end, nudge the acting actor to finish the conversation.
        time_push = ""
        if (
            self.time_push_rounds is not None
            and current >= self.n_rounds - self.time_push_rounds
        ):
            limit = max(1, (self.n_rounds - current) // 2)
            time_push = f" Please wrap up this conversation without sending more than {limit} more emails."

        action_result = me.query(
            f"What message should I send to {other.name}, and what else do I think or should do?{time_push}",
            expected_type=self.action,
        )

        # Only the e-mail text is shared; the other fields stay private.
        me.observe(
            f"## Message from me ({me.name}) to {other.name}\n\n {action_result.email_text}"
        )
        other.observe(
            f"## Message from {me.name} to me ({other.name})\n\n{action_result.email_text}"
        )

        my_ap = action_result.acceptable_price
        other_ap = self.other_acceptable_price

        outcome = None
        if action_result.walk_away_stop_trading:
            # Walking away is irrevocable, so it wins over any price match.
            outcome = "NO DEAL"
        elif my_ap is not None and other_ap is not None:
            # Result tuple is always (maximizer's price, minimizer's price).
            if me == self.minimizer and my_ap >= other_ap:
                outcome = (other_ap, my_ap)
            elif me == self.maximizer and my_ap <= other_ap:
                outcome = (my_ap, other_ap)
        if outcome is None and current >= self.n_rounds:
            # Timeout only applies when the final step produced no outcome.
            outcome = "TIMEOUT"

        if outcome is not None:
            self.result = outcome
            self.set_finished()

        # Remember this actor's price for the other actor's next step.
        self.other_acceptable_price = my_ap
class PriceNegotiation(BaseEnvironment):
    """E-mail negotiation about a single price between two actors.

    On every step exactly one actor is queried for an ``Action`` (an e-mail
    plus two private fields).  The e-mail text is shown to both actors; the
    acceptable price and the walk-away flag are only used by the environment
    to decide whether the negotiation is over.

    After the environment finishes, ``self.result`` is one of:

    * ``"NO DEAL"`` -- the acting actor walked away,
    * ``(maximizer_price, minimizer_price)`` -- the acceptable prices of the
      two actors became compatible (minimizer's price >= maximizer's price),
    * ``"TIMEOUT"`` -- ``max_steps`` was reached without either of the above.
    """

    def __init__(
        self,
        minimizer: BaseActor,
        maximizer: BaseActor,
        max_steps: int,
        time_push_rounds: int | None = None,
        minimizer_starts_first: bool = True,
    ):
        """Create the environment.

        Args:
            minimizer: Actor stored as ``self.actors[0]``; a deal requires its
                acceptable price to be >= the maximizer's.
            maximizer: Actor stored as ``self.actors[1]``.
            max_steps: Step count at which the negotiation ends with
                ``"TIMEOUT"`` if no other outcome was reached.
            time_push_rounds: If not ``None``, the acting actor is asked to
                wrap up during the last ``time_push_rounds`` steps.
            minimizer_starts_first: Selects which parity of ``self.steps``
                belongs to which actor (see ``_step``).
        """
        super().__init__()
        self.actors = [minimizer, maximizer]

        # Schema of the structured answer requested from the acting actor.
        @dataclass
        class Action:
            email_text: str = Field(
                description="Email message to send to the other person"
            )
            acceptable_price: int | None = Field(
                description="If you have both agreed on the sale price, what price is acceptable to you? Otherwise leave this empty. This is not communicated to the other person.",
                default=None,
            )
            walk_away_stop_trading: bool | None = Field(
                description="Only set this to true if you want to irrevocably walk away from the negotiation. This cannot be taken back!",
                default=False,
            )

        self.n_rounds = max_steps
        self.time_push_rounds = time_push_rounds
        self.minimizer_starts_first = minimizer_starts_first
        self.action = Action

        # Last acceptable price by the other player
        self.other_acceptable_price = None

        self.result = None

    @property
    def minimizer(self) -> BaseActor:
        """The actor stored first in ``self.actors``."""
        return self.actors[0]

    @property
    def maximizer(self) -> BaseActor:
        """The actor stored second in ``self.actors``."""
        return self.actors[1]

    def _step(self):
        """Let one actor send an e-mail and decide whether the game is over.

        The outcome of a step is chosen with a fixed priority so that a later
        check can never overwrite an earlier one:

        1. the acting actor walks away       -> ``"NO DEAL"``
        2. the acceptable prices are compatible -> ``(maximizer, minimizer)``
        3. the step limit has been reached   -> ``"TIMEOUT"``

        ``set_finished()`` is called at most once per step.
        """
        current = self.steps

        # The acting actor alternates with the parity of the step counter.
        # NOTE(review): whether the first call sees steps == 0 or 1 is decided
        # by BaseEnvironment, which is not visible here -- confirm that
        # ``minimizer_starts_first`` really selects the minimizer first.
        if self.minimizer_starts_first:
            me = self.actors[(current + 1) % 2]
            other = self.actors[current % 2]
        else:
            me = self.actors[current % 2]
            other = self.actors[(current + 1) % 2]

        # Near the end, nudge the acting actor to finish the conversation.
        time_push = ""
        if (
            self.time_push_rounds is not None
            and current >= self.n_rounds - self.time_push_rounds
        ):
            limit = max(1, (self.n_rounds - current) // 2)
            time_push = f" Please wrap up this conversation without sending more than {limit} more emails."

        action_result = me.query(
            f"What message should I send to {other.name}, and what else do I think or should do?{time_push}",
            expected_type=self.action,
        )

        # Only the e-mail text is shared; the other fields stay private.
        me.observe(
            f"## Message from me ({me.name}) to {other.name}\n\n {action_result.email_text}"
        )
        other.observe(
            f"## Message from {me.name} to me ({other.name})\n\n{action_result.email_text}"
        )

        my_ap = action_result.acceptable_price
        other_ap = self.other_acceptable_price

        outcome = None
        if action_result.walk_away_stop_trading:
            # Walking away is irrevocable, so it wins over any price match.
            outcome = "NO DEAL"
        elif my_ap is not None and other_ap is not None:
            # Result tuple is always (maximizer's price, minimizer's price).
            if me == self.minimizer and my_ap >= other_ap:
                outcome = (other_ap, my_ap)
            elif me == self.maximizer and my_ap <= other_ap:
                outcome = (my_ap, other_ap)
        if outcome is None and current >= self.n_rounds:
            # Timeout only applies when the final step produced no outcome.
            outcome = "TIMEOUT"

        if outcome is not None:
            self.result = outcome
            self.set_finished()

        # Remember this actor's price for the other actor's next step.
        self.other_acceptable_price = my_ap
This is the base class for environments.
When subclassing it, you have to override the `_step`
method.
E.g.:
class MyEnv(BaseEnvironment):

    def __init__(self, actor1, actor2):
        super().__init__()
        self.actors = [actor1, actor2]
        ...

    def _step(self, ...):
        active = self.actors[self.steps % 2]
        other = self.actors[(self.steps + 1) % 2]
        action = active.query(...)
        ...
        active.observe("You did X, resulting in Y.")
        other.observe("The other did X, you did not see the result.")
        if ...:
            self.set_finished()
        return action  # 'step' may return value