Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incorporate komi into ratings (particularly for small boards) #46

Closed
12 changes: 9 additions & 3 deletions analysis/analyze_glicko2_daily_windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
black_base,
[
(
opponent.copy((1 if past_game.black_id != game.black_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap)),
opponent.copy((1 if past_game.black_id != game.black_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap,
komi=past_game.komi, size=past_game.size, rules=past_game.rules,
)),
past_game.winner_id == game.black_id
)
for past_game, opponent in self._storage.get_matches_newer_or_equal_to(
Expand All @@ -58,7 +60,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
white_base,
[
(
opponent.copy((1 if past_game.black_id != game.white_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap)),
opponent.copy((1 if past_game.black_id != game.white_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap,
komi=past_game.komi, size=past_game.size, rules=past_game.rules,
)),
past_game.winner_id == game.white_id
)
for past_game, opponent in self._storage.get_matches_newer_or_equal_to(
Expand All @@ -76,7 +80,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
skipped=False,
game=game,
expected_win_rate=black_cur.expected_win_probability(
white_cur, get_handicap_adjustment(black_cur.rating, game.handicap), ignore_g=True
white_cur, get_handicap_adjustment(black_cur.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
), ignore_g=True
),
black_rating=black_cur.rating,
white_rating=white_cur.rating,
Expand Down
12 changes: 9 additions & 3 deletions analysis/analyze_glicko2_glickman_weekly_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
black_base,
[
(
opponent.copy((1 if past_game.black_id != game.black_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap)),
opponent.copy((1 if past_game.black_id != game.black_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap,
komi=past_game.komi, size=past_game.size, rules=past_game.rules,
)),
past_game.winner_id == past_game.black_id
)
for past_game, opponent in self._storage.get_matches_newer_or_equal_to(
Expand All @@ -66,7 +68,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
white_base,
[
(
opponent.copy((1 if past_game.black_id != game.white_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap)),
opponent.copy((1 if past_game.black_id != game.white_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap,
komi=past_game.komi, size=past_game.size, rules=past_game.rules,
)),
past_game.winner_id == past_game.white_id
)
for past_game, opponent in self._storage.get_matches_newer_or_equal_to(
Expand All @@ -84,7 +88,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
skipped=False,
game=game,
expected_win_rate=black_cur.expected_win_probability(
white_cur, get_handicap_adjustment(black_cur.rating, game.handicap), ignore_g=True
white_cur, get_handicap_adjustment(black_cur.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
), ignore_g=True
),
black_rating=black_cur.rating,
white_rating=white_cur.rating,
Expand Down
16 changes: 12 additions & 4 deletions analysis/analyze_glicko2_one_game_at_a_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,14 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
black = self._storage.get(game.black_id)
white = self._storage.get(game.white_id)


updated_black = glicko2_update(
black,
[
(
white.copy(-get_handicap_adjustment(white.rating, game.handicap)),
white.copy(-get_handicap_adjustment(white.rating, game.handicap,
komi=game.komi, size=game.size,
rules=game.rules,
)),
game.winner_id == game.black_id,
)
],
Expand All @@ -50,7 +52,10 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
white,
[
(
black.copy(get_handicap_adjustment(black.rating, game.handicap)),
black.copy(get_handicap_adjustment(black.rating, game.handicap,
komi=game.komi, size=game.size,
rules=game.rules,
)),
game.winner_id == game.white_id,
)
],
Expand All @@ -65,7 +70,10 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
skipped=False,
game=game,
expected_win_rate=black.expected_win_probability(
white, get_handicap_adjustment(black.rating, game.handicap), ignore_g=True
white, get_handicap_adjustment(black.rating, game.handicap,
komi=game.komi, size=game.size,
rules=game.rules,
), ignore_g=True
),
black_rating=black.rating,
white_rating=white.rating,
Expand Down
12 changes: 9 additions & 3 deletions analysis/analyze_glicko2_one_game_at_a_time_rating_grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ def process_game(self, game: GameRecord) -> Dict[str, Glicko2Analytics]:
black,
[
(
src_white.copy(-get_handicap_adjustment(src_white.rating, game.handicap)),
src_white.copy(-get_handicap_adjustment(src_white.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
)),
game.winner_id == game.black_id,
)
],
Expand All @@ -73,7 +75,9 @@ def process_game(self, game: GameRecord) -> Dict[str, Glicko2Analytics]:
white,
[
(
src_black.copy(get_handicap_adjustment(src_black.rating, game.handicap)),
src_black.copy(get_handicap_adjustment(src_black.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
)),
game.winner_id == game.white_id,
)
],
Expand All @@ -89,7 +93,9 @@ def process_game(self, game: GameRecord) -> Dict[str, Glicko2Analytics]:
skipped=False,
game=game,
expected_win_rate=black.expected_win_probability(
white, get_handicap_adjustment(black.rating, game.handicap), ignore_g=True
white, get_handicap_adjustment(black.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
), ignore_g=True
),
black_rating=black.rating,
white_rating=white.rating,
Expand Down
12 changes: 9 additions & 3 deletions analysis/analyze_glicko2_weekly_window_no_unxepected_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
black_base,
[
(
opponent.copy((1 if past_game.black_id != game.black_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap)),
opponent.copy((1 if past_game.black_id != game.black_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap,
komi=past_game.komi, size=past_game.size, rules=past_game.rules,
)),
past_game.winner_id == past_game.black_id
)
for past_game, opponent in self._storage.get_matches_newer_or_equal_to(
Expand All @@ -63,7 +65,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
white_base,
[
(
opponent.copy((1 if past_game.black_id != game.white_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap)),
opponent.copy((1 if past_game.black_id != game.white_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap,
komi=past_game.komi, size=past_game.size, rules=past_game.rules,
)),
past_game.winner_id == past_game.white_id
)
for past_game, opponent in self._storage.get_matches_newer_or_equal_to(
Expand Down Expand Up @@ -96,7 +100,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
skipped=False,
game=game,
expected_win_rate=black_cur.expected_win_probability(
white_cur, get_handicap_adjustment(black_cur.rating, game.handicap), ignore_g=True
white_cur, get_handicap_adjustment(black_cur.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
), ignore_g=True
),
black_rating=black_cur.rating,
white_rating=white_cur.rating,
Expand Down
12 changes: 9 additions & 3 deletions analysis/analyze_glicko2_weekly_window_reduce_rating_movement.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
black_base,
[
(
opponent.copy((1 if past_game.black_id != game.black_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap)),
opponent.copy((1 if past_game.black_id != game.black_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap,
komi=past_game.komi, size=past_game.size, rules=past_game.rules,
)),
past_game.winner_id == past_game.black_id
)
for past_game, opponent in self._storage.get_matches_newer_or_equal_to(
Expand All @@ -68,7 +70,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
white_base,
[
(
opponent.copy((1 if past_game.black_id != game.white_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap)),
opponent.copy((1 if past_game.black_id != game.white_id else -1) * get_handicap_adjustment(opponent.rating, past_game.handicap,
komi=past_game.komi, size=past_game.size, rules=past_game.rules,
)),
past_game.winner_id == past_game.white_id
)
for past_game, opponent in self._storage.get_matches_newer_or_equal_to(
Expand Down Expand Up @@ -112,7 +116,9 @@ def process_game(self, game: GameRecord) -> Glicko2Analytics:
skipped=False,
game=game,
expected_win_rate=black_cur.expected_win_probability(
white_cur, get_handicap_adjustment(black_cur.rating, game.handicap), ignore_g=True
white_cur, get_handicap_adjustment(black_cur.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
), ignore_g=True
),
black_rating=black_cur.rating,
white_rating=white_cur.rating,
Expand Down
16 changes: 11 additions & 5 deletions analysis/analyze_gor.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,19 @@ def process_game(self, game: GameRecord) -> GorAnalytics:


updated_black = gor_update(
black.with_handicap(get_handicap_adjustment(black.rating, game.handicap)),
#white.with_handicap(-get_handicap_adjustment(white.rating, game.handicap)),
black.with_handicap(get_handicap_adjustment(black.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
)),
#white.with_handicap(-get_handicap_adjustment(white.rating, game.handicap, ...)),
white,
1 if game.winner_id == game.black_id else 0,
)

updated_white = gor_update(
white,
black.with_handicap(get_handicap_adjustment(black.rating, game.handicap)),
black.with_handicap(get_handicap_adjustment(black.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
)),
1 if game.winner_id == game.white_id else 0,
)

Expand All @@ -65,8 +69,10 @@ def process_game(self, game: GameRecord) -> GorAnalytics:
return GorAnalytics(
skipped=False,
game=game,
expected_win_rate=black.with_handicap(get_handicap_adjustment(white.rating, game.handicap)).expected_win_probability(
#white.copy(-get_handicap_adjustment(white.rating, game.handicap))
expected_win_rate=black.with_handicap(get_handicap_adjustment(white.rating, game.handicap,
komi=game.komi, size=game.size, rules=game.rules,
)).expected_win_probability(
#white.copy(-get_handicap_adjustment(white.rating, game.handicap, ...))
white
),
black_rating=black.rating,
Expand Down
74 changes: 69 additions & 5 deletions analysis/util/RatingMath.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"rank_to_rating",
"rating_to_rank",
"get_handicap_adjustment",
"get_handicap_rank_difference",
"rating_config",
"set_optimizer_rating_points",
"set_exhaustive_log_parameters",
Expand All @@ -33,13 +34,25 @@
P = 1
HALF_STONE_HANDICAP = False
HALF_STONE_HANDICAP_FOR_ALL_RANKS = False
COMPUTE_HANDICAP_VIA_KOMI_SMALL = False
COMPUTE_HANDICAP_VIA_KOMI_19X19 = False

cli.add_argument(
"--half-stone-handicap", dest="half_stone_handicap", const=1, default=False, action="store_const", help="Use a 0.5 rank adjustment for hc1",
)
cli.add_argument(
"--half-stone-handicap-for-all-ranks", dest="half_stone_handicap_for_all_ranks", const=1, default=False, action="store_const", help="use rankdiff -0.5 for handicap",
)
cli.add_argument(
"--compute-handicap-via-komi-small",
dest="compute_handicap_via_komi_small", const=1, default=False, action="store_const",
help="compute effective handicap from komi for small boards",
)
cli.add_argument(
"--compute-handicap-via-komi-19x19",
dest="compute_handicap_via_komi_19x19", const=1, default=False, action="store_const",
help="compute effective handicap from komi for 19x19 boards",
)

logarithmic = cli.add_argument_group(
"logarithmic ranking variables", "rating to ranks converted with `(log(rating / a) ** p) * c + d`",
Expand Down Expand Up @@ -77,14 +90,59 @@ def set_exhaustive_log_parameters(a: float, c:float, d:float, p:float = 1.0) ->
D = d
P = p

def get_handicap_adjustment(rating: float, handicap: int) -> float:

def get_handicap_rank_difference(handicap: int, size: int, komi: float, rules: str) -> float:
global HALF_STONE_HANDICAP
global HALF_STONE_HANDICAP_FOR_ALL_RANKS
global COMPUTE_HANDICAP_VIA_KOMI_SMALL
global COMPUTE_HANDICAP_VIA_KOMI_19X19

if (COMPUTE_HANDICAP_VIA_KOMI_19X19 and size == 19) or (COMPUTE_HANDICAP_VIA_KOMI_SMALL and size != 19):
# Use a "even game komi" that allows for draws for computing ratings.
# It's okay to expect a draw.
stone_value = 12
if rules == "japanese" or rules == "korean":
# Territory scoring.
area_bonus = 0
komi_bonus = 0
else:
# Bonus for the area value of a stone in area scoring.
area_bonus = 1

# Chinese and AGA rules add extra komi when there's a handicap but
# don't store it in the 'komi' field.
if rules == "chinese":
komi_bonus = 1 * handicap
elif rules == "aga":
komi_bonus = 1 * (handicap - 1) if handicap else 0
else:
komi_bonus = 0

# Convert the handicap into an equivalent komi, and subtract it from
# the komi for an even game.
handicap_as_komi = ((stone_value / 2 + area_bonus) - (komi + komi_bonus) +
((stone_value + area_bonus) * handicap if handicap > 1 else 0))

# Convert back to a fractional handicap, using scaling factors of 3x
# and 6x for 9x9 and 13x13.
if size == 9:
return handicap_as_komi * 6 / stone_value
if size == 13:
return handicap_as_komi * 3 / stone_value
return handicap_as_komi / stone_value

if HALF_STONE_HANDICAP_FOR_ALL_RANKS:
return rank_to_rating(rating_to_rank(rating) + (handicap - 0.5 if handicap > 0 else 0)) - rating
return handicap - 0.5 if handicap > 0 else 0
if HALF_STONE_HANDICAP:
return rank_to_rating(rating_to_rank(rating) + (0.5 if handicap == 1 else handicap)) - rating
return rank_to_rating(rating_to_rank(rating) + handicap) - rating
return 0.5 if handicap == 1 else handicap
return handicap


def get_handicap_adjustment(rating: float, handicap: int, size: int, komi: float, rules: str) -> float:
rank_difference = get_handicap_rank_difference(handicap, size, komi, rules)
effective_rank = min(39, max(0, rating_to_rank(rating) + rank_difference))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to ponder here, I think the reason why I didn't clamp the rank between 30k-9d for this was intentional (but not necessarily correct, that decision is certainly open to discussion).

My gut says that by clamping this we're going to introduce some unnecessary noise. There are legitimately terrible players (including purposefully terrible beginner bots), and there are some legitimately >9d "players" (think every strong KataGo bot). I call out the bots in specific because they have a whole lot of games and can actually move the needle when it comes to where a lot of players land with their ratings. While we clamp those for display to the users, that's more out of convention and UX than a good mathematical reason. I think for the purposes of doing good ratings I think it does a bit of harm to pretend like everyone is squished within those ranks.

Again, open to discussion if you have a good reason to clamp them.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Main reasoning here was practical, to avoid a MathDomainError (can't remember if it was in rank_to_rating or in the subsequent ratings update calculation). I did NOT play around with the bounds of the clamping... maybe -10 (40k) to 45 would be sufficient to avoid the MathDomainError. Maybe these clamp values could be parameterized? Maybe we should depend on SkipLogic to preclude the need for this?

In terms of handling players with true ranks outside the UX, I'm not sure how reasonable results can be if the range (output) of rating_to_rank and the domain (input) of rank_to_rating don't match. But that's the kind of data I think I was seeing (after applying small board handicap).

Note that I saw the math coming up with effective handicap as high as 52, from an 8-stone 9x9 game early in OGS history (not sure there was only one....). (Maybe that error predated the SkipLogic change and wouldn't be necessary now...)

BTW, IMO,:

  • For rank_to_rating/rating_to_rank calculations, we should choose a domain and range for ranks that match. Either we clamp for things outside of that, or don't rate games that handicaps put outside of that, or something.
  • For UX, we can choose a smaller set of ranks to clamp to. So, the rating_to_rank might have a player down as 0 or -5, but the UX would show 25k (not 30k or 35k).

Not sure how that matches what we have right now...

(Feels a bit like it would be worthwhile to consider more of these changes in isolation... maybe I should split up this PR more after all... not today though)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Main reasoning here was practical, to avoid a MathDomainError (can't remember if it was in rank_to_rating or in the subsequent ratings update calculation).

Don't have much time today, but my initial attempts to reproduce this (by commenting out the mitigation and the new SkipLogic code) haven't been successful...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Converted back to a draft for now— since I can't reproduce the MathDomainError anymore it seems better to remove the clamping for now, and if we add it back it should be parameterized.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, found it, when I temporarily reverted the clamping, I inserted a bug in get_handicap_adjustment, which is why I couldn't reproduce.

The domain error is on tallygameanalytics:

Processing OGS data
   2,739,321 /   30,811,698 games processed.  192.3s remainingTraceback (most recent call last):
  File "/Users/dexonsmith/Repos/online-go/goratings/analysis/analyze_glicko2_one_game_at_a_time.py", line 99, in <module>
    tally.add_glicko2_analytics(analytics)
  File "/Users/dexonsmith/Repos/online-go/goratings/analysis/util/TallyGameAnalytics.py", line 102, in add_glicko2_analytics
    self.prediction_cost[size][speed][rank][handicap] += - math.log(result.expected_win_rate if black_won else 1 - result.expected_win_rate)
                                                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: math domain error

Anyway, I still think the options can land without fixing the MathDomainError, and then we can decide whether to fix with clamping or to update the TalylGameAnalytics to handle an expected_win_rate of 0 (which I assume is what's causing the domain error). Sending a new PR soon.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, though honestly if we're hitting the domain error it's because our rating code is busted somewhere, so having it blow up isn't necessarily a bad thing (although having a more appropriate error would probably be better in an ideal world).

return rank_to_rating(effective_rank) - rating


def set_optimizer_rating_points(points: List[float]) -> None:
global optimizer_rating_control_points
Expand All @@ -99,9 +157,13 @@ def configure_rating_to_rank(args: argparse.Namespace) -> None:
global D
global HALF_STONE_HANDICAP
global HALF_STONE_HANDICAP_FOR_ALL_RANKS
global COMPUTE_HANDICAP_VIA_KOMI_SMALL
global COMPUTE_HANDICAP_VIA_KOMI_19X19

HALF_STONE_HANDICAP = args.half_stone_handicap
HALF_STONE_HANDICAP_FOR_ALL_RANKS = args.half_stone_handicap_for_all_ranks
COMPUTE_HANDICAP_VIA_KOMI_SMALL = args.compute_handicap_via_komi_small
COMPUTE_HANDICAP_VIA_KOMI_19X19 = args.compute_handicap_via_komi_19x19
system: str = args.ranks
a: float = args.a
c: float = args.c
Expand Down Expand Up @@ -260,7 +322,9 @@ def __rating_to_rank(rating: float) -> float:
else:
raise NotImplementedError

assert round(get_handicap_adjustment(1000.0, 0), 8) == 0
for size in [9, 13, 19]:
assert round(get_handicap_adjustment(1000.0, 0, size=size, rules="japanese", komi=6), 8) == 0
assert round(get_handicap_adjustment(1000.0, 0, size=size, rules="aga", komi=7), 8) == 0


def lerp(x:float, y:float, a:float):
Expand Down
10 changes: 10 additions & 0 deletions analysis/util/SkipLogic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
Storage,
)

from .RatingMath import get_handicap_rank_difference

__all__ = [
"should_skip_game",
]
Expand All @@ -20,4 +22,12 @@ def should_skip_game(game: GameRecord, storage: Storage) -> bool:
elif game.speed == 3: # correspondence non timeout, clear flags for both
storage.set_timeout_flag(game.black_id, False)
storage.set_timeout_flag(game.white_id, False)

# Skip games with effective handicap > 9, which shouldn't be rated, since
# they're too noisy. This is mostly old 9x9 and 13x13 games.
if get_handicap_rank_difference(
handicap=game.handicap, size=game.size,
komi=game.komi, rules=game.rules) > 9:
return True

return False
Loading