The NIPS Experiment
Examining the Repeatability of Peer Review
# rerun Fernando's script to load in css
%run talktools
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import binom
from IPython.display import HTML
Deadline 6th June
Decisions sent 9th September
| | Committee 1: Accept | Committee 1: Reject |
|---|---|---|
| Committee 2: Accept | 22 | 22 |
| Committee 2: Reject | 21 | 101 |
4 papers rejected or withdrawn without review.
Public reaction after experiment documented here
Open Data Science (see Heidelberg Meeting)
NIPS was run in a very open way. Code and blog posts all available!
Reaction triggered by this blog post.
| | Committee 1: Accept | Committee 1: Reject |
|---|---|---|
| Committee 2: Accept | 10.4 (1 in 16) | 31.1 (3 in 16) |
| Committee 2: Reject | 31.1 (3 in 16) | 93.4 (9 in 16) |
For a random committee we expect:
The actual committee's accept precision is markedly better than that of a random committee, at about 50%.
# Binomial distribution over 340 trials with success probability 0.23,
# drawn as a bar chart of its pmf on the integer range 60..119.
display(HTML('<h3>Number of Accepted Papers for p = 0.23</h3>'))
x = np.arange(60, 120)
rv = binom(340, 0.23)
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(x, rv.pmf(x))
# Red marker at 87 -- presumably the observed count; confirm against the data.
ax.axvline(x=87, color='red', linewidth=4)
plt.show()
# Binomial distribution over 166 trials with success probability 0.13,
# drawn as a bar chart of its pmf on the integer range 10..29.
display(HTML('<h3>Number of Consistent Accepts given p=0.13</h3>'))
x = np.arange(10, 30)
rv = binom(166, 0.13)
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(x, rv.pmf(x))
# Red marker at 22, the consistent-accept count used for `k` further down.
ax.axvline(x=22, color='red', linewidth=4)
plt.show()
def posterior_mean_var(k, alpha):
    """Compute the mean and variance of the Dirichlet posterior.

    Parameters
    ----------
    k : array_like
        Observed counts, one entry per outcome.
    alpha : array_like
        Dirichlet prior concentration parameters, broadcastable against ``k``.

    Returns
    -------
    m : ndarray
        Posterior mean of each outcome probability.
    v : ndarray
        Posterior variance of each outcome probability.
    """
    # The posterior is Dirichlet with parameters a = alpha + k.  Work in
    # floats so that integer counts and integer priors are both accepted.
    post = np.asarray(k, dtype=float) + np.asarray(alpha, dtype=float)
    post_0 = post.sum()
    m = post / post_0
    # Var[p_i] = a_i * (a_0 - a_i) / (a_0**2 * (a_0 + 1)).  The second factor
    # subtracts the full posterior parameter a_i = alpha_i + k_i.
    v = post * (post_0 - post) / (post_0**2 * (post_0 + 1))
    return m, v
# Counts per outcome, in the order listed in `outcome` below.
k = np.asarray([22, 43, 101])
# Prior with one pseudo-count per outcome.
alpha = np.ones(3)
m, v = posterior_mean_var(k, alpha)
outcome = ['consistent accept', 'inconsistent decision', 'consistent reject']
# Render each posterior mean with a two-standard-deviation error bar.
for i, label in enumerate(outcome):
    spread = 2*np.sqrt(v[i])
    display(HTML("<h4>Probability of " + label + ' ' + str(m[i]) + "+/-" + str(spread) + "</h4>"))
def sample_precisions(k, alpha, num_samps):
    """Draw posterior samples and derive summary statistics from them.

    Samples ``num_samps`` probability vectors from a Dirichlet with
    parameters ``k + alpha``; the columns are treated as the accept,
    inconsistent and reject probabilities.  Returns the tuple
    ``(ap, rp, aa)`` of per-sample accept precision, reject precision and
    agreed accept rate.
    """
    samples = np.random.dirichlet(k + alpha, size=num_samps)
    accept = samples[:, 0]
    inconsistent = samples[:, 1]
    reject = samples[:, 2]

    # Consistent outcomes are weighted by 2 because inconsistent decisions
    # are being accounted for across both committees.
    twice_accept = 2*accept
    twice_reject = 2*reject

    ap = twice_accept/(twice_accept + inconsistent)
    rp = twice_reject/(inconsistent + twice_reject)
    aa = accept/(accept + reject)
    return ap, rp, aa
ap, rp, aa = sample_precisions(k, alpha, 10000)
# Report accept precision, reject precision and agreed accept rate as
# "mean +/- two standard deviations" over the samples.  Each line is built
# as one pre-formatted string so the call is valid, and prints the same
# text, under both the Python 2 print statement and the Python 3 function.
for samples in (ap, rp, aa):
    print("%s +/- %s" % (samples.mean(), 2*np.sqrt(samples.var())))
0.508753122542 +/- 0.128980361541 0.822081388624 +/- 0.0531283853908 0.184137068656 +/- 0.0694158213505
# One histogram panel per sampled statistic, each with a vertical
# reference line at a fixed value for comparison.
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
ap_panel, rp_panel, aa_panel = ax

# Accept precision, reference line at 0.25.
_ = ap_panel.hist(ap, bins=20)
_ = ap_panel.set_title('Accept Precision')
ap_panel.axvline(x=0.25, linewidth=4)

# Reject precision, reference line at 0.75.
_ = rp_panel.hist(rp, bins=20)
_ = rp_panel.set_title('Reject Precision')
rp_panel.axvline(x=0.75, linewidth=4)

# Agreed accept rate, reference line at 0.10.
_ = aa_panel.hist(aa, bins=20)
_ = aa_panel.set_title('Agreed Accept Rate')
aa_panel.axvline(x=0.10, linewidth=4)
<matplotlib.lines.Line2D at 0x10dd28910>