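Here is a Python implementation.
fit() searches a grid of multipliers and offsets for the pair that best explains the sample, and score rates how closely the values
(x) follow that pattern.
The score lies between 0 and 1, with values near 1 indicating a close match.
Example: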
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
# Every prime below 100; prime_sample() draws its values from this pool.
primes = np.array([
    2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
    31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
    73, 79, 83, 89, 97,
])
def harmonic(divisor, size=10):
    """Return ``size`` random multiples of ``divisor``.

    Each element is ``divisor`` times an integer drawn from 1..9 (the upper
    bound 10 is exclusive) using NumPy's global random state.
    """
    factors = np.random.randint(1, 10, size=size)
    return factors * divisor
def prime_sample(size=10):
    """Draw ``size`` values at random from the module-level ``primes`` array.

    Uses ``np.random.choice`` with its default settings, so the same prime
    may appear more than once.
    """
    drawn = np.random.choice(primes, size=size)
    return drawn
def noisy(x, amount=0.1):
    """Return ``x`` with independent uniform noise in ``[-amount, amount)`` added.

    Parameters
    ----------
    x : array-like
        Values to perturb; any shape is accepted.
    amount : float
        Half-width of the uniform noise interval.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x``.
    """
    values = np.asarray(x)
    # Size the noise from the full shape rather than len(x): len() only sees
    # the first axis, which breaks (or silently shares noise across a row)
    # for multi-dimensional input. For 1-D input the draw is unchanged.
    unit = np.random.random(size=np.shape(values))
    # Map [0, 1) onto [-1, 1) before scaling.
    return values + (unit * 2 - 1) * amount
def prob(x, mean, sd):
    """Evaluate the normal density with mean ``mean`` and std ``sd`` at ``x``.

    The arguments are handed to ``scipy.stats.norm`` unchanged, so array
    inputs follow its broadcasting rules.
    """
    gaussian = stats.norm(loc=mean, scale=sd)
    return gaussian.pdf(x)
def score(x, multiplier, offset, kmax=20):
    """Rate how closely ``x`` lies on the lattice ``k * multiplier + offset``.

    Parameters
    ----------
    x : array-like
        One-dimensional sample of values.
    multiplier, offset : float
        Spacing and shift of the lattice points.
    kmax : int
        Number of lattice points considered (``k = 0 .. kmax - 1``).

    Returns
    -------
    float
        ``exp(-sum of squared distances to the nearest lattice point)``;
        1.0 for an exact match, approaching 0 as the distances grow.
    """
    x = np.asarray(x, dtype=float)
    k = np.arange(kmax)
    # Column vector of lattice points so it broadcasts against the sample:
    # the difference below has shape (kmax, len(x)).
    means = (k * multiplier + offset)[:, None]
    nearest_k = np.abs(x - means).argmin(axis=0)
    # The residual must be taken against the same shifted lattice point that
    # was used to pick nearest_k; dropping the offset here would penalise a
    # perfect fit whenever offset != 0.
    closest_point = nearest_k * multiplier + offset
    return np.exp(-((x - closest_point) ** 2).sum())
def fit(x, multipliers, offsets, kmax=20, sd=0.2):
    """Grid-search the multiplier/offset pair that maximises the likelihood of ``x``.

    For every (multiplier, offset) pair each sample is scored by the sum over
    ``k = 0 .. kmax - 1`` of normal densities centred on
    ``k * multiplier + offset``; the per-sample logs are added up to give the
    log-likelihood of the pair.

    Parameters
    ----------
    x : array-like
        One-dimensional sample of values.
    multipliers, offsets : sequence of float
        Candidate values forming the search grid.
    kmax : int
        Number of lattice points per candidate.
    sd : float
        Standard deviation of the noise.

    Returns
    -------
    dict
        ``loglikelihood`` (array of shape ``(len(multipliers), len(offsets))``),
        ``max_L`` (its maximum), the maximising ``multiplier`` and ``offset``,
        and the ``score`` of that pair as computed by ``score()``.
    """
    # Imported here so the function does not rely on a new top-of-file import.
    from scipy.special import logsumexp

    x = np.asarray(x, dtype=float)
    k = np.arange(kmax)
    mult_grid, off_grid, k_grid = np.meshgrid(multipliers, offsets, k,
                                              indexing='ij')
    # Trailing axis of length 1 lets the samples broadcast along the last
    # axis: log_p has shape (n_multipliers, n_offsets, kmax, n_samples).
    means = (k_grid * mult_grid + off_grid)[..., None]
    log_p = stats.norm.logpdf(x, loc=means, scale=sd)
    # Combine the kmax components in log space. Summing raw densities first
    # underflows to 0 when a sample is far from every mean, which turns the
    # log-likelihood into -inf (with a divide-by-zero warning).
    L = logsumexp(log_p, axis=-2).sum(axis=-1)
    i, j = np.unravel_index(L.argmax(), L.shape)
    multiplier = multipliers[i]
    offset = offsets[j]
    return dict(loglikelihood=L, max_L=L[i, j],
                multiplier=multiplier, offset=offset,
                score=score(x, multiplier, offset, kmax))
# Search grid handed to fit(): 100 multipliers in [3, 10] and 50 offsets in
# [-1.5, 1.5].
multipliers = np.linspace(3, 10, num=100)
offsets = np.linspace(-1.5, 1.5, num=50)
# Plot coordinates. 'ij' indexing puts multipliers on axis 0 and offsets on
# axis 1, the same layout fit() uses for its log-likelihood array.
X, Y = np.meshgrid(multipliers, offsets, indexing='ij')

# (sample, expected score) pairs: two fixed samples followed by two random
# ones (noisy multiples of 3, then noisy primes).
tests = [
    ([12, 8, 28, 20, 32, 12, 28, 16, 4, 12], 1),
    ([3, 5, 7, 11, 13, 27, 54, 57], 0),
    (noisy(harmonic(3, size=20)), 1),
    (noisy(prime_sample()), 0),
]
# For each sample: fit it, draw the log-likelihood over the search grid,
# mark the best (multiplier, offset) pair in red and show the resulting
# score next to the expected one.
for x, expected in tests:
    outcome = fit(x, multipliers, offsets, kmax=20)
    plt.contourf(X, Y, outcome['loglikelihood'])
    plt.xlabel('multiplier')
    plt.ylabel('offset')
    plt.scatter(outcome['multiplier'], outcome['offset'], s=20, c='red')
    caption = 'score = {:g}, expected = {:g}'.format(outcome['score'],
                                                      expected)
    plt.title(caption)
    plt.show()
x = [12, 8, 28, 20, 32, 12, 28, 16, 4, 12]:
x = [3, 5, 7, 11, 13, 27, 54, 57]:
