# For each point, find its nearest neighbour of the same Type using scikit-learn (NearestNeighbors).
import pandas as pd
from io import StringIO
# Sample data held inline as CSV text: a header row followed by seven points,
# each carrying a Type label ('A' or 'B') and two free-text data columns.
csv_text = u'''Lon2,Lat2,Type,Data-1,Data-2
11,11,A,Eggs,Bacon
51,51,A,Nuts,Bread
61,61,A,Beef,Lamb
21,21,B,Chips,Chicken
31,31,B,Sauce,Pasta
71,71,B,Rice,Oats
81,81,B,Beans,Peas'''

# Wrap the text in a file-like buffer so read_csv can parse it without
# touching the filesystem.
s1 = StringIO(csv_text)
df2 = pd.read_csv(s1)
from sklearn.neighbors import NearestNeighbors
import numpy as np
# Build dfNN: every row of df2, extended with the 'Data-1' / 'Data-2' values of
# its nearest neighbour *of the same Type* ('Data-1b' / 'Data-2b') and the
# Euclidean distance to that neighbour ('Distance'), measured on (Lon2, Lat2).
#
# Each Type group is processed as a whole and the groups are concatenated once
# at the end.  This keeps the original index labels of df2, does not rely on
# the rows being pre-sorted by Type, and avoids DataFrame.append (removed in
# pandas 2.0).
group_frames = []
for _, group in df2.groupby('Type', sort=False):
    # Work on a copy so the new columns are not written into a view of df2.
    group = group.copy()
    if len(group) < 2:
        # A lone point has no same-type neighbour; mark the result as missing
        # instead of letting kneighbors raise.
        group['Data-1b'] = np.nan
        group['Data-2b'] = np.nan
        group['Distance'] = np.nan
    else:
        coords = group[['Lon2', 'Lat2']].to_numpy()
        nbrs = NearestNeighbors(n_neighbors=1, algorithm='kd_tree').fit(coords)
        # Calling kneighbors() without a query returns, for every fitted
        # point, its nearest *other* point (the point itself is excluded), so
        # duplicate coordinates cannot make a row pick itself.
        distances, indices = nbrs.kneighbors()
        nearest = indices[:, 0]
        neighbours = group.iloc[nearest]
        group['Data-1b'] = neighbours['Data-1'].to_numpy()
        group['Data-2b'] = neighbours['Data-2'].to_numpy()
        group['Distance'] = distances[:, 0]
    group_frames.append(group)

dfNN = pd.concat(group_frames) if group_frames else pd.DataFrame()
# Fix the output column order; reindex also yields the right (empty) columns
# when df2 has no rows.
dfNN = dfNN.reindex(columns=['Lat2', 'Lon2', 'Type', 'Data-1', 'Data-2',
                             'Data-1b', 'Data-2b', 'Distance'])