gensim , , limit load_word2vec_format(). (, GoogleNews , N N-, . limit=500000, 500 000 - - 5/6- / .)
. -, IO .
, , .
, word2vec.c-origin, , gensim native save(). , ( GoogleNews ), . , gensim native [load(filename, mmap='r')][1].
- , , , , , . !
, , most_similar(), , . , ( , ), ( , N- ). , , - IO.
, , , - . - ( -). , , .
word2vec.c load_word2vec_format(). model.init_sims(replace=True), , ( ).
: model.save('GoogleNews-vectors-gensim-normed.bin'). ( , , , .)
Python, , . , , ( ), , . , KeyedVectors , . ( , .)
:
from gensim.models import KeyedVectors
from threading import Semaphore
# Load the previously saved vectors; mmap='r' asks gensim to memory-map the
# stored arrays read-only instead of reading them fully into memory.
model = KeyedVectors.load('GoogleNews-vectors-gensim-normed.bin', mmap='r')
# Point the normed-vector attribute at the loaded vectors to prevent a
# recalculation of normed vectors (the file name suggests they were already
# normalized before saving -- TODO confirm).
model.syn0norm = model.syn0
# The result is discarded; presumably this call is made only to touch the
# vectors so they get paged into memory -- TODO confirm.
model.most_similar('stuff')
# A semaphore created with a count of 0 cannot be acquired until something
# releases it, and nothing here does, so execution blocks on this line
# indefinitely.
Semaphore(0).acquire()
, , / -. , . , / , . .
, - :
# Load the previously saved vectors; mmap='r' asks gensim to memory-map the
# stored arrays read-only instead of reading them fully into memory.
model = KeyedVectors.load('GoogleNews-vectors-gensim-normed.bin', mmap='r')
model.syn0norm = model.syn0 # prevent recalc of normed vectors
. ( , , X , , X , .).
, web-request load() , . , , - /, - .
, , . , , , , . ( : .)
, , / "" , -, , .