EDIT:
I am putting this at the top because the gain is larger: it turns out that a slight tuning of the OP's code gives a rather large performance bump.
def list2dict(list_):
    """Map each item of ``list_`` to its position in the list.

    Args:
        list_: Iterable of hashable items (typically strings).

    Returns:
        A dict mapping every item to its zero-based index.

    Raises:
        ValueError: As soon as an item is encountered a second time.
    """
    return_dict = {}
    for idx, word in enumerate(list_):
        if word in return_dict:
            # Wrap in a one-element tuple: with a bare ``% word`` a
            # tuple-valued item would be unpacked as the format arguments.
            raise ValueError("duplicate string found in list: %s" % (word,))
        return_dict[word] = idx
    return return_dict
def list2dictNEW(list_):
    """Map each item of ``list_`` to its index, checking for duplicates once.

    Unlike a per-item membership test, the whole dict is built first and
    duplicates are detected afterwards by comparing sizes.

    Args:
        list_: Sized iterable (``len()`` is called on it) of hashable items.

    Returns:
        A dict mapping every item to its zero-based index.

    Raises:
        ValueError: If ``list_`` contains at least one duplicate item.
    """
    return_dict = {}
    for idx, word in enumerate(list_):
        return_dict[word] = idx
    # A duplicate overwrites an existing key, so the dict ends up with
    # fewer entries than the input has items.
    if len(return_dict) != len(list_):
        raise ValueError("There were duplicates in list {}".format(list_))
    return return_dict
DEMO:
>>> timeit(lambda: list2dictNEW(TEST))
1.9117132451798682
>>> timeit(lambda: list2dict(TEST))
2.2543816669587216
There are no obvious answers, but you can try something like:
def list2dict(list_):
    """Map each item of ``list_`` to the index of its first occurrence.

    Duplicates do not raise: ``setdefault`` only stores an index when the
    key is not present yet, so later occurrences are ignored.

    Args:
        list_: Iterable of hashable items.

    Returns:
        A dict mapping every distinct item to its first zero-based index.
    """
    return_dict = {}
    for idx, word in enumerate(list_):
        return_dict.setdefault(word, idx)
    return return_dict
Alternatively, you can use list.index, although it is noticeably slower (timed with timeit.timeit):
def list2dict(list_):
    """Map each distinct item of ``list_`` to the index of its first occurrence.

    Duplicates do not raise. Each lookup goes through ``list_.index``,
    which scans the list from the start, so this variant does a linear
    search per distinct item.

    Args:
        list_: Sequence of hashable items providing an ``index`` method.

    Returns:
        A dict mapping every distinct item to its first zero-based index.
    """
    set_ = set(list_)
    return {word: list_.index(word) for word in set_}
Timing comparison:
# Benchmark input: the ten distinct one-character strings 'a' through 'j'.
TEST = list("abcdefghij")
def list2dictA(list_):
    """Map each distinct item of ``list_`` to the index of its first occurrence.

    Duplicates do not raise. Each lookup goes through ``list_.index``,
    which scans the list from the start, so this variant does a linear
    search per distinct item.

    Args:
        list_: Sequence of hashable items providing an ``index`` method.

    Returns:
        A dict mapping every distinct item to its first zero-based index.
    """
    set_ = set(list_)
    return {word: list_.index(word) for word in set_}
def list2dictB(list_):
    """Map each item of ``list_`` to the index of its first occurrence.

    Duplicates do not raise: ``setdefault`` only stores an index when the
    key is not present yet, so later occurrences are ignored.

    Args:
        list_: Iterable of hashable items.

    Returns:
        A dict mapping every distinct item to its first zero-based index.
    """
    return_dict = {}
    for idx, word in enumerate(list_):
        return_dict.setdefault(word, idx)
    return return_dict
def list2dictC(list_):
    """Map each item of ``list_`` to its index using a dict comprehension.

    Duplicates are detected after construction by comparing sizes.

    Args:
        list_: Sized iterable (``len()`` is called on it) of hashable items.

    Returns:
        A dict mapping every item to its zero-based index.

    Raises:
        ValueError: If ``list_`` contains at least one duplicate item.
    """
    return_dict = {word: idx for idx, word in enumerate(list_)}
    # Duplicate keys collapse into one entry, so a size mismatch means
    # the input was not unique.
    if len(return_dict) != len(list_):
        raise ValueError("Duplicate string found in list")
    return return_dict
def list2dictD(list_):
    """Map each item of ``list_`` to its position in the list.

    Args:
        list_: Iterable of hashable items (typically strings).

    Returns:
        A dict mapping every item to its zero-based index.

    Raises:
        ValueError: As soon as an item is encountered a second time.
    """
    return_dict = {}
    for idx, word in enumerate(list_):
        if word in return_dict:
            # Wrap in a one-element tuple: with a bare ``% word`` a
            # tuple-valued item would be unpacked as the format arguments.
            raise ValueError("duplicate string found in list: %s" % (word,))
        return_dict[word] = idx
    return return_dict
>>> timeit(lambda: list2dictA(TEST))
5.336584700190931
>>> timeit(lambda: list2dictB(TEST))
2.7587691306531
>>> timeit(lambda: list2dictC(TEST))
2.1609074989233292
>>> timeit(lambda: list2dictD(TEST))
2.2543816669587216