Skip to content

add support for exporting model to word2vec format #75

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Makefile~
*.npy
*.bz2
*#*
.idea
13 changes: 13 additions & 0 deletions examples/export_word2vec_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from glove import Glove
import argparse

# Convert binary model to standardized .vec format for compatibility
# Example command: python export_word2vec_format.py -i model.model -o model.vec
if __name__ == '__main__':
    # Both paths are mandatory: without them there is nothing to load or
    # write, so let argparse print a usage error instead of passing None
    # further down into the model code.
    parser = argparse.ArgumentParser(description='Export model to word2vec format')
    parser.add_argument("-i", "--input", type=str, required=True, help="input model")
    parser.add_argument("-o", "--output", type=str, required=True, help="output model")
    args = parser.parse_args()

    # Load the saved model and re-serialize it in word2vec text format.
    glove = Glove.load(args.input)
    glove.save_word2vec_format(args.output)
20 changes: 20 additions & 0 deletions glove/glove.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,26 @@ def save(self, filename):
savefile,
protocol=pickle.HIGHEST_PROTOCOL)

def save_word2vec_format(self, filename):
    """
    Serialize model to filename in word2vec .vec (text) format.

    The file is written as UTF-8 text. The first line holds two
    integers: the number of entries written and the vector
    dimensionality. Every following line holds one word followed by
    the space-separated components of its vector, in the iteration
    order of self.dictionary.

    Parameters:
    - filename: path of the file to create (or overwrite).

    Raises:
    - ValueError if self.word_vectors or self.dictionary is None.
    """
    # Function-scope import: io.open accepts `encoding` on both
    # Python 2 and Python 3, unlike the Python 2 builtin open.
    import io

    # Validate before opening so a failed export does not leave an
    # empty or truncated file behind.
    if self.word_vectors is None or self.dictionary is None:
        raise ValueError('Model needs both word vectors and a dictionary '
                         'to be exported to word2vec format')

    _, cols = self.word_vectors.shape

    if hasattr(self.dictionary, 'iteritems'):
        # Python 2 compat
        items_iterator = self.dictionary.iteritems()
    else:
        items_iterator = self.dictionary.items()

    def as_text(line):
        # io text files only accept unicode; Python 2 native strings
        # are bytes and have to be decoded first.
        if isinstance(line, bytes):
            return line.decode('utf-8')
        return line

    with io.open(filename, 'w', encoding='utf-8') as savefile:
        # One line is written per dictionary entry, so the header must
        # carry the dictionary size for the count to match the body.
        savefile.write(as_text('%d %d\n' % (len(self.dictionary), cols)))

        for word, idx in items_iterator:
            fields = [word]
            fields.extend(str(val_i) for val_i in self.word_vectors[idx])
            savefile.write(as_text(' '.join(fields) + '\n'))

@classmethod
def load(cls, filename):
"""
Expand Down