# Map each vocabulary word to an integer index, then invert that mapping
# so an index can be turned back into its word.
word_to_index = {word: i for i, word in enumerate(vocab)}
index_to_word = {i: word for word, i in word_to_index.items()}
# Save the vocabulary mappings to disk as pickle files.
with open('word_to_index.pkl', 'wb') as f:
    pickle.dump(word_to_index, f)
with open('index_to_word.pkl', 'wb') as f:
    pickle.dump(index_to_word, f)
# Prepare training data: slide a fixed-size window over `words`; each
# window of `seq_len` word indices is one input and the index of the word
# immediately after the window is its label.
seq_len = 10  # sequence length
X = []
y = []
# Stop `seq_len` positions before the end so words[i + seq_len] always exists.
for i in range(len(words) - seq_len):
    seq = words[i:i + seq_len]
    label = words[i + seq_len]
    X.append([word_to_index[word] for word in seq])
    y.append(word_to_index[label])
# Generate text with the model.
def generate_text(model, seed, length=100):
    """Extend ``seed`` by ``length`` words predicted by ``model``.

    Each step encodes the most recent ``seq_len`` words with
    ``word_to_index``, passes them to ``model.predict`` as a single-row
    batch, and appends the word whose index has the highest score
    (greedy argmax decoding).

    Args:
        model: Object exposing ``predict(x)``. Its output is assumed to
            hold one score per vocabulary index for the single input
            row -- TODO confirm against the model definition.
        seed: Sequence of words to start from. Every word must be a key
            of ``word_to_index``. The sequence is copied, not modified.
        length: Number of words to generate.

    Returns:
        The seed words followed by the generated words, joined by
        single spaces.

    Raises:
        KeyError: If a word in the current window is missing from
            ``word_to_index``.
    """
    generated_text = list(seed)
    for _ in range(length):
        # Feed only the trailing window: the negative slice keeps the
        # last `seq_len` words, matching the window size used for training.
        window = generated_text[-seq_len:]
        x = np.array([[word_to_index[w] for w in window]])
        pred = model.predict(x)
        # argmax over the flattened prediction gives the winning index.
        pred_word = index_to_word[int(np.argmax(pred))]
        generated_text.append(pred_word)
    return ' '.join(generated_text)