Train on 28624 samples
Epoch 1/10
32/28624 [..............................] - ETA: 15:20
InvalidArgumentError Traceback (most recent call last)
<ipython-input-25-4679097c6578> in <module>
----> 1 model.fit(X_train_indices, Y_train_OH, epochs = 10, batch_size = 32)
InvalidArgumentError: indices[15,2] = -2147483648 is not in [0, 1193514)
[[node model_1/embedding_1/embedding_lookup (defined at <ipython-input-25-4679097c6578>:1) ]] [Op:__inference_distributed_function_6120]
Errors may have originated from an input operation.
Input Source operations connected to node model_1/embedding_1/embedding_lookup:
model_1/embedding_1/embedding_lookup/4992 (defined at C:\Users\shash\Anaconda3\envs\sentiment_analysis\lib\contextlib.py:81)
Function call stack:
distributed_function
系统信息
代码
->嵌入层
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """Build a frozen Keras Embedding layer initialised from GloVe vectors.

    word_to_vec_map: dict mapping word -> numpy vector of length embedding_dim.
    word_to_index: dict mapping word -> row index in the embedding matrix.
    Returns a non-trainable ``layers.Embedding`` holding the pre-trained weights.
    """
    # One extra row so the matrix covers every index plus a spare slot.
    vocab_len = len(word_to_index) + 1  # 1193514
    matrix = np.zeros((vocab_len, embedding_dim))
    # Copy each pre-trained vector into the row owned by its word.
    for word, row in word_to_index.items():
        matrix[row, :] = word_to_vec_map[word]
    # trainable=False keeps the GloVe vectors fixed during model.fit.
    embedding_layer = layers.Embedding(vocab_len, embedding_dim, trainable=False)
    # The layer must be built before its weights can be assigned.
    embedding_layer.build((None,))
    embedding_layer.set_weights([matrix])
    return embedding_layer
-> 模型
def sentiment_model(input_shape, word_to_vec_map, word_to_index):
    """Assemble the sentiment classifier: GloVe embedding -> LSTM -> Dense.

    Takes sentence index sequences of shape ``input_shape`` and returns a
    Keras Model producing a 2-unit sigmoid output.
    """
    sentence_indices = layers.Input(shape=input_shape, dtype='float32')
    # Embedding layer pre-loaded with GloVe vectors and kept frozen.
    embedding_layer = pretrained_embedding_layer(word_to_vec_map, word_to_index)
    embeddings = embedding_layer(sentence_indices)
    # Single LSTM pass over the sequence, regularised by dropout.
    hidden = layers.Dropout(0.5)(layers.LSTM(128)(embeddings))
    predictions = layers.Dense(2, activation="sigmoid", name="predictions")(hidden)
    return keras.Model(inputs=sentence_indices, outputs=predictions)
def sentences_to_indices(X, word_to_index, max_len):
    """Convert an iterable of sentences into a (len(X), max_len) index matrix.

    Each sentence is lower-cased and whitespace-split, and each word is
    replaced by its vocabulary index. Unused trailing positions stay 0.

    Fixes over the original:
    - the row count used an undefined global ``m``; it is now ``len(X)``,
    - out-of-vocabulary words map to 0 instead of raising KeyError / leaking
      NaN into the matrix (the cause of the InvalidArgumentError above),
    - sentences longer than ``max_len`` are truncated instead of raising
      IndexError.
    """
    X_indices = np.zeros((len(X), max_len))
    for i, sentence in enumerate(X):
        sentence_words = sentence.lower().split()
        # Truncate so we never write past column max_len - 1.
        for j, word in enumerate(sentence_words[:max_len]):
            # 0 is the padding/unknown row of the embedding matrix.
            X_indices[i, j] = word_to_index.get(word, 0)
    return X_indices
def get_word_embedding_dictionary():
    """Create word_to_vec_map, word_to_index and index_to_word dictionaries.

    Parses the GloVe file at the module-level ``embeding_path``. Lines whose
    token count does not match ``embedding_dim + 1`` are skipped (the Twitter
    GloVe file contains a few such malformed entries).

    Fix: indices now start at 1 so that 0 is reserved exclusively for the
    padding / out-of-vocabulary slot. Previously the first sorted word also
    received index 0, colliding with the ``word_to_index.get(word, 0)``
    fallback; ``vocab_len = len(word_to_index) + 1`` in
    pretrained_embedding_layer already allocates the extra row.
    """
    words = set()
    word_to_vec_map = {}
    with open(embeding_path, 'r', encoding='utf-8') as f:
        for line in f:
            line_list = line.split()
            # Ignore unresolvable/malformed lines.
            if len(line_list) != embedding_dim + 1:
                continue
            curr_word = line_list[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(line_list[1:], dtype=np.float64)
    word_to_index = {}
    index_to_word = {}
    # start=1 keeps index 0 free for padding/unknown words.
    for i, w in enumerate(sorted(words), start=1):
        word_to_index[w] = i
        index_to_word[i] = w
    return word_to_index, index_to_word, word_to_vec_map
# Convert the raw training sentences into a padded matrix of vocabulary indices.
X_train_indices = sentences_to_indices(X_train, word_to_index, max_features)
# One-hot encode the integer labels for the 2-unit output layer.
Y_train_OH = to_categorical(Y_train)
# NOTE(review): this is the call that raised the InvalidArgumentError above —
# the index matrix contained NaN (cast to -2147483648) for words missing from
# word_to_index.
model.fit(X_train_indices, Y_train_OH, epochs = 10, batch_size = 32)
问题出现在单词被其相应的索引替换时:如果在词汇表(word_to_index 词典)中找不到该单词,该位置就会被存储为 NaN。
词汇表是词嵌入中存在的所有词(我使用过GloVe twitter嵌入)。
修改功能:
def sentences_to_indices(X, word_to_index, max_len):
    """Map sentences to a (len(X), max_len) matrix of vocabulary indices.

    The author's improvement (``dict.get`` with default 0 for unknown words)
    is kept. Two remaining defects are fixed: the row count used an undefined
    global ``m`` (now ``len(X)``), and sentences longer than ``max_len`` are
    truncated instead of raising IndexError.
    """
    X_indices = np.zeros((len(X), max_len))
    for i, sentence in enumerate(X):
        # Truncate to max_len; index 0 is the padding/OOV embedding row.
        for j, word in enumerate(sentence.lower().split()[:max_len]):
            X_indices[i, j] = word_to_index.get(word, 0)  # improvement
    return X_indices
不过,我不确定单词嵌入中不存在的单词是否应存储为零。
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句