一个月以来,我刚刚进入机器学习,特别是深度学习,并努力工作。在学习了所有的数学概念之后,我决定自己在 python 中用一个神经网络来做,它工作正常。(超精度)
我现在决定用 2 个神经元、1 个输出神经元和 2 个输入的一个隐藏层来做,但这不起作用......确实成本没有降低,准确度也没有增加。但程序有效(输出如下)
import numpy as np
import matplotlib.pyplot as plt
def init_variables():
"""
Init model variables (weights, biais)
"""
weights_11 = np.random.normal(size=2)
weights_12 = np.random.normal(size=2)
weight_ouput = np.random.normal(size=2)
bias_11 = 0
bias_12 = 0
bias_output = 0
return weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output
def get_dataset():
"""
Method used to generate the dataset
"""
#Number of rows per class
row_per_class = 100
#generate rows
sick_people = (np.random.randn(row_per_class,2)) + np.array([-2,-2])
sick_people2 = (np.random.randn(row_per_class,2)) + np.array([2,2])
healthy_people = (np.random.randn(row_per_class,2)) + np.array([-2,2])
healthy_people2 = (np.random.randn(row_per_class,2)) + np.array([2,-2])
features = np.vstack([sick_people,sick_people2, healthy_people, healthy_people2])
targets = np.concatenate((np.zeros(row_per_class*2), np.zeros(row_per_class*2)+1))
#plt.scatter(features[:,0], features[:,1], c=targets, cmap = plt.cm.Spectral)
#plt.show()
return features, targets
def pre_activation(features, weights, bias):
"""
compute pre activation of the neural
"""
return np.dot(features, weights) + bias
def activation(z):
"""
compute the activation (sigmoide)
"""
return 1 / ( 1 + np.exp(-z) )
def derivative_activation(z):
"""
compute the derivative of the activation (derivative of sigmoide)
"""
return activation(z) * (1 - activation(z))
def cost(predictions, targets):
"""
make the difference between predictions and results
"""
return np.mean((predictions - targets)**2)
def predict_hidden_layer(features, weights_11, weights_12, bias_11, bias_12):
"""
This function is not generic at all and aims to understand how is made the input for the next ouput neural
"""
predictions_11 = activation(pre_activation(features, weights_11, bias_11))
predictions_12 = activation(pre_activation(features, weights_12, bias_12))
layer1_result = np.stack((predictions_11, predictions_12), axis=-1)
return layer1_result
def predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output):
"""
Determine the prediction of the output
"""
layer1_result = predict_hidden_layer(features, weights_11, weights_12, bias_11, bias_12)
output_result = activation(pre_activation(layer1_result, weight_ouput, bias_output))
return layer1_result, output_result
def train_multiple_neurals(features, targets, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output):
"""
function of training multiple neural (ajust weights and bias in function of features and targets)
This function is not generic or optimized and aims to understand better how it works
"""
epochs = 100
learning_rate = 0.1
#display Accuracy before the training
layer1, prediction = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
predictions = np.around(prediction)
print ("Accuracy", np.mean(predictions == targets))
for epoch in range(epochs):
layer1, predictions = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
if epoch % 10 == 0:
layer1, predictions = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
print (cost(predictions, targets))
"""
There are a lot of things to do here !
to do the back propagation, we will first train the ouput neural
"""
#Init gradient
weights_gradient_output = np.zeros(weight_ouput.shape)
bias_gradient_output = 0
#Go throught each row
for neural_input, target, prediction in zip(layer1, targets, predictions):
#compute pre activation
z = pre_activation(neural_input, weight_ouput, bias_output)
#Update the gradient
weights_gradient_output += (prediction - target)* derivative_activation(prediction) * neural_input
bias_gradient_output += (prediction - target)* derivative_activation(prediction)
"""
Now we are going to train hiddens layer of neurals
"""
weights_gradient_11 = np.zeros(weights_11.shape)
bias_gradient_11 = 0
weights_gradient_12 = np.zeros(weights_12.shape)
bias_gradient_12 = 0
#Go throught each row
for neural_output, feature, target, prediction in zip(layer1, features, targets, predictions):
#compute pre activation
z = pre_activation(neural_input, weights_11, bias_11)
#Update the gradient
weights_gradient_11 += (prediction - target)* derivative_activation(prediction) * weight_ouput[0] * derivative_activation(neural_output[0]) * feature
bias_gradient_11 += (prediction - target)* derivative_activation(prediction) * weight_ouput[0] * derivative_activation(neural_output[0])
#print (weights_gradient_11)
#Update the gradient
weights_gradient_12 += (prediction - target)* derivative_activation(prediction) * weight_ouput[1] * derivative_activation(neural_output[1]) * feature
bias_gradient_12 += (prediction - target)* derivative_activation(prediction) * weight_ouput[1] * derivative_activation(neural_output[1])
#Update the weights and bias
weight_ouput = weight_ouput - (learning_rate * weights_gradient_output)
bias_output = bias_output - (learning_rate * bias_gradient_output)
weights_11 = weights_11 - (learning_rate * weights_gradient_11)
bias_11 = bias_11 - (learning_rate * bias_gradient_11)
weights_12 = weights_12 - (learning_rate * weights_gradient_12)
bias_12 = bias_12 - (learning_rate * bias_gradient_12)
layer1, prediction = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
predictions = np.around(prediction)
print ("Accuracy", np.mean(predictions == targets))
if __name__ == '__main__':
#dataset
features, targets = get_dataset()
#variables
weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output = init_variables()
layer1_result, output_result = predict_output_neural(features, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
train_multiple_neurals(features, targets, weights_11, weights_12, weight_ouput, bias_11, bias_12, bias_output)
代码效率不高,因为我试图一步一步地理解一切,我知道问题出在隐藏层的训练中,但他们尊重我在互联网上看到的公式(神经输入*(预测 - 目标)* sigmoid'(预测)*(weightOfTheNextLayer),这就是为什么我真的不明白。
这是我的输出(准确度在开始和成本之间),准确度没有增加,成本没有减少:
Accuracy 0.6025
0.32149563353794364
0.3216454935878719
0.32177853678600526
0.32189583396850424
0.32199849304998307
0.3220876323586574
0.3221644075538757
0.32223008209366144
0.32228608192864866
0.32233396315649065
0.3223752777740352
0.32241140511378036
0.3224434401200392
0.3224721764785219
0.32249815913581226
0.32252176039218206
0.32254324818743063
0.32256283493698107
0.32258070692435065
0.3225970387325917
0.3226119980415239
0.322625745368742
0.3226384319652169
0.32265019765826863
0.3226611692835548
0.32267145957097
0.3226811659211415
0.32269036836411585
0.3226991261062232
0.32270747252405985
0.3227154094426258
0.3227229031837465
0.32272988687106613
0.3227362744197289
0.3227419889521814
0.3227470002539846
0.32275135531703975
0.3227551824643601
0.3227586613182756
0.32276197240283183
0.32276525289471264
0.32276857750543586
0.3227719648351581
0.3227753969249716
0.32277883940346674
0.3227822558361521
0.32278561551026963
0.3227888964074382
0.322792085387534
0.3227951770494241
Accuracy 0.5
如果你们能帮我解决这个问题,那就太棒了!
可能你的导数函数有一些错误。
def derivative_activation(z):
"""
compute the derivative of the activation (derivative of sigmoide)
"""
return activation(z) * (1 - activation(z))
假设您out_F = sigmod(in_F)
在最后一个输出层,out_F
您的prediction
和in_F
是最后一个节点的输入。
对于这个函数,正如您的函数名称所暗示的那样,可能是指对 that 进行推导in_F
。所以应该是d{out_F}/d{in_F} = out_F * (1 - out_F)
试试这个:
def derivative_activation(z):
"""
compute the derivative of the activation (derivative of sigmoide)
"""
return z * (1 - z)
本文收集自互联网,转载请注明来源。
如有侵权,请联系[email protected] 删除。
我来说两句