Andrew Ng's deeplearning.ai

[image]

Could someone explain how the expression circled in the image is derived? Mainly, I don't understand why it has to be multiplied by g[1]'(z[1]).

Chain rule:
dz[1] = dz[2] * (∂z[2]/∂a[1]) * (∂a[1]/∂z[1]); the last factor ∂a[1]/∂z[1] is exactly the g[1]'(z[1]) you mentioned, so of course it can't be dropped.
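
If it helps, here is a tiny numeric sketch (a made-up one-unit example with a toy loss L = 0.5*a1**2, not from the assignment) showing that the derivative only comes out right with the g[1]'(z[1]) factor:

import numpy as np

z1 = 0.3
a1 = np.tanh(z1)                           # a1 = g[1](z1), with g[1] = tanh
dL_da1 = a1                                # for the toy loss L = 0.5 * a1**2
dL_dz1 = dL_da1 * (1 - np.tanh(z1) ** 2)   # chain rule: multiply by g[1]'(z1) = 1 - tanh(z1)**2

# finite-difference check of dL/dz1
eps = 1e-6
L = lambda z: 0.5 * np.tanh(z) ** 2
numeric = (L(z1 + eps) - L(z1 - eps)) / (2 * eps)
print(dL_dz1, numeric)                     # the two values agree; drop the g[1]' factor and they no longer do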

Thanks, your notes are very detailed. I wonder if you could also take a look at the problem below?
My calculation gives W1 of shape (4,2) and b1 of shape (4,1),
with input X of shape (2,3) and Y of shape (1,3).
Z1 should broadcast fine, but for some reason b1 seems to become (4,4).

[image]
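
If it helps, here is a quick sketch of those shapes with random data. Z1 = W1·X + b1 broadcasts fine; a (4,4) shape typically shows up in the b1 update when db1 was summed without keepdims=True (see the note in backward_propagation below):

import numpy as np

W1 = np.random.randn(4, 2)
b1 = np.zeros((4, 1))
X = np.random.randn(2, 3)

Z1 = np.dot(W1, X) + b1                        # (4,3) + (4,1) broadcasts to (4,3) as expected
print(Z1.shape)                                # (4, 3)

dZ1 = np.random.randn(4, 3)
db1_bad = np.sum(dZ1, axis=1)                  # shape (4,), keepdims omitted
print((b1 - 0.5 * db1_bad).shape)              # (4, 4): (4,1) minus (4,) broadcasts both ways
db1_good = np.sum(dZ1, axis=1, keepdims=True)  # shape (4, 1)
print((b1 - 0.5 * db1_good).shape)             # (4, 1)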



import numpy as np
from testCases import *
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import sklearn.linear_model
from planar_utils import plot_decision_boundary, sigmoid, load_planar_dataset, load_extra_datasets

np.random.seed(1)
def layer_sizes(X,Y):
    n_x = X.shape[0]  # size of the input layer
    n_h = 4           # size of the hidden layer (hard-coded to 4)
    n_y = Y.shape[0]  # size of the output layer
    
    return (n_x,n_h,n_y)
def initialize_parameters(n_x,n_h,n_y):
    
    np.random.seed(2)
    
    W1 = np.random.randn(n_h,n_x)*0.01
    b1 = np.zeros(shape = (n_h,1))
    W2 = np.random.randn(n_y,n_h)*0.01
    b2 = np.zeros(shape = (n_y,1))
    
    # Use assertions to make sure the shapes are correct
    assert(W1.shape == (n_h,n_x))
    assert(b1.shape == (n_h, 1))
    assert(W2.shape == (n_y,n_h))
    assert(b2.shape == (n_y, 1))
    
    # Store the parameters in a dictionary
    
    parameters = {
        "W1":W1,
        "b1":b1,
        "W2":W2,
        "b2":b2
    }
    
    return parameters
def forward_propagation(X, parameters):
    
    # Retrieve the parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    
    # Forward pass (lay it out according to your own network design)
    
    Z1 = np.dot(W1,X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) +b2
    A2 = sigmoid(Z2)
    
    # Make sure the output shape is correct
    assert(A2.shape == (1,X.shape[1]))
    
    cache = {
        "Z1":Z1,
        "A1":A1,
        "Z2":Z2,
        "A2":A2
    }
    
    return (A2, cache)
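# A quick shape check of the forward pass with the shapes from the question above
# (random data, illustration only; the *_demo names are mine, not from the notebook)
X_demo = np.random.randn(2, 3)
params_demo = initialize_parameters(2, 4, 1)
A2_demo, cache_demo = forward_propagation(X_demo, params_demo)
print(A2_demo.shape)             # (1, 3)
print(cache_demo["Z1"].shape)    # (4, 3) -- b1 of shape (4,1) broadcasts correctly here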
def compute_cost(A2,Y,parameters):
    
    m = Y.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    
    # Compute the cross-entropy cost
    logprobs = np.multiply(np.log(A2),Y) + np.multiply((1-Y),np.log(1-A2))
    
    cost = -np.sum(logprobs) / m
    cost = float(np.squeeze(cost))
    
    assert(isinstance(cost,float))
    
    return cost
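# Tiny worked example of the cost (toy values of my own, illustration only):
# a predictor that always outputs 0.5 should cost ln(2) ≈ 0.693
A2_half = np.full((1, 3), 0.5)
Y_toy = np.array([[1, 0, 1]])
print(compute_cost(A2_half, Y_toy, initialize_parameters(2, 4, 1)))   # ≈ 0.6931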
def backward_propagation(parameters,cache,X,Y):
    
    m = X.shape[1]
    
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    
    A1 = cache["A1"]
    A2 = cache["A2"]
    
    dZ2 = A2 - Y
    dW2 = (1/m) * np.dot(dZ2,A1.T)
    db2 = (1/m) * np.sum(dZ2 , axis = 1 ,keepdims = True )
    dZ1 = np.multiply(np.dot(W2.T,dZ2),1 - np.power(A1,2))
    dW1 = (1/m) * np.dot(dZ1,X.T)
    db1 = (1/m) * np.sum(dZ1, axis = 1, keepdims = True)  # keepdims=True keeps db1 as (n_h,1); without it, b1 - learning_rate*db1 broadcasts (4,1)-(4,) into (4,4)
    
    grads = {
        "dW1":dW1,
        "db1":db1,
        "dW2":dW2,
        "db2":db2
    }
    
    return grads
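# Continuing the shape check above (illustration only): with keepdims=True on db1,
# the b1 update stays (4,1) instead of blowing up to (4,4)
Y_demo = (np.random.rand(1, 3) > 0.5).astype(float)
grads_demo = backward_propagation(params_demo, cache_demo, X_demo, Y_demo)
print(grads_demo["db1"].shape)   # (4, 1)
print(grads_demo["dW1"].shape)   # (4, 2)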
def update_parameters(parameters,grads,learning_rate = 1.2):
    
    # Retrieve parameters and gradients
    W1,W2 = parameters["W1"],parameters["W2"]
    b1,b2 = parameters["b1"],parameters["b2"]
    
    dW1,dW2 = grads["dW1"],grads["dW2"]
    db1,db2 = grads["db1"],grads["db2"]
    
    # Gradient-descent update
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    
    parameters = {
        "W1":W1,
        "b1":b1,
        "W2":W2,
        "b2":b2
    }
    return parameters
def nn_model(X,Y,n_h,num_iterations,print_cost = False):
    
    np.random.seed(3)
    
    n_x = layer_sizes(X,Y)[0]
    n_y = layer_sizes(X,Y)[2]
    
    parameters = initialize_parameters(n_x,n_h,n_y)
    
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
    for i in range(num_iterations):
        A2, cache = forward_propagation(X,parameters)
        cost = compute_cost(A2,Y,parameters)
        grads = backward_propagation(parameters,cache,X,Y)
        parameters = update_parameters(parameters,grads,learning_rate = 0.5)
        
        if print_cost and i % 100 == 0:
            print("Iteration", i, "- cost:", cost)
    return parameters

X_assess, Y_assess = nn_model_test_case()
parameters = nn_model(X_assess, Y_assess, 4, num_iterations=10000, print_cost=False)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))