빅데이터 분석 (5주차) 10월12일
손실함수의 비교, Adam, universal approximation theorem, MNIST with MLP
import torch
import numpy as np
import matplotlib.pyplot as plt
# Simulate 2000 points from a logistic model: P(y=1 | x) = sigmoid(w0 + w1*x)
# with true parameters (w0, w1) = (-1, 5).
torch.manual_seed(1)  # fix the RNG so the Bernoulli draws are reproducible
X = torch.linspace(-1, 1, 2000).reshape(2000, 1)
w0 = -1.0
w1 = 5.0
z = w0 + X * w1                        # linear predictor
v = torch.exp(z) / (1 + torch.exp(z))  # sigmoid(z): true success probability
y = torch.bernoulli(v)                 # 0/1 labels drawn with probability v
# Visualize the simulated data: faint dots are the observed 0/1 draws,
# the curve is the true success probability v = sigmoid(-1 + 5x).
plt.scatter(X,y,alpha=0.01)
plt.plot(X,v)
_w0=np.arange(-10,3,0.05) # start=-10, stop=3, step=0.05
_w1=np.arange(-1,10,0.05)
_w0.shape,_w1.shape
_w0,_w1=np.meshgrid(_w0,_w1,indexing='ij') # grid를 array로 만들기
- 격자 그리드 만드는 numpy의 meshgrid 함수
- meshgrid의 indexing 옵션
- Cartesian('xy'): x=열, y=행
- Matrix('ij'): i=행, j=열
- meshgrid의 sparse 옵션
- 메모리를 아끼는 작업
# Flatten both 2-D grids so each index addresses one (w0, w1) pair.
_w0.shape, _w1.shape  # notebook display: still 2-D here
_w0 = _w0.ravel()     # equivalent to reshape(-1)
_w1 = _w1.ravel()
_w0.shape, _w1.shape  # notebook display: now 1-D of length 260*220
def lossfn_crossenp(w0, w1, X=None, y=None):
    """Mean binary cross-entropy loss of the logistic model sigmoid(w0 + w1*x).

    Parameters
    ----------
    w0, w1 : float
        Intercept and slope of the linear predictor.
    X, y : torch.Tensor, optional
        Inputs and 0/1 targets. Default to the module-level ``X`` and ``y``
        generated above, so the original ``map(lossfn_crossenp, _w0, _w1)``
        call sites keep working unchanged.

    Returns
    -------
    float
        The mean BCE loss as a Python float.
    """
    # Backward-compatible fallback to the globals the original hard-coded.
    if X is None:
        X = globals()["X"]
    if y is None:
        y = globals()["y"]
    u = w0 + w1 * X  # linear predictor, computed once (original computed it twice)
    yhat = torch.exp(u) / (1 + torch.exp(u))  # sigmoid(u), same formula as the data generator
    loss = -torch.mean(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))
    return loss.item()  # 0-dim tensor -> float (same value .tolist() returned)
def lossfn_mse(w0, w1, X=None, y=None):
    """Mean squared error of the logistic model sigmoid(w0 + w1*x).

    Parameters
    ----------
    w0, w1 : float
        Intercept and slope of the linear predictor.
    X, y : torch.Tensor, optional
        Inputs and 0/1 targets. Default to the module-level ``X`` and ``y``
        generated above, so ``map(lossfn_mse, _w0, _w1)`` keeps working.

    Returns
    -------
    float
        The mean squared error as a Python float.
    """
    # Backward-compatible fallback to the globals the original hard-coded.
    if X is None:
        X = globals()["X"]
    if y is None:
        y = globals()["y"]
    u = w0 + w1 * X  # linear predictor, computed once (original computed it twice)
    yhat = torch.exp(u) / (1 + torch.exp(u))  # sigmoid(u)
    loss = torch.mean((y - yhat) ** 2)
    return loss.item()  # 0-dim tensor -> float (same value .tolist() returned)
# Evaluate both losses at every flattened grid point (lists of floats).
_l1 = [lossfn_crossenp(a, b) for a, b in zip(_w0, _w1)]
_l2 = [lossfn_mse(a, b) for a, b in zip(_w0, _w1)]
# 3-D scatter of both loss surfaces (BCE left, MSE right), with the grid
# minimizer of each surface marked by a large star.
fig = plt.figure()
ax1 = fig.add_subplot(1, 2, 1, projection='3d')
ax2 = fig.add_subplot(1, 2, 2, projection='3d')
for ax in (ax1, ax2):  # same viewing angle for both panels
    ax.elev = 15
    ax.azim = 75
fig.set_figheight(15)
fig.set_figwidth(15)
ax1.scatter(_w0, _w1, _l1, s=0.01)
ax2.scatter(_w0, _w1, _l2, s=0.01)
i1 = np.argmin(_l1)  # grid index of the BCE minimum
i2 = np.argmin(_l2)  # grid index of the MSE minimum
_w0[i1], _w1[i1]  # notebook display: close to the true (w0, w1) = (-1, 5)
_w0[i2], _w1[i2]  # notebook display: also close to the true parameters
ax1.scatter(_w0[i1], _w1[i1], np.min(_l1), s=200, marker='*')
ax2.scatter(_w0[i2], _w1[i2], np.min(_l2), s=200, marker='*')
- argmin : 최소값의 색인 위치
- argmax : 최대값의 색인 위치
fig
- cross entropy(왼쪽)가 mse(오른쪽)보다 최소값 찾기 쉬워 보이는 모양
- convex란 아래로 볼록한 2차 곡선 모양을 뜻함
- 오른쪽 그림은 단면을 자르면 평평한 구간이 나오는 현상이 있을 수 있다.(오른쪽 대각선 부분)
- logistic의 경우 mse가 아니라 (local minimum을 만나지 않기 위해) Binary Cross Entropy로 하면 convex한 손실면을 얻을 수 있다.
- MLE이기 때문에 BCE를 써야 한다고 말할 수도 있다.
# Logistic-regression network: one linear layer (w1*x + w0) followed by a sigmoid.
l1 = torch.nn.Linear(1, 1)  # in_features=1, out_features=1, bias=True (defaults)
a1 = torch.nn.Sigmoid()
net = torch.nn.Sequential(l1, a1)
# Plain gradient descent on the two parameters.
optimizer = torch.optim.SGD(net.parameters(), lr=0.05)
$$H(x)=\text{sigmoid}(Wx+b)=\frac{1}{1+e^{-(Wx+b)}}=\sigma(Wx+b)$$
-
파라메터 초기값 $(w_0,w_1)=(-3,-1)$로 설정
# Show the current (randomly initialized) parameters, then set the starting
# point to (w0, w1) = (-3, -1) for the BCE training run below.
l1.bias.data, l1.weight.data
l1.bias.data=torch.tensor([-3.0])
l1.weight.data=torch.tensor([[-1.0]])
l1.bias.data, l1.weight.data
-
BCEloss를 이용하여 학습+기록
# Train with (manually written) binary cross-entropy, recording the
# parameter/loss trajectory every 20 epochs (50 snapshots over 1000 epochs).
w0_bce, w1_bce, loss_bce = [], [], []
for epoch in range(1000):
    ## 1. forward pass
    yhat = net(X)
    ## 2. BCE loss, written out explicitly
    loss = -torch.mean(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))
    ## 3. backward pass
    loss.backward()
    ## 4. gradient step, then reset the gradients
    optimizer.step()
    net.zero_grad()
    ## 5. record the trajectory
    if epoch % 20 == 0:
        w0_bce.append(l1.bias.data.item())
        w1_bce.append(l1.weight.data.item())
        loss_bce.append(loss.item())
l1.bias.data, l1.weight.data
-
파라메터 초기값 $(w_0,w_1)=(-3,-1)$로 설정
# Reset the starting point back to (w0, w1) = (-3, -1) so the MSE run
# starts from the same initial parameters as the BCE run.
l1.bias.data,l1.weight.data
l1.bias.data=torch.tensor([-3.0])
l1.weight.data=torch.tensor([[-1.0]])
l1.bias.data,l1.weight.data
-
MSEloss를 이용하여 학습+기록
# Train with mean-squared error, recording the parameter/loss trajectory
# every 20 epochs (50 snapshots over 1000 epochs).
w0_mse, w1_mse, loss_mse = [], [], []
for epoch in range(1000):
    ## 1. forward pass
    yhat = net(X)
    ## 2. MSE loss
    loss = torch.mean((y - yhat) ** 2)
    ## 3. backward pass
    loss.backward()
    ## 4. gradient step, then reset the gradients
    optimizer.step()
    net.zero_grad()
    ## 5. record the trajectory
    if epoch % 20 == 0:
        w0_mse.append(l1.bias.data.item())
        w1_mse.append(l1.weight.data.item())
        loss_mse.append(loss.item())
l1.bias.data, l1.weight.data
-
plot
from matplotlib import animation
plt.rcParams["animation.html"] = "jshtml"  # render animations as interactive JS/HTML in Jupyter
# Layout: top row = 3-D loss surfaces (BCE left, MSE right) with the descent path,
# bottom row = data with the currently fitted sigmoid curve for each loss.
fig = plt.figure()
ax1= fig.add_subplot(2,2,1,projection='3d')
ax2= fig.add_subplot(2,2,2,projection='3d')
ax3= fig.add_subplot(2,2,3)
ax4= fig.add_subplot(2,2,4)
ax1.elev = 15
ax2.elev = 15
ax1.azim = 75
ax2.azim = 75
fig.set_figheight(15)
fig.set_figwidth(15)
### init plot
# Loss surfaces sampled on the (w0, w1) grid computed earlier.
ax1.scatter(_w0,_w1,_l1,s=0.05)
ax2.scatter(_w0,_w1,_l2,s=0.05)
# Gray dot = initial parameters (-3, -1); red star = near-optimal (-1, 5.1).
ax1.scatter(-3,-1,lossfn_crossenp(-3,-1),color='gray') ## bceloss(binary cross entropy loss)
ax1.scatter(-1,5.1,lossfn_crossenp(-1,5.1),s=200,color='red',marker='*') ## bceloss
ax2.scatter(-3,-1,lossfn_mse(-3,-1),color='gray') ## mseloss
ax2.scatter(-1,5.1,lossfn_mse(-1,5.1),s=200,color='red',marker='*') ## mseloss
# Data, true probability curve, and the first recorded fitted curve per loss.
ax3.scatter(X,y,alpha=0.01)
ax3.plot(X,v,'--')
line3, = ax3.plot(X,1/(1+torch.exp(-w0_bce[0]-w1_bce[0]*X)),'--')
ax4.scatter(X,y,alpha=0.01)
ax4.plot(X,v,'--')
line4, = ax4.plot(X,1/(1+torch.exp(-w0_mse[0]-w1_mse[0]*X)),'--')
### animation
def animate(i):
    # Frame i: add the i-th recorded (w0, w1, loss) point to each surface
    # and move the fitted sigmoid to the i-th recorded parameters.
    ax1.scatter(w0_bce[i],w1_bce[i],lossfn_crossenp(w0_bce[i],w1_bce[i]),color='gray')
    ax2.scatter(w0_mse[i],w1_mse[i],lossfn_mse(w0_mse[i],w1_mse[i]),color='gray')
    line3.set_ydata(1/(1+torch.exp(-w0_bce[i]-w1_bce[i]*X)))
    line4.set_ydata(1/(1+torch.exp(-w0_mse[i]-w1_mse[i]*X)))
    return line3,line4
# 50 frames = the 50 recorded snapshots (1000 epochs / 20).
ani = animation.FuncAnimation(fig, animate, frames=50)
plt.close()  # suppress the static figure; `ani` on the next line renders the animation
ani
-
파라메터 초기값 $(w_0,w_1)=(-10,-1)$로 설정
# Second experiment: start from (w0, w1) = (-10, -1), a point deeper in the
# flat/saturated region, to compare how BCE and MSE escape it.
l1.bias.data, l1.weight.data
l1.bias.data=torch.tensor([-10.0])
l1.weight.data=torch.tensor([[-1.0]])
l1.bias.data, l1.weight.data
-
BCEloss를 이용하여 학습+기록
# Train with (manually written) binary cross-entropy from the new starting
# point, recording the trajectory every 20 epochs (overwrites the earlier logs).
w0_bce, w1_bce, loss_bce = [], [], []
for epoch in range(1000):
    ## 1. forward pass
    yhat = net(X)
    ## 2. BCE loss, written out explicitly
    loss = -torch.mean(y * torch.log(yhat) + (1 - y) * torch.log(1 - yhat))
    ## 3. backward pass
    loss.backward()
    ## 4. gradient step, then reset the gradients
    optimizer.step()
    net.zero_grad()
    ## 5. record the trajectory
    if epoch % 20 == 0:
        w0_bce.append(l1.bias.data.item())
        w1_bce.append(l1.weight.data.item())
        loss_bce.append(loss.item())
l1.bias.data, l1.weight.data
-
파라메터 초기값 $(w_0,w_1)=(-10,-1)$로 설정
# Reset the starting point back to (w0, w1) = (-10, -1) so the MSE run
# starts from the same initial parameters as the second BCE run.
l1.bias.data, l1.weight.data
l1.bias.data=torch.tensor([-10.0])
l1.weight.data=torch.tensor([[-1.0]])
l1.bias.data, l1.weight.data
-
MSEloss를 이용하여 학습+기록
# Train with mean-squared error from the new starting point, recording the
# trajectory every 20 epochs (overwrites the earlier logs).
w0_mse, w1_mse, loss_mse = [], [], []
for epoch in range(1000):
    ## 1. forward pass
    yhat = net(X)
    ## 2. MSE loss
    loss = torch.mean((y - yhat) ** 2)
    ## 3. backward pass
    loss.backward()
    ## 4. gradient step, then reset the gradients
    optimizer.step()
    net.zero_grad()
    ## 5. record the trajectory
    if epoch % 20 == 0:
        w0_mse.append(l1.bias.data.item())
        w1_mse.append(l1.weight.data.item())
        loss_mse.append(loss.item())
l1.bias.data, l1.weight.data
-
plot
# Same four-panel animation as before, now for the (-10, -1) starting point.
# Top row = 3-D loss surfaces with the descent path; bottom row = fitted curves.
fig = plt.figure()
ax1= fig.add_subplot(2,2,1,projection='3d')
ax2= fig.add_subplot(2,2,2,projection='3d')
ax3= fig.add_subplot(2,2,3)
ax4= fig.add_subplot(2,2,4)
ax1.elev = 15
ax2.elev = 15
ax1.azim = 75
ax2.azim = 75
fig.set_figheight(15)
fig.set_figwidth(15)
### init plot
# Loss surfaces sampled on the (w0, w1) grid computed earlier.
ax1.scatter(_w0,_w1,_l1,s=0.05)
ax2.scatter(_w0,_w1,_l2,s=0.05)
# Gray dot = initial parameters (-10, -1); red star = near-optimal (-1, 5.1).
ax1.scatter(-10,-1,lossfn_crossenp(-10,-1),color='gray') ## bceloss
ax1.scatter(-1,5.1,lossfn_crossenp(-1,5.1),s=200,color='red',marker='*') ## bceloss
ax2.scatter(-10,-1,lossfn_mse(-10,-1),color='gray') ## mseloss
ax2.scatter(-1,5.1,lossfn_mse(-1,5.1),s=200,color='red',marker='*') ## mseloss
# Data, true probability curve, and the first recorded fitted curve per loss.
ax3.scatter(X,y,alpha=0.01)
ax3.plot(X,v,'--')
line3, = ax3.plot(X,1/(1+torch.exp(-w0_bce[0]-w1_bce[0]*X)),'--')
ax4.scatter(X,y,alpha=0.01)
ax4.plot(X,v,'--')
line4, = ax4.plot(X,1/(1+torch.exp(-w0_mse[0]-w1_mse[0]*X)),'--')
### animation
def animate(i):
    # Frame i: add the i-th recorded (w0, w1, loss) point to each surface
    # and move the fitted sigmoid to the i-th recorded parameters.
    ax1.scatter(w0_bce[i],w1_bce[i],lossfn_crossenp(w0_bce[i],w1_bce[i]),color='gray')
    ax2.scatter(w0_mse[i],w1_mse[i],lossfn_mse(w0_mse[i],w1_mse[i]),color='gray')
    line3.set_ydata(1/(1+torch.exp(-w0_bce[i]-w1_bce[i]*X)))
    line4.set_ydata(1/(1+torch.exp(-w0_mse[i]-w1_mse[i]*X)))
    return line3,line4
# 50 frames = the 50 recorded snapshots (1000 epochs / 20).
ani = animation.FuncAnimation(fig, animate, frames=50)
plt.close()  # suppress the static figure; `ani` on the next line renders the animation
ani