import

CAM;Class Activation Mapping

2주차인가.. 개,고양이 구분하는 수업!

import torch 
from fastai.vision.all import *

data

path=untar_data(URLs.PETS)/'images'

path

Path('/home/csy/.fastai/data/oxford-iiit-pet/images')

files=get_image_files(path)

def label_func(f):
    if f[0].isupper():
        return 'cat' 
    else: 
        return 'dog'

dls=ImageDataLoaders.from_name_func(path,files,label_func,item_tfms=Resize(512))

learn

lrnr=cnn_learner(dls,resnet34,metrics=error_rate) # 34-> 작을수록 간단한 모형
lrnr.fine_tune(1)

모형뜯어보기

- 샘플로 하나의 관측치를 만든다.

get_image_files(path)[0]

Path('/home/csy/.fastai/data/oxford-iiit-pet/images/Bombay_13.jpg')

img = PILImage.create(get_image_files(path)[0])
img # 객체화 후 img에 저장

x, = first(dls.test_dl([img])) # tensor화 시키기, MNIST랑 본질은 비슷!

- 전체네트워크를 1,2로 나눈다.

lrnr.model[0]

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (5): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (3): BasicBlock(
      (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (6): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (3): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (4): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (5): BasicBlock(
      (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (7): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): BasicBlock(
      (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
)

net1=lrnr.model[0]
net2=lrnr.model[1]  # 0은 유지하고 1만 바꿀 거

- net2를 수정한다.

net1(x).shape

torch.Size([1, 512, 16, 16])

net2 # out_features=2인 것을 보니 soft max사용한 것 같음

Sequential(
  (0): AdaptiveConcatPool2d(
    (ap): AdaptiveAvgPool2d(output_size=1)
    (mp): AdaptiveMaxPool2d(output_size=1)
  )
  (1): Flatten(full=False)
  (2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): Dropout(p=0.25, inplace=False)
  (4): Linear(in_features=1024, out_features=512, bias=False)
  (5): ReLU(inplace=True)
  (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (7): Dropout(p=0.5, inplace=False)
  (8): Linear(in_features=512, out_features=2, bias=False)
)

net2 = torch.nn.Sequential(
    torch.nn.AdaptiveAvgPool2d(output_size=1),  # 512의 output을 1로
    torch.nn.Flatten(),
    torch.nn.Linear(512,out_features=2,bias=False)) # output 을 2로

- net1, net2를 묶어서 새로운 네트워크를 만들고 다시 학습

net=torch.nn.Sequential(net1,net2)

lrnr2=Learner(dls,net,metrics=accuracy)

lrnr2.loss_func, lrnr.loss_func

(FlattenedLoss of CrossEntropyLoss(), FlattenedLoss of CrossEntropyLoss())

- loss function은 기본으로 CrossEntropyLoss로 지정되어 있다.

lrnr2.fine_tune(5)

- net1의 parameter는 바뀌지 않고 새로운 net2만 parameter가 바뀜

- 시각화

net2

Sequential(
  (0): AdaptiveAvgPool2d(output_size=1)
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=512, out_features=2, bias=False)
)

net2의 0(maxpooling)과 2(Linear)를 볼 거임

net1(x).shape, net2[2].weight.shape

(torch.Size([1, 512, 16, 16]), torch.Size([2, 512]))

- - net1에서 512$\times$16$\times$16 놓고 net에서 2$\times$512 놓고 곱해서 2$\times$16$\times$16의 차원을 가진 tensor를 만들 거

camimg = torch.einsum('ij,jkl -> ikl', net2[2].weight, net1(x).squeeze())

camimg.shape

torch.Size([2, 16, 16])

원래는 [1,7,7] 이었는데.. $\to$ 그래서 (7,7)를 평균내서 양인지 음인지 판단했고, 음이면 고양이 양수이면 강아지 와 같은 식으로 예측했음 (반대도가능)
지금은 내가 데이터를 만들지 않았기 때문에 1을 고양이로 했는지 0을 강아지로 했는지 모르겠음
첫번째 차원이 왜 2인지도 클리어하지 않음 (마지막 활성화함수가 sigmoid가 아니고 softmax이기 때문이라는 것은 알고 있으나 명확하게 모르겠음)

--

소프트맥스 vs 시그모이드

- 시그모이드

y의 형태: 고양이=0, 개=1
마지막 활성화함수: $u \to \frac{e^u}{1+e^u}$ 이때 $u$는 시그모이드층의 인풋 (=마지막 리니어층의 아웃풋)
$u$의 값이 클수록 dog

- 소프트맥스

$y$의 형태: 고양이=[1,0], 개=[0,1]
마지막 활성화함수: $(u_1,u_2) \to \big(\frac{e^{u_1}}{e^{u_1}+e^{u_2}},\frac{e^{u_2}}{e^{u_1}+e^{u_2}} \big)$, 이때 $(u_1,u_2)$는 소프트맥스의 인풋 (=마지막 리니어층의 아웃풋)
$u_1$의 값이 클수록 cat, $u_2$의 값이 클수록 dog

- 참고로 $\big(\frac{e^{u_1}}{e^{u_1}+e^{u_2}},\frac{e^{u_2}}{e^{u_1}+e^{u_2}} \big)$에서 분자분모에 각각 $e^{-u_1}$을 곱하면

$$\big(\frac{1}{1+e^{u_2-u_1}},\frac{e^{u_2-u_1}}{1+e^{u_2-u_1}} \big)$$

그리고 $u_2-u_1=u$라고 생각하면

$$\big(\frac{1}{1+e^{u}},\frac{e^{u}}{1+e^{u}} \big)$$

이므로, 강아지라고 생각할 확률은 $\frac{e^u}{1+e^u}$, 고양이라고 생각할 확률은 $1-\frac{e^u}{1+e^u}$이 되므로 시그모이드와 같아진다.

- 결국 이 경우 (2개의 클래스를 가지는 경우)는 똑같은 모형을 이득도 없이 파라메터만 더 써서 표현한 꼴임

- 따라서 엄밀하게 따지면 이것은 파라메터의 낭비이다. 마치

$$y_i = \alpha_0 +\beta_0 +(\alpha_1+\beta_1)x_i+\epsilon_i$$

와 비슷함

-비효율적..

- 아래의 사례역시 유사하다.

사례1: Ber(p) 대신 Ber(p,q)로 쓰는 꼴, (단 $p+q=1$)
사례2: Bin(n,p) 대신 Bin(n, (p,q))로 쓰는 꼴, (단 $p+q=1$)

- 하지만 위와 같은 표현식은 다차원으로 확장이 용이할 경우가 많다.

- 그리고 사실 파라메터를 몇개 더 써도 큰 문제는 아님

- 전역최소해를 찾지 못할거라는 주장도 있지만 꼭 전역최소해를 찾야아하는 것도 아니다.

- 결론

소프트맥스는 시그모이드의 확장이다.
클래스의 수가 2개일 경우에는 (Sigmoid, BCEloss) 조합을 사용해야 하고 클래스의 수가 2개보다 클 경우에는 (Softmax, CrossEntropyLoss) 를 사용해야 한다.

그런데 사실 클래스의 수가 2개일 경우일때 (Softmax, CrossEntropyLoss)를 사용해도 그렇게 큰일나는것은 아니다. (흑백이미지를 칼라잉크로 출력하는 느낌) _ 비효율적인 것일 뿐
오히려 resnet 같이 최적화된 모형을 뜯어 고치면서 성능 저하시키는 것이 더 안좋을 수 있다. _ 효율적으로 보이는 모형보다 최대한 일반적인 모형에 넣는 것이 낫다

--

- 다시 돌아오자. camimg를 이미지를 AP layer에 통과시키자.

torch.nn.AdaptiveAvgPool2d(output_size=1)(camimg)

TensorImage([[[ 4.5492]],

        [[-3.8361]]], device='cuda:0', grad_fn=<AliasBackward0>)

- $y\approx[0,1]$ 임은 알겠는데 이것이 개인지 고양이인지는 모르겠음.

- dls에 코딩된 라벨을 확인

dls.vocab

['cat', 'dog']

뒷쪽값이 클수록 강아지이다.

- 강아지라고 판단한 근거를 시각화하자.

2번쩨 이미지의 평균이 어떻게 나왔는지 시각화하기 위해 caming[1]

camimg[1].to("cpu").detach()
camimg[1].to("cpu").data()
#같음

plt.imshow(camimg[1].to("cpu").detach(),extent=(0,223,223,0),interpolation='bilinear',cmap='magma')

<matplotlib.image.AxesImage at 0x7f702c215af0>

- 학습에 사용된 그림

dls.train.decode((x,))[0].squeeze().show()

<AxesSubplot:>

dls.train.decade((x,))[0].shpae

첫 번째 observation
채널이 세 개,
size 224,224

- plot

fig, (ax1,ax2) = plt.subplots(1,2) 
# 
dls.train.decode((x,))[0].squeeze().show(ax=ax1)
ax1.imshow(camimg[0].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
#
dls.train.decode((x,))[0].squeeze().show(ax=ax2)
ax2.imshow(camimg[1].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')

<matplotlib.image.AxesImage at 0x7f70903df3a0>

magma: 검-보-빨-노 순으로 값이 크다.

search cma magma

- 오른쪽 그림에서 노란색으로 표현된 부분이 개라고 생각한 근거임

고양이가 아니라고 생각한 근거: 왼쪽 그림의 보라색
강아지라고 생각한 근거: 오른쪽 그림의 노란색

- (고양이,강아지)라고 생각한 확률

a=net(x).tolist()[0][0]
b=net(x).tolist()[0][1]
np.exp(a)/(np.exp(a)+np.exp(b)), np.exp(b)/(np.exp(a)+np.exp(b))

(0.999771931202788, 0.0002280687972120249)

예제

x, = first(dls.test_dl([PILImage.create('2022-01-13-cat.jpg')])) # 파일을 tensor화하고

a,b = net(x).tolist()[0]
catprob, dogprob = np.exp(a)/ (np.exp(a)+np.exp(b)) ,  np.exp(b)/ (np.exp(a)+np.exp(b))

camimg = torch.einsum('ij,jkl -> ikl', net2[2].weight, net1(x).squeeze())
fig, (ax1,ax2) = plt.subplots(1,2)
#
dls.train.decode((x,))[0].squeeze().show(ax=ax1)
ax1.imshow(camimg[0].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
ax1.set_title("cat(%s)" % catprob.round(5))
#
dls.train.decode((x,))[0].squeeze().show(ax=ax2)
ax2.imshow(camimg[1].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
ax2.set_title("dog(%s)" % dogprob.round(5))

Text(0.5, 1.0, 'dog(0.0)')

CAM 결과 확인

fig, ax = plt.subplots(5,5) 
k=0 
for i in range(5):
    for j in range(5): 
        x, = first(dls.test_dl([PILImage.create(get_image_files(path)[k])]))
        camimg = torch.einsum('ij,jkl -> ikl', net2[2].weight, net1(x).squeeze())
        a,b = net(x).tolist()[0]
        catprob, dogprob = np.exp(a)/ (np.exp(a)+np.exp(b)) ,  np.exp(b)/ (np.exp(a)+np.exp(b)) 
        if catprob>dogprob: 
            dls.train.decode((x,))[0].squeeze().show(ax=ax[i][j])
            ax[i][j].imshow(camimg[0].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
            ax[i][j].set_title("cat(%s)" % catprob.round(5))
        else: 
            dls.train.decode((x,))[0].squeeze().show(ax=ax[i][j])
            ax[i][j].imshow(camimg[1].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
            ax[i][j].set_title("dog(%s)" % dogprob.round(5))
        k=k+1 
fig.set_figwidth(16)            
fig.set_figheight(16)
fig.tight_layout()

fig, ax = plt.subplots(5,5) 
k=25 
for i in range(5):
    for j in range(5): 
        x, = first(dls.test_dl([PILImage.create(get_image_files(path)[k])]))
        camimg = torch.einsum('ij,jkl -> ikl', net2[2].weight, net1(x).squeeze())
        a,b = net(x).tolist()[0]
        catprob, dogprob = np.exp(a)/ (np.exp(a)+np.exp(b)) ,  np.exp(b)/ (np.exp(a)+np.exp(b)) 
        if catprob>dogprob: 
            dls.train.decode((x,))[0].squeeze().show(ax=ax[i][j])
            ax[i][j].imshow(camimg[0].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
            ax[i][j].set_title("cat(%s)" % catprob.round(5))
        else: 
            dls.train.decode((x,))[0].squeeze().show(ax=ax[i][j])
            ax[i][j].imshow(camimg[1].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
            ax[i][j].set_title("dog(%s)" % dogprob.round(5))
        k=k+1 
fig.set_figwidth(16)            
fig.set_figheight(16)
fig.tight_layout()

fig, ax = plt.subplots(5,5) 
k=50 
for i in range(5):
    for j in range(5): 
        x, = first(dls.test_dl([PILImage.create(get_image_files(path)[k])]))
        camimg = torch.einsum('ij,jkl -> ikl', net2[2].weight, net1(x).squeeze())
        a,b = net(x).tolist()[0]
        catprob, dogprob = np.exp(a)/ (np.exp(a)+np.exp(b)) ,  np.exp(b)/ (np.exp(a)+np.exp(b)) 
        if catprob>dogprob: 
            dls.train.decode((x,))[0].squeeze().show(ax=ax[i][j])
            ax[i][j].imshow(camimg[0].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
            ax[i][j].set_title("cat(%s)" % catprob.round(5))
        else: 
            dls.train.decode((x,))[0].squeeze().show(ax=ax[i][j])
            ax[i][j].imshow(camimg[1].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
            ax[i][j].set_title("dog(%s)" % dogprob.round(5))
        k=k+1 
fig.set_figwidth(16)            
fig.set_figheight(16)
fig.tight_layout()

fig, ax = plt.subplots(5,5) 
k=75
for i in range(5):
    for j in range(5): 
        x, = first(dls.test_dl([PILImage.create(get_image_files(path)[k])]))
        camimg = torch.einsum('ij,jkl -> ikl', net2[2].weight, net1(x).squeeze())
        a,b = net(x).tolist()[0]
        catprob, dogprob = np.exp(a)/ (np.exp(a)+np.exp(b)) ,  np.exp(b)/ (np.exp(a)+np.exp(b)) 
        if catprob>dogprob: 
            dls.train.decode((x,))[0].squeeze().show(ax=ax[i][j])
            ax[i][j].imshow(camimg[0].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
            ax[i][j].set_title("cat(%s)" % catprob.round(5))
        else: 
            dls.train.decode((x,))[0].squeeze().show(ax=ax[i][j])
            ax[i][j].imshow(camimg[1].to("cpu").detach(),alpha=0.5,extent=(0,511,511,0),interpolation='bilinear',cmap='magma')
            ax[i][j].set_title("dog(%s)" % dogprob.round(5))
        k=k+1 
fig.set_figwidth(16)            
fig.set_figheight(16)
fig.tight_layout()

강아지 목줄에 highlight되는 경우도 있고
수염 있으면 고양이라 생각할 수도
epoc이 커지면 highlight가 더 세부화되는 듯

wolf; 눈밭에서 찍은 사진이 많아서 눈에 highlight되는 경우도

discusstion about CAM

- 장점: CNN 모형의 판단근거를 시각화하기에 우수한 툴이다.

- 단점: 모형을 일부수정해야 한다.

- 단점2: 최종아웃풋에서만 시각화를 할 수 있음.

grad-CAM ; 각 층마다 내가 원하는 아웃풋에 시각화 가능

복잡...
ref: https://github.com/fastai/fastbook
CNN은 4,5,6장

epoch	train_loss	valid_loss	accuracy	time
0	0.148260	0.173483	0.937754	00:41
1	0.150990	0.589861	0.799729	00:42
2	0.096191	0.079743	0.974290	00:41
3	0.049358	0.062507	0.970230	00:41
4	0.025491	0.051147	0.980379	00:41