import

  • This session tries a model different from the week-5 MLP; we start by redoing the week-5 setup.
import torch 
from fastai.vision.all import * 
import graphviz
def gv(s): return graphviz.Source('digraph G{ rankdir="LR"'+ s + ';}')

data

- download data

path = untar_data(URLs.MNIST_SAMPLE)
path.ls()
(#3) [Path('/home/csy/.fastai/data/mnist_sample/train'),Path('/home/csy/.fastai/data/mnist_sample/labels.csv'),Path('/home/csy/.fastai/data/mnist_sample/valid')]

- list

threes=(path/'train'/'3').ls()
sevens=(path/'train'/'7').ls()

- list $\to$ image

Image.open(threes[4])

- image $\to$ tensor

tensor(Image.open(threes[4]))
tensor([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  64, 128, 128,
         191, 128,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0, 128, 255, 255, 255, 255,
         255, 255, 191,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0, 255, 255, 255, 191, 128, 128,
          64, 191, 255, 128,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0, 191, 128,   0,   0,   0,   0,
           0, 128, 255, 128,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0, 128, 255, 128,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0, 191, 255,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         128, 255,  64,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  64,
         255, 191,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 128, 255,
         255, 191,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 128, 191, 255, 255,
         255, 255, 255, 191,  64,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,  64, 255, 255, 255, 191, 128,
          64, 128, 255, 255, 255, 128,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0, 128, 128,  64,   0,   0,
           0,   0,   0,  64, 191, 255, 128,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0, 255, 255,  64,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0, 255, 255,  64,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0, 255, 255,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,  64, 191, 255, 128,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,  64,   0,   0,   0,   0,   0,   0,   0,
           0,  64, 191, 255, 255, 128,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0, 255, 128,   0,   0,   0,   0,   0,   0,
         128, 255, 255, 191,  64,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,  64, 255, 255, 128, 128, 128, 191, 255,
         255, 128,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0, 128, 128, 128, 255, 255, 191, 128,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
        [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
           0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0]],
       dtype=torch.uint8)
  • Note that `tensor` here is the function implemented in fastai, not PyTorch's.
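  • For reference, a minimal PyTorch-only sketch of the same conversion (assuming numpy is imported as np; fastai's star import also re-exports it):

import numpy as np
torch.from_numpy(np.array(Image.open(threes[4]))) # same uint8 values as fastai's tensor(...)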

- Convert every image in the lists to a tensor.

seven_tensor = torch.stack([tensor(Image.open(i)) for i in sevens]).float()/255
three_tensor = torch.stack([tensor(Image.open(i)) for i in threes]).float()/255

- Build $X$ and $y$.

seven_tensor.shape, three_tensor.shape
(torch.Size([6265, 28, 28]), torch.Size([6131, 28, 28]))
y=torch.tensor([0.0]*6265+ [1.0]*6131).reshape(12396,1) # label sevens as 0.0 and threes as 1.0
X=torch.vstack([seven_tensor,three_tensor]).reshape(12396,-1) # flatten each 28x28 image into a length-784 row
X.shape, y.shape
(torch.Size([12396, 784]), torch.Size([12396, 1]))

The MLP model from before

${\bf X} \to {\bf XW+b} \to f({\bf XW+b}) \to \dots \to {\bf y}$

  • ${\bf X}=12396 \times 784$ matrix
  • ${\bf y}=12396 \times 1$ (col) vector
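- As a reference point, a minimal sketch of this MLP (a plausible reconstruction of the week-5 model, not the original code):

net_mlp = torch.nn.Sequential(
    torch.nn.Linear(784,30), # Layer 1: 784 -> 30
    torch.nn.ReLU(),
    torch.nn.Linear(30,1),   # Layer 2: 30 -> 1
    torch.nn.Sigmoid())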

- The model from the textbook

gv('''
splines=line
subgraph cluster_1{
    style=filled;
    color=lightgrey;
    "x1"
    "x2"
    ".."
    "x784"
    label = "Layer 0"
}
subgraph cluster_2{
    style=filled;
    color=lightgrey;
    "x1" -> "node1"
    "x2" -> "node1"
    ".." -> "node1"
    
    "x784" -> "node1"
    "x1" -> "node2"
    "x2" -> "node2"
    ".." -> "node2"
    "x784" -> "node2"
    
    "x1" -> "..."
    "x2" -> "..."
    ".." -> "..."
    "x784" -> "..."

    "x1" -> "node30"
    "x2" -> "node30"
    ".." -> "node30"
    "x784" -> "node30"


    label = "Layer 1: ReLU"
}
subgraph cluster_3{
    style=filled;
    color=lightgrey;
    "node1" -> "y"
    "node2" -> "y"
    "..." -> "y"
    "node30" -> "y"
    label = "Layer 2: Sigmoid"
}
''')
[Graphviz rendering: Layer 0 (x1, x2, .., x784) → Layer 1: ReLU (node1, node2, ..., node30) → Layer 2: Sigmoid (y)]
  • The linear layer becomes a 2d convolution plus max pooling; the ReLU nonlinearity stays, and the model still ends with a linear layer and a sigmoid.

- Why should we have to flatten a 28$\times$28 image into a 784-dimensional vector before fitting the model?

- The models developed so far are built on a regression framework, so analyzing image data means forcing it into that regression mold.

- The dimension of one observation should be $1\times (28\times 28)$, not $784$.

  • That is for grayscale; once the three primary colors of light (RGB) come in, the channel dimension can triple.
X.shape
torch.Size([12396, 784])
X=X.reshape(12396,1,28,28)
X.shape
torch.Size([12396, 1, 28, 28])
plt.imshow(X[776][0])
<matplotlib.image.AxesImage at 0x7f15f0adef10>

2d convolution with window size 5 instead of a linear transformation

torch.nn.Conv2d?
Init signature:
torch.nn.Conv2d(
    in_channels: int,
    out_channels: int,
    kernel_size: Union[int, Tuple[int, int]],
    stride: Union[int, Tuple[int, int]] = 1,
    padding: Union[str, int, Tuple[int, int]] = 0,
    dilation: Union[int, Tuple[int, int]] = 1,
    groups: int = 1,
    bias: bool = True,
    padding_mode: str = 'zeros',
    device=None,
    dtype=None,
) -> None
Docstring:     
Applies a 2D convolution over an input signal composed of several input
planes.

In the simplest case, the output value of the layer with input size
:math:`(N, C_{\text{in}}, H, W)` and output :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})`
can be precisely described as:

.. math::
    \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
    \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)


where :math:`\star` is the valid 2D `cross-correlation`_ operator,
:math:`N` is a batch size, :math:`C` denotes a number of channels,
:math:`H` is a height of input planes in pixels, and :math:`W` is
width in pixels.


This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

* :attr:`stride` controls the stride for the cross-correlation, a single
  number or a tuple.

* :attr:`padding` controls the amount of padding applied to the input. It
  can be either a string {'valid', 'same'} or a tuple of ints giving the
  amount of implicit padding applied on both sides.

* :attr:`dilation` controls the spacing between the kernel points; also
  known as the à trous algorithm. It is harder to describe, but this `link`_
  has a nice visualization of what :attr:`dilation` does.

* :attr:`groups` controls the connections between inputs and outputs.
  :attr:`in_channels` and :attr:`out_channels` must both be divisible by
  :attr:`groups`. For example,

    * At groups=1, all inputs are convolved to all outputs.
    * At groups=2, the operation becomes equivalent to having two conv
      layers side by side, each seeing half the input channels
      and producing half the output channels, and both subsequently
      concatenated.
    * At groups= :attr:`in_channels`, each input channel is convolved with
      its own set of filters (of size
      :math:`\frac{\text{out\_channels}}{\text{in\_channels}}`).

The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

    - a single ``int`` -- in which case the same value is used for the height and width dimension
    - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
      and the second `int` for the width dimension

Note:
    When `groups == in_channels` and `out_channels == K * in_channels`,
    where `K` is a positive integer, this operation is also known as a "depthwise convolution".

    In other words, for an input of size :math:`(N, C_{in}, L_{in})`,
    a depthwise convolution with a depthwise multiplier `K` can be performed with the arguments
    :math:`(C_\text{in}=C_\text{in}, C_\text{out}=C_\text{in} \times \text{K}, ..., \text{groups}=C_\text{in})`.

Note:
    In some circumstances when given tensors on a CUDA device and using CuDNN, this operator may select a nondeterministic algorithm to increase performance. If this is undesirable, you can try to make the operation deterministic (potentially at a performance cost) by setting ``torch.backends.cudnn.deterministic = True``. See :doc:`/notes/randomness` for more information.

Note:
    ``padding='valid'`` is the same as no padding. ``padding='same'`` pads
    the input so the output has the shape as the input. However, this mode
    doesn't support any stride values other than 1.

Args:
    in_channels (int): Number of channels in the input image
    out_channels (int): Number of channels produced by the convolution
    kernel_size (int or tuple): Size of the convolving kernel
    stride (int or tuple, optional): Stride of the convolution. Default: 1
    padding (int, tuple or str, optional): Padding added to all four sides of
        the input. Default: 0
    padding_mode (string, optional): ``'zeros'``, ``'reflect'``,
        ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
    dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
    groups (int, optional): Number of blocked connections from input
        channels to output channels. Default: 1
    bias (bool, optional): If ``True``, adds a learnable bias to the
        output. Default: ``True``


Shape:
    - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
    - Output: :math:`(N, C_{out}, H_{out}, W_{out})` where

      .. math::
          H_{out} = \left\lfloor\frac{H_{in}  + 2 \times \text{padding}[0] - \text{dilation}[0]
                    \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor

      .. math::
          W_{out} = \left\lfloor\frac{W_{in}  + 2 \times \text{padding}[1] - \text{dilation}[1]
                    \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor

Attributes:
    weight (Tensor): the learnable weights of the module of shape
        :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},`
        :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`.
        The values of these weights are sampled from
        :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
        :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
    bias (Tensor):   the learnable bias of the module of shape
        (out_channels). If :attr:`bias` is ``True``,
        then the values of these weights are
        sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
        :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`

Examples:

    >>> # With square kernels and equal stride
    >>> m = nn.Conv2d(16, 33, 3, stride=2)
    >>> # non-square kernels and unequal stride and with padding
    >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
    >>> # non-square kernels and unequal stride and with padding and dilation
    >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
    >>> input = torch.randn(20, 16, 50, 100)
    >>> output = m(input)

.. _cross-correlation:
    https://en.wikipedia.org/wiki/Cross-correlation

.. _link:
    https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule.
File:           ~/anaconda3/envs/csy/lib/python3.8/site-packages/torch/nn/modules/conv.py
Type:           type
Subclasses:     LazyConv2d, Conv2d, ConvBn2d, Conv2d, Conv2d
c1=torch.nn.Conv2d(1,16,5) # in_channels=1 (grayscale), out_channels=16, window size 5
c1(X)
tensor([[[[-0.0881, -0.0881, -0.0881,  ..., -0.0881, -0.0881, -0.0881],
          [-0.0881, -0.0881, -0.0881,  ..., -0.0881, -0.0881, -0.0881],
          ...,
          [-0.0881, -0.0881, -0.0881,  ..., -0.0881, -0.0881, -0.0881]],

         [[ 0.0066,  0.0066,  0.0066,  ...,  0.0066,  0.0066,  0.0066],
          ...,
          [ 0.0066,  0.0066,  0.0066,  ...,  0.0066,  0.0066,  0.0066]],

         ...,

         [[ 0.1960,  0.1960,  0.1960,  ...,  0.1960,  0.1960,  0.1960],
          ...,
          [ 0.1960,  0.1960,  0.1960,  ...,  0.1960,  0.1960,  0.1960]]],

        ...(output truncated; one 16×24×24 block per image, 12396 in total)...],
       grad_fn=<MkldnnConvolutionBackward0>)
X.shape, c1(X).shape
(torch.Size([12396, 1, 28, 28]), torch.Size([12396, 16, 24, 24]))
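- Sanity check with the Shape formula from the docstring above (kernel_size=5, stride=1, padding=0, dilation=1):

$H_{out} = \left\lfloor\frac{28 + 2\cdot 0 - 1\cdot(5-1) - 1}{1} + 1\right\rfloor = 28-5+1 = 24$

so each of the 16 output channels is a $24\times 24$ feature map.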
fig, axs = plt.subplots(4,4) 
k=0 
for i in range(4):
    for j in range(4):
        axs[i,j].imshow(c1(X)[776][k].data) 
        k=k+1
fig.set_figheight(8)
fig.set_figwidth(8)
fig.tight_layout()
fig

MaxPool2d + ReLU instead of ReLU()

MaxPool2d

m1=torch.nn.MaxPool2d(2)
m2=torch.nn.MaxPool2d(3)
  • With kernel size 2, the map is pooled in 2×2 blocks, so 24 becomes 12
  • With kernel size 3, it is pooled in 3×3 blocks, so 24 becomes 8
X.shape,c1(X).shape,m1(c1(X)).shape
(torch.Size([12396, 1, 28, 28]),
 torch.Size([12396, 16, 24, 24]),
 torch.Size([12396, 16, 12, 12]))
X.shape,c1(X).shape,m2(c1(X)).shape
(torch.Size([12396, 1, 28, 28]),
 torch.Size([12396, 16, 24, 24]),
 torch.Size([12396, 16, 8, 8]))
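  • Note: MaxPool2d uses stride = kernel_size by default, so the pooled sizes are simply $\lfloor 24/2\rfloor=12$ and $\lfloor 24/3\rfloor=8$.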
fig, axs = plt.subplots(4,4) 
k=0 
for i in range(4):
    for j in range(4):
        axs[i,j].imshow(m1(c1(X))[776][k].data) 
        k=k+1
fig.set_figheight(8)
fig.set_figwidth(8)
fig.tight_layout()    
  • These are lower-resolution versions of the pre-maxpool feature maps;
  • the role of maxpool is to reduce the resolution.
fig, axs = plt.subplots(4,4) 
k=0 
for i in range(4):
    for j in range(4):
        axs[i,j].imshow(m2(c1(X))[776][k].data) 
        k=k+1
fig.set_figheight(8)
fig.set_figwidth(8)
fig.tight_layout()    

ReLU

a1=torch.nn.ReLU()
X.shape,c1(X).shape, m1(c1(X)).shape, a1(m1(c1(X))).shape
(torch.Size([12396, 1, 28, 28]),
 torch.Size([12396, 16, 24, 24]),
 torch.Size([12396, 16, 12, 12]),
 torch.Size([12396, 16, 12, 12]))
X.shape,c1(X).shape, m2(c1(X)).shape, a1(m2(c1(X))).shape
(torch.Size([12396, 1, 28, 28]),
 torch.Size([12396, 16, 24, 24]),
 torch.Size([12396, 16, 8, 8]),
 torch.Size([12396, 16, 8, 8]))
  • Since the maps pass through a nonlinear transformation, the pictures change slightly(?!)
fig, axs = plt.subplots(4,4) 
k=0 
for i in range(4):
    for j in range(4):
        axs[i,j].imshow(a1(m1(c1(X)))[776][k].data) 
        k=k+1
fig.set_figheight(8)
fig.set_figwidth(8)
fig.tight_layout()    
fig, axs = plt.subplots(4,4) 
k=0 
for i in range(4):
    for j in range(4):
        axs[i,j].imshow(a1(m2(c1(X)))[776][k].data) 
        k=k+1
fig.set_figheight(8)
fig.set_figwidth(8)
fig.tight_layout()    
a1
ReLU()
torch.manual_seed(1)
_A= torch.randn((3,3))
_A
tensor([[ 0.6614,  0.2669,  0.0617],
        [ 0.6213, -0.4519, -0.1661],
        [-1.5228,  0.3817, -1.0276]])
a1(_A)
tensor([[0.6614, 0.2669, 0.0617],
        [0.6213, 0.0000, 0.0000],
        [0.0000, 0.3817, 0.0000]])

$\to$ Sigmoid

- Current state

a1(m1(c1(X))).shape
torch.Size([12396, 16, 12, 12])
a1(m2(c1(X))).shape
torch.Size([12396, 16, 8, 8])
  • X after applying convolution, max pooling, and ReLU

- Flatten it

  • y is a vector of 0s and 1s; here we begin the work of carrying the $28\times 28$ images onto y
a1(m1(c1(X))).reshape(12396,-1).shape
torch.Size([12396, 2304])
a1(m2(c1(X))).reshape(12396,-1).shape
torch.Size([12396, 1024])
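  • These are just channels $\times$ height $\times$ width: $16\times 12\times 12 = 2304$ and $16\times 8\times 8 = 1024$.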

- Reduce the 2304 dimensions (1024 in the m2 case) to 1.

l1=torch.nn.Linear(in_features=2304,out_features=1) 
l2=torch.nn.Linear(in_features=1024,out_features=1) 

Applying the linear layer

l1(a1(m1(c1(X))).reshape(12396,-1))
tensor([[-0.0654],
        [-0.1279],
        [-0.2040],
        ...,
        [-0.1337],
        [ 0.0149],
        [-0.0679]], grad_fn=<AddmmBackward0>)
l2(a1(m2(c1(X))).reshape(12396,-1))
tensor([[-0.8913],
        [-0.6812],
        [-0.6057],
        ...,
        [-0.9848],
        [-0.1207],
        [-0.2860]], grad_fn=<AddmmBackward0>)

Forced conversion to a vector

- Apply the sigmoid.

a2=torch.nn.Sigmoid() 
a2(l1(a1(m1(c1(X))).reshape(12396,-1)))
a2(l2(a1(m2(c1(X))).reshape(12396,-1)))
tensor([[0.2908],
        [0.3360],
        [0.3530],
        ...,
        [0.2719],
        [0.4699],
        [0.4290]], grad_fn=<SigmoidBackward0>)

Designing the network

net = nn.Sequential(
    c1, # convolution (linear)
    m1, # max pooling (nonlinear) -- effect? lets the image be understood hierarchically 
    a1, # ReLU (nonlinear) 
    
    a1(m1(c1(X))).reshape(12396,-1), ## we need to implement this part?? 
    
    l1) 
## the final a2 is omitted, since it will be built into torch.nn.BCEWithLogitsLoss()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-58-3565587ba507> in <module>
----> 1 net = nn.Sequential(
      2     c1, # 컨볼루션(선형)
      3     m1, # 맥스풀링(비선형) -- 효과? 이미지를 계층적으로 파악할 수 있게함
      4     a1, # 렐루(비선형)
      5 

~/anaconda3/envs/csy/lib/python3.8/site-packages/torch/nn/modules/container.py in __init__(self, *args)
     89         else:
     90             for idx, module in enumerate(args):
---> 91                 self.add_module(str(idx), module)
     92 
     93     def _get_item_by_idx(self, iterator, idx) -> T:

~/anaconda3/envs/csy/lib/python3.8/site-packages/torch/nn/modules/module.py in add_module(self, name, module)
    375         """
    376         if not isinstance(module, Module) and module is not None:
--> 377             raise TypeError("{} is not a Module subclass".format(
    378                 torch.typename(module)))
    379         elif not isinstance(name, torch._six.string_classes):

TypeError: torch.FloatTensor is not a Module subclass

We need to put a layer in here, but building it is not straightforward

If we use a loss function already implemented in torch, we do not need to implement the final activation ourselves (covered last lecture)
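A quick sanity check of that claim (a sketch): BCEWithLogitsLoss fuses the sigmoid into the binary cross-entropy in a more numerically stable way, so it matches Sigmoid + BCELoss up to floating-point error.

z = torch.randn(5,1) # raw scores (logits)
t = torch.rand(5,1)  # targets in [0,1]
torch.nn.BCEWithLogitsLoss()(z,t), torch.nn.BCELoss()(torch.sigmoid(z),t) # the two values agree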

net = nn.Sequential(
    c1, # convolution (linear)
    m1, # max pooling (nonlinear) -- effect? lets the image be understood hierarchically 
    a1, # ReLU (nonlinear) 
    
#    a1(m1(c1(X))).reshape(12396,-1), ## we still need to implement this part?? 
    
    l1) 
## the final a2 is omitted, since it will be built into torch.nn.BCEWithLogitsLoss()

- In the end we have to implement the commented-out part.

c1?? 
Signature:      c1(*input, **kwargs)
Type:           Conv2d
String form:    Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
File:           ~/anaconda3/envs/csy/lib/python3.8/site-packages/torch/nn/modules/conv.py
Source:        
class Conv2d(_ConvNd):
    __doc__ = r"""Applies a 2D convolution over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size
    :math:`(N, C_{\text{in}}, H, W)` and output :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})`
    can be precisely described as:

    .. math::
        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
        \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)


    where :math:`\star` is the valid 2D `cross-correlation`_ operator,
    :math:`N` is a batch size, :math:`C` denotes a number of channels,
    :math:`H` is a height of input planes in pixels, and :math:`W` is
    width in pixels.
    """ + r"""

    This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

    * :attr:`stride` controls the stride for the cross-correlation, a single
      number or a tuple.

    * :attr:`padding` controls the amount of padding applied to the input. It
      can be either a string {{'valid', 'same'}} or a tuple of ints giving the
      amount of implicit padding applied on both sides.

    * :attr:`dilation` controls the spacing between the kernel points; also
      known as the à trous algorithm. It is harder to describe, but this `link`_
      has a nice visualization of what :attr:`dilation` does.

    {groups_note}

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    Note:
        {depthwise_separable_note}

    Note:
        {cudnn_reproducibility_note}

    Note:
        ``padding='valid'`` is the same as no padding. ``padding='same'`` pads
        the input so the output has the shape as the input. However, this mode
        doesn't support any stride values other than 1.

    Args:
        in_channels (int): Number of channels in the input image
        out_channels (int): Number of channels produced by the convolution
        kernel_size (int or tuple): Size of the convolving kernel
        stride (int or tuple, optional): Stride of the convolution. Default: 1
        padding (int, tuple or str, optional): Padding added to all four sides of
            the input. Default: 0
        padding_mode (string, optional): ``'zeros'``, ``'reflect'``,
            ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
        dilation (int or tuple, optional): Spacing between kernel elements. Default: 1
        groups (int, optional): Number of blocked connections from input
            channels to output channels. Default: 1
        bias (bool, optional): If ``True``, adds a learnable bias to the
            output. Default: ``True``
    """.format(**reproducibility_notes, **convolution_notes) + r"""

    Shape:
        - Input: :math:`(N, C_{in}, H_{in}, W_{in})`
        - Output: :math:`(N, C_{out}, H_{out}, W_{out})` where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in}  + 2 \times \text{padding}[0] - \text{dilation}[0]
                        \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in}  + 2 \times \text{padding}[1] - \text{dilation}[1]
                        \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor

    Attributes:
        weight (Tensor): the learnable weights of the module of shape
            :math:`(\text{out\_channels}, \frac{\text{in\_channels}}{\text{groups}},`
            :math:`\text{kernel\_size[0]}, \text{kernel\_size[1]})`.
            The values of these weights are sampled from
            :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
            :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`
        bias (Tensor):   the learnable bias of the module of shape
            (out_channels). If :attr:`bias` is ``True``,
            then the values of these weights are
            sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
            :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel\_size}[i]}`

    Examples:

        >>> # With square kernels and equal stride
        >>> m = nn.Conv2d(16, 33, 3, stride=2)
        >>> # non-square kernels and unequal stride and with padding
        >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
        >>> # non-square kernels and unequal stride and with padding and dilation
        >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
        >>> input = torch.randn(20, 16, 50, 100)
        >>> output = m(input)

    .. _cross-correlation:
        https://en.wikipedia.org/wiki/Cross-correlation

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: _size_2_t,
        stride: _size_2_t = 1,
        padding: Union[str, _size_2_t] = 0,
        dilation: _size_2_t = 1,
        groups: int = 1,
        bias: bool = True,
        padding_mode: str = 'zeros',  # TODO: refine this type
        device=None,
        dtype=None
    ) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        kernel_size_ = _pair(kernel_size)
        stride_ = _pair(stride)
        padding_ = padding if isinstance(padding, str) else _pair(padding)
        dilation_ = _pair(dilation)
        super(Conv2d, self).__init__(
            in_channels, out_channels, kernel_size_, stride_, padding_, dilation_,
            False, _pair(0), groups, bias, padding_mode, **factory_kwargs)

    def _conv_forward(self, input: Tensor, weight: Tensor, bias: Optional[Tensor]):
        if self.padding_mode != 'zeros':
            return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
                            weight, bias, self.stride,
                            _pair(0), self.dilation, self.groups)
        return F.conv2d(input, weight, bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self, input: Tensor) -> Tensor:
        return self._conv_forward(input, self.weight, self.bias)
Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule.
m1??
Signature:      m1(*input, **kwargs)
Type:           MaxPool2d
String form:    MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
File:           ~/anaconda3/envs/csy/lib/python3.8/site-packages/torch/nn/modules/pooling.py
Source:        
class MaxPool2d(_MaxPoolNd):
    r"""Applies a 2D max pooling over an input signal composed of several input
    planes.

    In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
    output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
    can be precisely described as:

    .. math::
        \begin{aligned}
            out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
                                    & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
                                                   \text{stride[1]} \times w + n)
        \end{aligned}

    If :attr:`padding` is non-zero, then the input is implicitly padded with negative infinity on both sides
    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points.
    It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.

    Note:
        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
        or the input. Sliding windows that would start in the right padded region are ignored.

    The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be:

        - a single ``int`` -- in which case the same value is used for the height and width dimension
        - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension,
          and the second `int` for the width dimension

    Args:
        kernel_size: the size of the window to take a max over
        stride: the stride of the window. Default value is :attr:`kernel_size`
        padding: implicit zero padding to be added on both sides
        dilation: a parameter that controls the stride of elements in the window
        return_indices: if ``True``, will return the max indices along with the outputs.
                        Useful for :class:`torch.nn.MaxUnpool2d` later
        ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})` or :math:`(C, H_{in}, W_{in})`
        - Output: :math:`(N, C, H_{out}, W_{out})` or :math:`(C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]}
                    \times (\text{kernel\_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]}
                    \times (\text{kernel\_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor

    Examples::

        >>> # pool of square window of size=3, stride=2
        >>> m = nn.MaxPool2d(3, stride=2)
        >>> # pool of non-square window
        >>> m = nn.MaxPool2d((3, 2), stride=(2, 1))
        >>> input = torch.randn(20, 16, 50, 32)
        >>> output = m(input)

    .. _link:
        https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md
    """

    kernel_size: _size_2_t
    stride: _size_2_t
    padding: _size_2_t
    dilation: _size_2_t

    def forward(self, input: Tensor) -> Tensor:
        return F.max_pool2d(input, self.kernel_size, self.stride,
                            self.padding, self.dilation, self.ceil_mode,
                            self.return_indices)
Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule.
a1??
Signature:      a1(*input, **kwargs)
Type:           ReLU
String form:    ReLU()
File:           ~/anaconda3/envs/csy/lib/python3.8/site-packages/torch/nn/modules/activation.py
Source:        
class ReLU(Module):
    r"""Applies the rectified linear unit function element-wise:

    :math:`\text{ReLU}(x) = (x)^+ = \max(0, x)`

    Args:
        inplace: can optionally do the operation in-place. Default: ``False``

    Shape:
        - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
        - Output: :math:`(*)`, same shape as the input.

    .. image:: ../scripts/activation_images/ReLU.png

    Examples::

        >>> m = nn.ReLU()
        >>> input = torch.randn(2)
        >>> output = m(input)


      An implementation of CReLU - https://arxiv.org/abs/1603.05201

        >>> m = nn.ReLU()
        >>> input = torch.randn(2).unsqueeze(0)
        >>> output = torch.cat((m(input),m(-input)))
    """
    __constants__ = ['inplace']
    inplace: bool

    def __init__(self, inplace: bool = False):
        super(ReLU, self).__init__()
        self.inplace = inplace

    def forward(self, input: Tensor) -> Tensor:
        return F.relu(input, inplace=self.inplace)

    def extra_repr(self) -> str:
        inplace_str = 'inplace=True' if self.inplace else ''
        return inplace_str
Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule.
l1??
Signature:      l1(*input, **kwargs)
Type:           Linear
String form:    Linear(in_features=2304, out_features=1, bias=True)
File:           ~/anaconda3/envs/csy/lib/python3.8/site-packages/torch/nn/modules/linear.py
Source:        
class Linear(Module):
    r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`

    This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        bias: If set to ``False``, the layer will not learn an additive bias.
            Default: ``True``

    Shape:
        - Input: :math:`(*, H_{in})` where :math:`*` means any number of
          dimensions including none and :math:`H_{in} = \text{in\_features}`.
        - Output: :math:`(*, H_{out})` where all but the last dimension
          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`(\text{out\_features}, \text{in\_features})`. The values are
            initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
            :math:`k = \frac{1}{\text{in\_features}}`
        bias:   the learnable bias of the module of shape :math:`(\text{out\_features})`.
                If :attr:`bias` is ``True``, the values are initialized from
                :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
                :math:`k = \frac{1}{\text{in\_features}}`

    Examples::

        >>> m = nn.Linear(20, 30)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 30])
    """
    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
        if bias:
            self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
        # uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see
        # https://github.com/pytorch/pytorch/issues/57109
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input: Tensor) -> Tensor:
        return F.linear(input, self.weight, self.bias)

    def extra_repr(self) -> str:
        return 'in_features={}, out_features={}, bias={}'.format(
            self.in_features, self.out_features, self.bias is not None
        )
Init docstring: Initializes internal Module state, shared by both nn.Module and ScriptModule.

- What c1, m1, a1, and l1 have in common

  • Each is an instance of a class that inherits from something (torch.nn.Module).
  • Each has a forward method.

- How to build a custom layer

  • Make a class that inherits from torch.nn.Module.
  • Define a forward method (the value to return to the next layer).
class Flatten(torch.nn.Module):
    def forward(self,x): 
        return x.reshape(x.shape[0],-1) # flatten everything but the batch dimension (12396 here)
flatten=Flatten()
flatten(a1(m1(c1(X)))).shape
torch.Size([12396, 2304])
flatten(a1(m2(c1(X)))).shape
torch.Size([12396, 1024])

- It seems to be implemented correctly.
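- Side note: recent PyTorch versions also ship a built-in torch.nn.Flatten, which keeps dim 0 and flattens the rest by default, so it could replace the custom class (we keep the hand-made flatten below):

torch.nn.Flatten() # start_dim=1, end_dim=-1 by default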

net1 = nn.Sequential(
    c1, # convolution (linear)
    m1, # max pooling (nonlinear) -- effect? lets the image be understood hierarchically 
    a1, # ReLU (nonlinear) 
    flatten, # implements a1(m1(c1(X))).reshape(12396,-1)
    l1) 
## the final a2 is omitted, since it will be built into torch.nn.BCEWithLogitsLoss()
net2 = nn.Sequential(
    c1, # convolution (linear)
    m2, # max pooling (nonlinear) -- effect? lets the image be understood hierarchically 
    a1, # ReLU (nonlinear) 
    flatten, # implements a1(m2(c1(X))).reshape(12396,-1)
    l2) 
## the final a2 is omitted, since it will be built into torch.nn.BCEWithLogitsLoss()
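- A quick way to see how small these networks are (a sketch; the counts follow from the layer shapes):

sum(p.numel() for p in net1.parameters()) # conv: 16*1*5*5+16=416, linear: 2304+1=2305 -> 2721
sum(p.numel() for p in net2.parameters()) # conv: 416, linear: 1024+1=1025 -> 1441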

- Define the loss function and optimizer

loss_fn=torch.nn.BCEWithLogitsLoss()
optimizer1= torch.optim.Adam(net1.parameters()) # one optimizer per network, so l2 gets updated too
optimizer2= torch.optim.Adam(net2.parameters())

- step1~4

for epoc in range(200): 
    ## 1 forward
    yhat1=net1(X)
    ## 2 loss
    loss=loss_fn(yhat1,y) 
    ## 3 backprop
    loss.backward()
    ## 4 update, then reset gradients
    optimizer1.step()
    net1.zero_grad()
for epoc in range(200): 
    ## 1 forward
    yhat2=net2(X)
    ## 2 loss
    loss=loss_fn(yhat2,y) 
    ## 3 backprop
    loss.backward()
    ## 4 update, then reset gradients
    optimizer2.step()
    net2.zero_grad()
plt.plot(y)
plt.plot(a2(yhat1.data),'.')
[<matplotlib.lines.Line2D at 0x7f163fd28790>]
plt.plot(y)
plt.plot(a2(yhat2.data),'.')
[<matplotlib.lines.Line2D at 0x7f1640398ee0>]
ypred1=a2(yhat1.data)>0.5 
ypred2=a2(yhat2.data)>0.5 
sum(ypred1==y)/12396
tensor([0.9930])
sum(ypred2==y)/12396
tensor([0.9954])
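- An equivalent, slightly more idiomatic accuracy computation (a sketch):

(ypred1 == y).float().mean(), (ypred2 == y).float().mean()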

- The performance improved a bit (it was already good, and got slightly better).