Imports

import numpy as np
import matplotlib.pyplot as plt 
import tensorflow as tf 
import tensorflow.experimental.numpy as tnp 
tnp.experimental_enable_numpy_behavior()
%load_ext tensorboard
import graphviz
def gv(s): return graphviz.Source('digraph G{ rankdir="LR"'+ s + ';}')

1. Fashion_mnist, DNN (30 points)

(1) Load the fashion_mnist data using tf.keras.datasets.fashion_mnist.load_data(), then fit it with the network below.

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train.shape, y_train.shape, x_test.shape, y_test.shape
((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))
X = tf.constant(x_train.reshape(-1,28,28,1),dtype=tf.float64)
y = tf.keras.utils.to_categorical(y_train)
XX = tf.constant(x_test.reshape(-1,28,28,1),dtype=tf.float64)
yy = tf.keras.utils.to_categorical(y_test)
  • Use accuracy as the evaluation metric.
  • Set epochs to 10.
  • Use the adam optimizer.

gv('''
splines=line
subgraph cluster_1{
    style=filled;
    color=lightgrey;
    "x1"
    "x2"
    ".."
    "x784"
    label = "Layer 0"
}
subgraph cluster_2{
    style=filled;
    color=lightgrey;
    "x1" -> "node1"
    "x2" -> "node1"
    ".." -> "node1"
    "x784" -> "node1"
    
    "x1" -> "node2"
    "x2" -> "node2"
    ".." -> "node2"
    "x784" -> "node2"
    
    "x1" -> "..."
    "x2" -> "..."
    ".." -> "..."
    "x784" -> "..."

    "x1" -> "node20"
    "x2" -> "node20"
    ".." -> "node20"
    "x784" -> "node20"


    label = "Layer 1: relu"
}
subgraph cluster_3{
    style=filled;
    color=lightgrey;
    "node1" -> "node1 "
    "node2" -> "node1 "
    "..." -> "node1 "
    "node20" -> "node1 "
    
    "node1" -> "node2 "
    "node2" -> "node2 "
    "..." -> "node2 "
    "node20" -> "node2 "
    
    "node1" -> "... "
    "node2" -> "... "
    "..." -> "... "
    "node20" -> "... "

    "node1" -> "node30 "
    "node2" -> "node30 "
    "..." -> "node30 "
    "node20" -> "node30 "


    label = "Layer 2: relu"
}
subgraph cluster_4{
    style=filled;
    color=lightgrey;

    "node1 " -> "y10"
    "node2 " -> "y10"
    "... " -> "y10"
    "node30 " -> "y10"
    
    "node1 " -> "y1"
    "node2 " -> "y1"
    "... " -> "y1"
    "node30 " -> "y1"
    
    "node1 " -> "."
    "node2 " -> "."
    "... " -> "."
    "node30 " -> "."
    
    label = "Layer 3: softmax"
}
''')
[Rendered graph: Layer 0 (inputs x1..x784) → Layer 1: relu (node1..node20) → Layer 2: relu (node1..node30) → Layer 3: softmax (outputs y1..y10)]
tf.random.set_seed(1213)
!rm -rf logs
net = tf.keras.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(20,activation='relu'))
net.add(tf.keras.layers.Dense(30,activation='relu'))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
net.fit(X,y,epochs=10,batch_size=200)
Epoch 1/10
 48/300 [===>..........................] - ETA: 0s - loss: 10.8146 - accuracy: 0.1110
2022-06-13 19:59:51.192143: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
300/300 [==============================] - 2s 4ms/step - loss: 3.3773 - accuracy: 0.1935
Epoch 2/10
300/300 [==============================] - 1s 4ms/step - loss: 1.7547 - accuracy: 0.2829
Epoch 3/10
300/300 [==============================] - 1s 4ms/step - loss: 1.5530 - accuracy: 0.3703
Epoch 4/10
300/300 [==============================] - 1s 4ms/step - loss: 1.3897 - accuracy: 0.4294
Epoch 5/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2927 - accuracy: 0.4505
Epoch 6/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2590 - accuracy: 0.4573
Epoch 7/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2366 - accuracy: 0.4611
Epoch 8/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2226 - accuracy: 0.4668
Epoch 9/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2101 - accuracy: 0.4726
Epoch 10/10
300/300 [==============================] - 1s 4ms/step - loss: 1.1183 - accuracy: 0.5219
<keras.callbacks.History at 0x7fa3399a4580>
net.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 flatten (Flatten)           (200, 784)                0         
                                                                 
 dense (Dense)               (200, 20)                 15700     
                                                                 
 dense_1 (Dense)             (200, 30)                 630       
                                                                 
 dense_2 (Dense)             (200, 10)                 310       
                                                                 
=================================================================
Total params: 16,640
Trainable params: 16,640
Non-trainable params: 0
_________________________________________________________________

(2) Using the network fitted in (1), compute the accuracy on the test data.

net.evaluate(XX,yy)[1]
313/313 [==============================] - 1s 3ms/step - loss: 1.0767 - accuracy: 0.5299
0.5299000144004822

(3) Split off 20% of the training set as validation data and train for 50 epochs. Visualize the train accuracy and validation accuracy with TensorBoard and interpret the result. Can this be considered overfitting?

tf.random.set_seed(1213)
!rm -rf logs
net = tf.keras.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(20,activation='relu'))
net.add(tf.keras.layers.Dense(30,activation='relu'))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
cb1 = tf.keras.callbacks.TensorBoard()
net.fit(X,y,epochs=50,batch_size=200,validation_split=0.2,callbacks=cb1,verbose=1)
Epoch 1/50
240/240 [==============================] - 1s 5ms/step - loss: 3.5097 - accuracy: 0.1968 - val_loss: 1.8552 - val_accuracy: 0.2494
Epoch 2/50
240/240 [==============================] - 1s 5ms/step - loss: 1.7564 - accuracy: 0.2964 - val_loss: 1.6947 - val_accuracy: 0.3108
Epoch 3/50
240/240 [==============================] - 1s 5ms/step - loss: 1.6191 - accuracy: 0.3477 - val_loss: 1.5541 - val_accuracy: 0.3734
Epoch 4/50
240/240 [==============================] - 1s 5ms/step - loss: 1.4824 - accuracy: 0.4025 - val_loss: 1.4276 - val_accuracy: 0.4159
Epoch 5/50
240/240 [==============================] - 1s 5ms/step - loss: 1.3536 - accuracy: 0.4411 - val_loss: 1.3198 - val_accuracy: 0.4467
Epoch 6/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2807 - accuracy: 0.4556 - val_loss: 1.2753 - val_accuracy: 0.4556
Epoch 7/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2510 - accuracy: 0.4594 - val_loss: 1.2603 - val_accuracy: 0.4574
Epoch 8/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2330 - accuracy: 0.4637 - val_loss: 1.2431 - val_accuracy: 0.4630
Epoch 9/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2188 - accuracy: 0.4697 - val_loss: 1.2351 - val_accuracy: 0.4600
Epoch 10/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2067 - accuracy: 0.4711 - val_loss: 1.2216 - val_accuracy: 0.4864
Epoch 11/50
240/240 [==============================] - 1s 5ms/step - loss: 1.1461 - accuracy: 0.5168 - val_loss: 1.1744 - val_accuracy: 0.4754
Epoch 12/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0677 - accuracy: 0.5422 - val_loss: 1.0636 - val_accuracy: 0.5376
Epoch 13/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0441 - accuracy: 0.5515 - val_loss: 1.0395 - val_accuracy: 0.5437
Epoch 14/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0226 - accuracy: 0.5614 - val_loss: 1.0468 - val_accuracy: 0.5420
Epoch 15/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0128 - accuracy: 0.5653 - val_loss: 1.0892 - val_accuracy: 0.5123
Epoch 16/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0035 - accuracy: 0.5689 - val_loss: 1.0193 - val_accuracy: 0.5533
Epoch 17/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9862 - accuracy: 0.5775 - val_loss: 1.0032 - val_accuracy: 0.5740
Epoch 18/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9781 - accuracy: 0.5812 - val_loss: 0.9953 - val_accuracy: 0.5825
Epoch 19/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9797 - accuracy: 0.5779 - val_loss: 0.9879 - val_accuracy: 0.5810
Epoch 20/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9715 - accuracy: 0.5822 - val_loss: 1.0017 - val_accuracy: 0.5764
Epoch 21/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9582 - accuracy: 0.5892 - val_loss: 0.9579 - val_accuracy: 0.5933
Epoch 22/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9258 - accuracy: 0.6129 - val_loss: 0.9111 - val_accuracy: 0.6306
Epoch 23/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8791 - accuracy: 0.6423 - val_loss: 0.8959 - val_accuracy: 0.6424
Epoch 24/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8541 - accuracy: 0.6544 - val_loss: 0.8833 - val_accuracy: 0.6637
Epoch 25/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8301 - accuracy: 0.6671 - val_loss: 0.8632 - val_accuracy: 0.6641
Epoch 26/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8253 - accuracy: 0.6659 - val_loss: 0.8626 - val_accuracy: 0.6727
Epoch 27/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8207 - accuracy: 0.6669 - val_loss: 0.8595 - val_accuracy: 0.6400
Epoch 28/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7959 - accuracy: 0.6794 - val_loss: 0.8151 - val_accuracy: 0.6730
Epoch 29/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7840 - accuracy: 0.6826 - val_loss: 0.8324 - val_accuracy: 0.6733
Epoch 30/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7770 - accuracy: 0.6865 - val_loss: 0.7973 - val_accuracy: 0.6892
Epoch 31/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7670 - accuracy: 0.6911 - val_loss: 0.7877 - val_accuracy: 0.6934
Epoch 32/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7633 - accuracy: 0.6913 - val_loss: 0.7996 - val_accuracy: 0.6836
Epoch 33/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7481 - accuracy: 0.6967 - val_loss: 0.7861 - val_accuracy: 0.6963
Epoch 34/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7396 - accuracy: 0.7008 - val_loss: 0.7612 - val_accuracy: 0.7003
Epoch 35/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7389 - accuracy: 0.6998 - val_loss: 0.7556 - val_accuracy: 0.7011
Epoch 36/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7380 - accuracy: 0.6976 - val_loss: 0.7496 - val_accuracy: 0.7014
Epoch 37/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7308 - accuracy: 0.7039 - val_loss: 0.7562 - val_accuracy: 0.6988
Epoch 38/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7295 - accuracy: 0.7047 - val_loss: 0.8636 - val_accuracy: 0.6724
Epoch 39/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7266 - accuracy: 0.7031 - val_loss: 0.7548 - val_accuracy: 0.7009
Epoch 40/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7215 - accuracy: 0.7068 - val_loss: 0.7552 - val_accuracy: 0.6841
Epoch 41/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7150 - accuracy: 0.7137 - val_loss: 0.7958 - val_accuracy: 0.7033
Epoch 42/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7016 - accuracy: 0.7319 - val_loss: 0.7204 - val_accuracy: 0.7433
Epoch 43/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6759 - accuracy: 0.7477 - val_loss: 0.7000 - val_accuracy: 0.7495
Epoch 44/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6561 - accuracy: 0.7523 - val_loss: 0.6759 - val_accuracy: 0.7585
Epoch 45/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6727 - accuracy: 0.7429 - val_loss: 0.6661 - val_accuracy: 0.7630
Epoch 46/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6734 - accuracy: 0.7401 - val_loss: 0.6809 - val_accuracy: 0.7562
Epoch 47/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6654 - accuracy: 0.7476 - val_loss: 0.7254 - val_accuracy: 0.7386
Epoch 48/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6624 - accuracy: 0.7493 - val_loss: 0.7612 - val_accuracy: 0.7347
Epoch 49/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6360 - accuracy: 0.7587 - val_loss: 0.6741 - val_accuracy: 0.7641
Epoch 50/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6333 - accuracy: 0.7595 - val_loss: 0.6873 - val_accuracy: 0.7602
<keras.callbacks.History at 0x7fa3385e6a70>
%tensorboard --logdir logs --host 0.0.0.0
Reusing TensorBoard on port 6006 (pid 2334664), started 0:00:00 ago. (Use '!kill 2334664' to kill it.)

The validation accuracy came out about 0.01 higher than the train accuracy.

This is exactly what we wanted: our goal was for the validation accuracy to come out well.

The validation loss was decreasing along with the training loss.

Although a few briefly rising stretches appear along the way, it ultimately converged to a value close to the training loss (a difference of about 0.01).

This is not overfitting.
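
For reference, the same curves can be read without TensorBoard: fit() returns a History object whose .history dict records one value per epoch for each metric. A minimal sketch (not executed here), assuming the fit() call above had been assigned to a hypothetical variable hst:

# hypothetical: hst = net.fit(X, y, epochs=50, batch_size=200,
#                             validation_split=0.2, callbacks=cb1)
plt.plot(hst.history['accuracy'], label='train accuracy')          # per-epoch train metric
plt.plot(hst.history['val_accuracy'], label='validation accuracy') # per-epoch validation metric
plt.xlabel('epoch')
plt.legend()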

(4) Using the network fitted in (3), compute the accuracy on the test data. Compare with the result from (2).

net.evaluate(XX,yy)[1]
313/313 [==============================] - 1s 4ms/step - loss: 0.6851 - accuracy: 0.7576
0.7576000094413757

Training with a validation split (and for more epochs) raised the test accuracy compared to (2).

Even though the model was trained on less data (the 80% remaining after excluding the 20% validation split), the resulting loss came out lower than in (2).

(5) Retrain the network from (3) using the early-stopping feature. Visualize the training results with TensorBoard.

  • Set patience=3.
tf.random.set_seed(1213)
!rm -rf logs
net = tf.keras.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(20,activation='relu'))
net.add(tf.keras.layers.Dense(30,activation='relu'))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
cb1 = tf.keras.callbacks.TensorBoard()
cb2 = tf.keras.callbacks.EarlyStopping(patience=3)
net.fit(X,y,epochs=50,batch_size=200,validation_split=0.2,callbacks=[cb1,cb2]) 
Epoch 1/50
240/240 [==============================] - 1s 5ms/step - loss: 3.5097 - accuracy: 0.1968 - val_loss: 1.8552 - val_accuracy: 0.2495
Epoch 2/50
240/240 [==============================] - 1s 5ms/step - loss: 1.7564 - accuracy: 0.2976 - val_loss: 1.7000 - val_accuracy: 0.3088
Epoch 3/50
240/240 [==============================] - 1s 5ms/step - loss: 1.6320 - accuracy: 0.3394 - val_loss: 1.5627 - val_accuracy: 0.3705
Epoch 4/50
240/240 [==============================] - 1s 5ms/step - loss: 1.4916 - accuracy: 0.3990 - val_loss: 1.4359 - val_accuracy: 0.4124
Epoch 5/50
240/240 [==============================] - 1s 5ms/step - loss: 1.3586 - accuracy: 0.4399 - val_loss: 1.3286 - val_accuracy: 0.4457
Epoch 6/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2821 - accuracy: 0.4539 - val_loss: 1.2821 - val_accuracy: 0.4557
Epoch 7/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2511 - accuracy: 0.4580 - val_loss: 1.2649 - val_accuracy: 0.4557
Epoch 8/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2341 - accuracy: 0.4615 - val_loss: 1.2549 - val_accuracy: 0.4636
Epoch 9/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2224 - accuracy: 0.4684 - val_loss: 1.2336 - val_accuracy: 0.4601
Epoch 10/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2068 - accuracy: 0.4798 - val_loss: 1.1934 - val_accuracy: 0.5043
Epoch 11/50
240/240 [==============================] - 1s 5ms/step - loss: 1.1191 - accuracy: 0.5219 - val_loss: 1.0872 - val_accuracy: 0.5314
Epoch 12/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0596 - accuracy: 0.5439 - val_loss: 1.0735 - val_accuracy: 0.5347
Epoch 13/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0472 - accuracy: 0.5496 - val_loss: 1.0367 - val_accuracy: 0.5787
Epoch 14/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0129 - accuracy: 0.5654 - val_loss: 1.0254 - val_accuracy: 0.5563
Epoch 15/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0140 - accuracy: 0.5600 - val_loss: 1.0216 - val_accuracy: 0.5648
Epoch 16/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9891 - accuracy: 0.5724 - val_loss: 1.0071 - val_accuracy: 0.5807
Epoch 17/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9796 - accuracy: 0.5842 - val_loss: 0.9929 - val_accuracy: 0.5893
Epoch 18/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9192 - accuracy: 0.6306 - val_loss: 0.9279 - val_accuracy: 0.6523
Epoch 19/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8808 - accuracy: 0.6499 - val_loss: 0.9217 - val_accuracy: 0.6363
Epoch 20/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8497 - accuracy: 0.6567 - val_loss: 0.8369 - val_accuracy: 0.6768
Epoch 21/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8256 - accuracy: 0.6671 - val_loss: 0.8545 - val_accuracy: 0.6589
Epoch 22/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7959 - accuracy: 0.6786 - val_loss: 0.8021 - val_accuracy: 0.6838
Epoch 23/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7815 - accuracy: 0.6837 - val_loss: 0.8211 - val_accuracy: 0.6817
Epoch 24/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7778 - accuracy: 0.6858 - val_loss: 0.8233 - val_accuracy: 0.6851
Epoch 25/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7726 - accuracy: 0.6897 - val_loss: 0.8009 - val_accuracy: 0.6902
Epoch 26/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7611 - accuracy: 0.6940 - val_loss: 0.8029 - val_accuracy: 0.6888
Epoch 27/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7456 - accuracy: 0.6973 - val_loss: 0.7932 - val_accuracy: 0.6552
Epoch 28/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7449 - accuracy: 0.6982 - val_loss: 0.7733 - val_accuracy: 0.6977
Epoch 29/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7433 - accuracy: 0.7032 - val_loss: 0.7832 - val_accuracy: 0.6639
Epoch 30/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7389 - accuracy: 0.7104 - val_loss: 0.7495 - val_accuracy: 0.7179
Epoch 31/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7245 - accuracy: 0.7282 - val_loss: 0.8147 - val_accuracy: 0.6921
Epoch 32/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6991 - accuracy: 0.7388 - val_loss: 0.7216 - val_accuracy: 0.7407
Epoch 33/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6856 - accuracy: 0.7396 - val_loss: 0.7015 - val_accuracy: 0.7459
Epoch 34/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6776 - accuracy: 0.7435 - val_loss: 0.6934 - val_accuracy: 0.7420
Epoch 35/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6619 - accuracy: 0.7497 - val_loss: 0.8090 - val_accuracy: 0.6999
Epoch 36/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6755 - accuracy: 0.7445 - val_loss: 0.6897 - val_accuracy: 0.7393
Epoch 37/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6769 - accuracy: 0.7442 - val_loss: 0.7397 - val_accuracy: 0.7286
Epoch 38/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6612 - accuracy: 0.7511 - val_loss: 0.6717 - val_accuracy: 0.7623
Epoch 39/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6504 - accuracy: 0.7571 - val_loss: 0.7055 - val_accuracy: 0.7423
Epoch 40/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6527 - accuracy: 0.7571 - val_loss: 0.7266 - val_accuracy: 0.7376
Epoch 41/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6363 - accuracy: 0.7688 - val_loss: 0.6879 - val_accuracy: 0.7689
<keras.callbacks.History at 0x7fa338523f10>
%tensorboard --logdir logs --host 0.0.0.0
Reusing TensorBoard on port 6006 (pid 2336139), started 0:00:00 ago. (Use '!kill 2336139' to kill it.)

2. Fashion_mnist, CNN (30 points)

(1) Load the fashion_mnist data using tf.keras.datasets.fashion_mnist.load_data(), then fit it with the network below.

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X = tf.constant(x_train.reshape(-1,28,28,1),dtype=tf.float64)
y = tf.keras.utils.to_categorical(y_train)
XX = tf.constant(x_test.reshape(-1,28,28,1),dtype=tf.float64)
yy = tf.keras.utils.to_categorical(y_test)
  • Set n1=6, n2=16, n3=120, and set the dropout rate to 20%.
  • Print net.summary() to check the resulting design.

tf.random.set_seed(1213)
!rm -rf logs
net = tf.keras.Sequential()
net.add(tf.keras.layers.Conv2D(6,(5,5),activation='relu'))
net.add(tf.keras.layers.MaxPool2D())
net.add(tf.keras.layers.Conv2D(16,(5,5),activation='relu'))
net.add(tf.keras.layers.MaxPool2D())
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(120,activation='relu'))
net.add(tf.keras.layers.Dropout(0.2))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
net.fit(X,y,epochs=5)
Epoch 1/5
2022-06-13 20:03:22.250039: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8201
2022-06-13 20:03:23.016809: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-06-13 20:03:23.017705: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-06-13 20:03:23.017719: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-06-13 20:03:23.018571: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-06-13 20:03:23.018610: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Failed to launch ptxas
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
1875/1875 [==============================] - 11s 4ms/step - loss: 0.8764 - accuracy: 0.7668
Epoch 2/5
1875/1875 [==============================] - 8s 4ms/step - loss: 0.4300 - accuracy: 0.8453
Epoch 3/5
1875/1875 [==============================] - 8s 4ms/step - loss: 0.3857 - accuracy: 0.8591
Epoch 4/5
1875/1875 [==============================] - 8s 5ms/step - loss: 0.3606 - accuracy: 0.8685
Epoch 5/5
1875/1875 [==============================] - 8s 4ms/step - loss: 0.3393 - accuracy: 0.8748
<keras.callbacks.History at 0x7fa338346470>
net.layers
[<keras.layers.convolutional.Conv2D at 0x7fa3385c4ca0>,
 <keras.layers.pooling.MaxPooling2D at 0x7fa33844e8c0>,
 <keras.layers.convolutional.Conv2D at 0x7fa3385c7fa0>,
 <keras.layers.pooling.MaxPooling2D at 0x7fa33861d990>,
 <keras.layers.core.flatten.Flatten at 0x7fa338523df0>,
 <keras.layers.core.dense.Dense at 0x7fa3385c4160>,
 <keras.layers.core.dropout.Dropout at 0x7fa33830eb30>,
 <keras.layers.core.dense.Dense at 0x7fa33830e740>]
c1,m1,c2,m2,flttn,rel,dro,dns = net.layers

print(X.shape) 
print(c1(X).shape) 
print(m1(c1(X)).shape) 
print(c2(m1(c1(X))).shape) 
print(m2(c2(m1(c1(X)))).shape) 
print(flttn(m2(c2(m1(c1(X))))).shape) 
print(rel(flttn(m2(c2(m1(c1(X)))))).shape) 
print(dro(rel(flttn(m2(c2(m1(c1(X))))))).shape) 
print(dns(dro(rel(flttn(m2(c2(m1(c1(X)))))))).shape) 
(60000, 28, 28, 1)
(60000, 24, 24, 6)
(60000, 12, 12, 6)
(60000, 8, 8, 16)
(60000, 4, 4, 16)
(60000, 256)
(60000, 120)
(60000, 120)
(60000, 10)
net.summary()
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (32, 24, 24, 6)           156       
                                                                 
 max_pooling2d (MaxPooling2D  (32, 12, 12, 6)          0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (32, 8, 8, 16)            2416      
                                                                 
 max_pooling2d_1 (MaxPooling  (32, 4, 4, 16)           0         
 2D)                                                             
                                                                 
 flatten_3 (Flatten)         (32, 256)                 0         
                                                                 
 dense_9 (Dense)             (32, 120)                 30840     
                                                                 
 dropout (Dropout)           (32, 120)                 0         
                                                                 
 dense_10 (Dense)            (32, 10)                  1210      
                                                                 
=================================================================
Total params: 34,622
Trainable params: 34,622
Non-trainable params: 0
_________________________________________________________________
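
For reference, the Param # column can be reproduced by hand: a Conv2D layer with $c_{in}$ input channels, $c_{out}$ filters, and a $k\times k$ kernel has $(k^2 c_{in}+1)\,c_{out}$ parameters (the $+1$ is the per-filter bias), and a Dense layer from $p$ inputs to $q$ outputs has $(p+1)\,q$:

# hand-check of the Param # column; pooling/flatten/dropout layers have no parameters
print((5*5*1 + 1)*6)              # conv2d:   (25*1+1)*6   = 156
print((5*5*6 + 1)*16)             # conv2d_1: (25*6+1)*16  = 2416
print((256 + 1)*120)              # dense:    (256+1)*120  = 30840
print((120 + 1)*10)               # dense:    (120+1)*10   = 1210
print(156 + 2416 + 30840 + 1210)  # total: 34622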

(2) Over n1=(6,64,128) and n2=(16,256), find the combination that minimizes the test set loss. Write code that visualizes the results with TensorBoard.

  • Limit training to 3 epochs.
  • Set validation_split to 0.2.
from tensorboard.plugins.hparams import api as hp
a=[]
tf.random.set_seed(1213)
!rm -rf logs
for u in [6,64,128]: 
    for d in [16,256]: 
        logdir = 'logs/hp_{}_{}'.format(u,d)
        with tf.summary.create_file_writer(logdir).as_default():
            net = tf.keras.Sequential()
            net.add(tf.keras.layers.Conv2D(u,(5,5),activation='relu'))
            net.add(tf.keras.layers.MaxPool2D())
            net.add(tf.keras.layers.Conv2D(d,(5,5),activation='relu'))
            net.add(tf.keras.layers.MaxPool2D())
            net.add(tf.keras.layers.Flatten())
            net.add(tf.keras.layers.Dense(120,activation='relu'))
            net.add(tf.keras.layers.Dropout(0.2))
            net.add(tf.keras.layers.Dense(10,activation='softmax'))
            net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
            cb3 = hp.KerasCallback(logdir, {'n1':u, 'n2':d})
            net.fit(X,y,epochs=3,batch_size=200,validation_split=0.2,callbacks=cb3)
            _rslt=net.evaluate(XX,yy)
            a.append(_rslt[0])
            tf.summary.scalar('loss(test set)',_rslt[0], step=1)
Epoch 1/3
240/240 [==============================] - 2s 7ms/step - loss: 2.2067 - accuracy: 0.6588 - val_loss: 0.5796 - val_accuracy: 0.7888
Epoch 2/3
240/240 [==============================] - 2s 6ms/step - loss: 0.5856 - accuracy: 0.7887 - val_loss: 0.4798 - val_accuracy: 0.8223
Epoch 3/3
240/240 [==============================] - 2s 6ms/step - loss: 0.4963 - accuracy: 0.8220 - val_loss: 0.4457 - val_accuracy: 0.8381
313/313 [==============================] - 1s 4ms/step - loss: 0.4599 - accuracy: 0.8341
Epoch 1/3
240/240 [==============================] - 2s 6ms/step - loss: 1.2991 - accuracy: 0.7496 - val_loss: 0.3893 - val_accuracy: 0.8648
Epoch 2/3
240/240 [==============================] - 2s 6ms/step - loss: 0.4053 - accuracy: 0.8583 - val_loss: 0.3367 - val_accuracy: 0.8772
Epoch 3/3
240/240 [==============================] - 2s 6ms/step - loss: 0.3342 - accuracy: 0.8802 - val_loss: 0.3240 - val_accuracy: 0.8815
313/313 [==============================] - 1s 4ms/step - loss: 0.3488 - accuracy: 0.8724
Epoch 1/3
240/240 [==============================] - 2s 7ms/step - loss: 1.6602 - accuracy: 0.5762 - val_loss: 0.6718 - val_accuracy: 0.7639
Epoch 2/3
240/240 [==============================] - 2s 6ms/step - loss: 0.6426 - accuracy: 0.7711 - val_loss: 0.5514 - val_accuracy: 0.8059
Epoch 3/3
240/240 [==============================] - 2s 6ms/step - loss: 0.5485 - accuracy: 0.8016 - val_loss: 0.5018 - val_accuracy: 0.8209
313/313 [==============================] - 1s 4ms/step - loss: 0.5280 - accuracy: 0.8116
Epoch 1/3
240/240 [==============================] - 2s 7ms/step - loss: 1.4713 - accuracy: 0.7600 - val_loss: 0.3538 - val_accuracy: 0.8746
Epoch 2/3
240/240 [==============================] - 2s 7ms/step - loss: 0.3584 - accuracy: 0.8755 - val_loss: 0.2978 - val_accuracy: 0.8906
Epoch 3/3
240/240 [==============================] - 2s 7ms/step - loss: 0.2945 - accuracy: 0.8959 - val_loss: 0.2770 - val_accuracy: 0.8998
313/313 [==============================] - 1s 4ms/step - loss: 0.2970 - accuracy: 0.8938
Epoch 1/3
240/240 [==============================] - 2s 7ms/step - loss: 1.4521 - accuracy: 0.5778 - val_loss: 0.6687 - val_accuracy: 0.7469
Epoch 2/3
240/240 [==============================] - 2s 7ms/step - loss: 0.5967 - accuracy: 0.7826 - val_loss: 0.4889 - val_accuracy: 0.8217
Epoch 3/3
240/240 [==============================] - 2s 7ms/step - loss: 0.4715 - accuracy: 0.8295 - val_loss: 0.4049 - val_accuracy: 0.8512
313/313 [==============================] - 1s 4ms/step - loss: 0.4290 - accuracy: 0.8466
Epoch 1/3
240/240 [==============================] - 2s 9ms/step - loss: 2.0724 - accuracy: 0.7774 - val_loss: 0.3449 - val_accuracy: 0.8714
Epoch 2/3
240/240 [==============================] - 2s 9ms/step - loss: 0.3367 - accuracy: 0.8816 - val_loss: 0.2989 - val_accuracy: 0.8938
Epoch 3/3
240/240 [==============================] - 2s 9ms/step - loss: 0.2806 - accuracy: 0.9003 - val_loss: 0.2765 - val_accuracy: 0.8994
313/313 [==============================] - 1s 4ms/step - loss: 0.2918 - accuracy: 0.8977
%tensorboard --logdir logs --host 0.0.0.0
Reusing TensorBoard on port 6006 (pid 2337227), started 0:00:00 ago. (Use '!kill 2337227' to kill it.)
a
[0.4599493145942688,
 0.3487989902496338,
 0.5280426740646362,
 0.29704976081848145,
 0.4290454685688019,
 0.2917799949645996]

The entries above correspond, in order, to:

  • $n_1=6$, $n_2=16$
  • $n_1=6$, $n_2=256$
  • $n_1=64$, $n_2=16$
  • $n_1=64$, $n_2=256$
  • $n_1=128$, $n_2=16$
  • $n_1=128$, $n_2=256$

$\therefore$ The test set loss is minimized at $n_1=128$, $n_2=256$, where it equals $0.2917799949645996$.
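
The pairing above can also be produced programmatically instead of being read off by position; a small sketch, assuming the list a filled in the loop above:

import itertools
# pair each (n1, n2) combination with its test-set loss, in loop order
combos = list(itertools.product([6, 64, 128], [16, 256]))
for (n1, n2), loss in zip(combos, a):
    print('n1={:>3}, n2={:>3}: test loss {:.4f}'.format(n1, n2, loss))
print('best:', min(zip(a, combos))[1])  # combination with the smallest loss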

3. CIFAR10 (30 points)

Load CIFAR10 using tf.keras.datasets.cifar10.load_data(), then fit it with a suitable network.

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train.shape, y_train.shape, x_test.shape, y_test.shape
((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))
X=x_train.reshape(-1,32,32,3)/255 # shape (50000, 32, 32, 3); scale pixels to [0,1]
y=tf.keras.utils.to_categorical(y_train) 
XX=x_test.reshape(-1,32,32,3)/255
yy=tf.keras.utils.to_categorical(y_test)
X.shape,y.shape,XX.shape,yy.shape
((50000, 32, 32, 3), (50000, 10), (10000, 32, 32, 3), (10000, 10))
  • There is no need to visualize the results with TensorBoard.
  • Design and fit a model freely.
  • Only solutions with a test set accuracy of at least 70% are accepted.
tf.random.set_seed(1213)
net = tf.keras.Sequential()
net.add(tf.keras.layers.Conv2D(98,(2,2),activation='relu'))
net.add(tf.keras.layers.Conv2D(98,(2,2),activation='relu'))
net.add(tf.keras.layers.MaxPool2D())
net.add(tf.keras.layers.Conv2D(256,(2,2),activation='relu'))
net.add(tf.keras.layers.Conv2D(256,(2,2),activation='relu'))
net.add(tf.keras.layers.MaxPool2D())
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(256,activation='relu'))
net.add(tf.keras.layers.Dropout(0.2))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
net.fit(X,y,epochs=5,validation_split=0.2)
Epoch 1/5
1250/1250 [==============================] - 9s 7ms/step - loss: 1.4415 - accuracy: 0.4779 - val_loss: 1.0749 - val_accuracy: 0.6222
Epoch 2/5
1250/1250 [==============================] - 8s 6ms/step - loss: 0.9981 - accuracy: 0.6501 - val_loss: 0.8999 - val_accuracy: 0.6788
Epoch 3/5
1250/1250 [==============================] - 8s 6ms/step - loss: 0.8006 - accuracy: 0.7202 - val_loss: 0.8743 - val_accuracy: 0.6938
Epoch 4/5
1250/1250 [==============================] - 8s 6ms/step - loss: 0.6608 - accuracy: 0.7704 - val_loss: 0.7830 - val_accuracy: 0.7354
Epoch 5/5
1250/1250 [==============================] - 8s 6ms/step - loss: 0.5382 - accuracy: 0.8094 - val_loss: 0.7689 - val_accuracy: 0.7462
<keras.callbacks.History at 0x7fa1f59b8b50>
net.evaluate(XX,yy)[1]
313/313 [==============================] - 1s 4ms/step - loss: 0.8024 - accuracy: 0.7410
0.7409999966621399

4. Read the following and answer the questions. (10 points)

(1) What shape results when a tensor of shape (128,128,3) passes through a kernel created by tf.keras.layers.Conv2D(5,(2,2))?

cnv = tf.keras.layers.Conv2D(5,(2,2))
XXX = tnp.arange(1*128*128*3,dtype=tf.float64).reshape(1,128,128,3)
cnv(XXX).shape
TensorShape([1, 127, 127, 5])

answer: (1, 127, 127, 5)
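
This follows from the shape formula for a "valid" (unpadded) convolution with stride 1: each spatial dimension shrinks to $H-k+1$, so $128-2+1=127$, while the channel dimension becomes the number of filters, here 5.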

(2) What is the shape of the tensor produced when a tensor of shape (24,24,16) passes through tf.keras.layers.Flatten()?

24*24*16
9216
flltn = tf.keras.layers.Flatten()
XXX = tnp.arange(1*24*24*16,dtype=tf.float64).reshape(1,24,24,16)
flltn(XXX).shape
TensorShape([1, 9216])

answer: (1, 9216)

(3)-(5)

Consider the following model.

$$y_i= \beta_0 + \sum_{k=1}^{5} \beta_k \cos(k t_i)+\epsilon_i$$

Here $t=(t_1,\dots,t_{1000})=$ np.linspace(0,5,1000), and $\epsilon_i \overset{iid}{\sim} N(0,\sigma^2)$, i.e., the errors are independent draws from a normal distribution. Suppose the data below were observed from this model.

np.random.seed(43052)
t= np.linspace(0,5,1000)
y = -2+ 3*np.cos(t) + 1*np.cos(2*t) + 0.5*np.cos(5*t) + np.random.randn(1000)*0.2
plt.plot(t,y,'.',alpha=0.1)
[<matplotlib.lines.Line2D at 0x7fa339721ae0>]

(3) Choose all of the correct statements about the model.

(하영) For this model, finding the $\hat{\beta}_0,\dots,\hat{\beta}_5$ that minimize the MSE loss is the same as finding the $\hat{\beta}_0,\dots,\hat{\beta}_5$ that maximize the likelihood.

(재인) 하영 is right precisely because the error term is assumed to follow a normal distribution.

(서연) For this model, even if an appropriate learning rate is chosen, gradient descent can often fail to find the $\hat{\beta}_0,\dots,\hat{\beta}_5$ that minimize the MSE loss, because the loss function is not convex and there is a risk of getting stuck in a local minimum.

(규빈) If stochastic gradient descent is used instead of gradient descent, local minima can always be escaped, so the problem 서연 mentioned does not arise.

answer: 하영, 재인, 서연
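
하영's equivalence can be checked numerically: the model is linear in the coefficients, so the MSE-minimizing $\hat\beta$ (which, under Gaussian errors, is also the maximum-likelihood estimate) has a closed-form least-squares solution. A minimal sketch using the t and y generated above:

# design matrix [1, cos(t), cos(2t), ..., cos(5t)]; the least-squares solution
# minimizes the MSE and, under Gaussian errors, is also the MLE
A = np.stack([np.ones_like(t)] + [np.cos(k*t) for k in range(1, 6)], axis=1)
beta_hat, *_ = np.linalg.lstsq(A, y.reshape(-1), rcond=None)
print(beta_hat.round(2))  # close to the true (-2, 3, 1, 0, 0, 0.5)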

(4) The following shows the result of training the model below. Choose all of the correct interpretations.

y = y.reshape(1000,1)
x1 = np.cos(t) 
x2 = np.cos(2*t)
x3 = np.cos(3*t)
x4 = np.cos(4*t)
x5 = np.cos(5*t)
X = tf.stack([x1,x2,x3,x4,x5],axis=1)
net = tf.keras.Sequential()
net.add(tf.keras.layers.Dense(1)) 
net.compile(loss='mse',optimizer='adam')
net.fit(X,y,epochs=500,batch_size=100, validation_split=0.45,verbose=0) 
<keras.callbacks.History at 0x7fa338d03400>
plt.plot(y,'.',alpha=0.1)
plt.plot(net(X),'--')
[<matplotlib.lines.Line2D at 0x7fa338d03df0>]

(재인) Only the first 550 observations were used for training; the remaining 450 were not trained on and were used for validation.

(서연) The fit on the validation data is poor.

(규빈) Since the fit on the validation data is poor, overfitting can be suspected; therefore, adding a dropout layer to the network would help prevent overfitting and reduce the validation loss.

(하영) For this model, training for more epochs would reduce both the train loss and the validation loss.

answer: 재인, 서연, 규빈
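
재인's statement reflects how Keras implements validation_split: the held-out fraction is taken from the end of the data, before any shuffling. With 1000 observations and validation_split=0.45:

n = len(y)                     # 1000 observations
n_train = int(n * (1 - 0.45))  # Keras trains on the first 55% of the data
print(n_train, n - n_train)    # 550 train, 450 validation (the last 450 points)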

(5) Read the following carefully and determine whether each statement is true or false.

  • Convolution is a linear transformation.

answer: True
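
A quick numerical check (with use_bias=False, since the bias term makes the layer affine rather than strictly linear): a convolution satisfies $\text{conv}(aX_1+bX_2)=a\,\text{conv}(X_1)+b\,\text{conv}(X_2)$. A minimal sketch:

# a bias-free convolution is linear: conv(a*X1 + b*X2) == a*conv(X1) + b*conv(X2)
conv = tf.keras.layers.Conv2D(3, (2, 2), use_bias=False)
X1 = tf.random.normal((1, 8, 8, 1))
X2 = tf.random.normal((1, 8, 8, 1))
print(np.allclose(conv(2.0*X1 + 3.0*X2), 2.0*conv(X1) + 3.0*conv(X2), atol=1e-5))  # True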

  • When using a CNN, the loss function must always be chosen as MSE loss.

answer: False

  • A CNN can only be optimized with the adam optimizer.

answer: False

  • Image data can only be analyzed with a CNN and cannot be analyzed with a DNN.

answer: False

  • A CNN is applicable only to color images.

answer: False