Imports

import numpy as np
import matplotlib.pyplot as plt 
import tensorflow as tf 
import tensorflow.experimental.numpy as tnp 
tnp.experimental_enable_numpy_behavior()
%load_ext tensorboard
import graphviz
def gv(s): return graphviz.Source('digraph G{ rankdir="LR"'+ s + ';}')  # helper: wrap a DOT snippet in a left-to-right digraph

1. Fashion_mnist, DNN (30 points)

(1) Load the fashion_mnist data with tf.keras.datasets.fashion_mnist.load_data(), then fit it with the network below.

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
x_train.shape, y_train.shape, x_test.shape, y_test.shape
((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))
X = tf.constant(x_train.reshape(-1,28,28,1),dtype=tf.float64)
y = tf.keras.utils.to_categorical(y_train)
XX = tf.constant(x_test.reshape(-1,28,28,1),dtype=tf.float64)
yy = tf.keras.utils.to_categorical(y_test)
  • Use accuracy as the evaluation metric.
  • Set epochs to 10.
  • Use the adam optimizer.

gv('''
splines=line
subgraph cluster_1{
    style=filled;
    color=lightgrey;
    "x1"
    "x2"
    ".."
    "x784"
    label = "Layer 0"
}
subgraph cluster_2{
    style=filled;
    color=lightgrey;
    "x1" -> "node1"
    "x2" -> "node1"
    ".." -> "node1"
    "x784" -> "node1"
    
    "x1" -> "node2"
    "x2" -> "node2"
    ".." -> "node2"
    "x784" -> "node2"
    
    "x1" -> "..."
    "x2" -> "..."
    ".." -> "..."
    "x784" -> "..."

    "x1" -> "node20"
    "x2" -> "node20"
    ".." -> "node20"
    "x784" -> "node20"


    label = "Layer 1: relu"
}
subgraph cluster_3{
    style=filled;
    color=lightgrey;
    "node1" -> "node1 "
    "node2" -> "node1 "
    "..." -> "node1 "
    "node20" -> "node1 "
    
    "node1" -> "node2 "
    "node2" -> "node2 "
    "..." -> "node2 "
    "node20" -> "node2 "
    
    "node1" -> "... "
    "node2" -> "... "
    "..." -> "... "
    "node20" -> "... "

    "node1" -> "node30 "
    "node2" -> "node30 "
    "..." -> "node30 "
    "node20" -> "node30 "


    label = "Layer 2: relu"
}
subgraph cluster_4{
    style=filled;
    color=lightgrey;

    "node1 " -> "y10"
    "node2 " -> "y10"
    "... " -> "y10"
    "node30 " -> "y10"
    
    "node1 " -> "y1"
    "node2 " -> "y1"
    "... " -> "y1"
    "node30 " -> "y1"
    
    "node1 " -> "."
    "node2 " -> "."
    "... " -> "."
    "node30 " -> "."
    
    label = "Layer 3: softmax"
}
''')
[Rendered Graphviz diagram: a fully connected network with Layer 0 (inputs x1 … x784), Layer 1: relu (node1 … node20), Layer 2: relu (node1 … node30), and Layer 3: softmax (outputs y1 … y10).]
tf.random.set_seed(1213)
!rm -rf logs
net = tf.keras.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(20,activation='relu'))
net.add(tf.keras.layers.Dense(30,activation='relu'))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
net.fit(X,y,epochs=10,batch_size=200)
Epoch 1/10
2022-06-12 20:20:18.074442: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
300/300 [==============================] - 2s 3ms/step - loss: 3.3773 - accuracy: 0.1935
Epoch 2/10
300/300 [==============================] - 1s 4ms/step - loss: 1.7547 - accuracy: 0.2837
Epoch 3/10
300/300 [==============================] - 1s 4ms/step - loss: 1.5505 - accuracy: 0.3734
Epoch 4/10
300/300 [==============================] - 1s 4ms/step - loss: 1.3838 - accuracy: 0.4317
Epoch 5/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2905 - accuracy: 0.4512
Epoch 6/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2546 - accuracy: 0.4586
Epoch 7/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2338 - accuracy: 0.4630
Epoch 8/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2205 - accuracy: 0.4682
Epoch 9/10
300/300 [==============================] - 1s 4ms/step - loss: 1.2067 - accuracy: 0.4745
Epoch 10/10
300/300 [==============================] - 1s 4ms/step - loss: 1.1229 - accuracy: 0.5188
<keras.callbacks.History at 0x7f8bfbf9c580>
net.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 flatten (Flatten)           (200, 784)                0         
                                                                 
 dense (Dense)               (200, 20)                 15700     
                                                                 
 dense_1 (Dense)             (200, 30)                 630       
                                                                 
 dense_2 (Dense)             (200, 10)                 310       
                                                                 
=================================================================
Total params: 16,640
Trainable params: 16,640
Non-trainable params: 0
_________________________________________________________________

(2) Using the network fitted in (1), compute the accuracy on the test data.

net.evaluate(XX,yy)[1]
313/313 [==============================] - 1s 3ms/step - loss: 1.1249 - accuracy: 0.5273
0.5273000001907349

(3) Split off 20% of the training set as validation data and train for 50 epochs. Visualize the train accuracy and validation accuracy with TensorBoard and interpret the results. Can this be considered overfitting?

tf.random.set_seed(1213)
!rm -rf logs
net = tf.keras.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(20,activation='relu'))
net.add(tf.keras.layers.Dense(30,activation='relu'))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
cb1 = tf.keras.callbacks.TensorBoard()
net.fit(X,y,epochs=50,batch_size=200,validation_split=0.2,callbacks=cb1,verbose=1)
Epoch 1/50
240/240 [==============================] - 1s 5ms/step - loss: 3.5097 - accuracy: 0.1968 - val_loss: 1.8552 - val_accuracy: 0.2494
Epoch 2/50
240/240 [==============================] - 1s 5ms/step - loss: 1.7568 - accuracy: 0.2974 - val_loss: 1.7041 - val_accuracy: 0.3078
Epoch 3/50
240/240 [==============================] - 1s 5ms/step - loss: 1.6305 - accuracy: 0.3401 - val_loss: 1.5662 - val_accuracy: 0.3707
Epoch 4/50
240/240 [==============================] - 1s 5ms/step - loss: 1.4904 - accuracy: 0.4000 - val_loss: 1.4276 - val_accuracy: 0.4173
Epoch 5/50
240/240 [==============================] - 1s 5ms/step - loss: 1.3527 - accuracy: 0.4417 - val_loss: 1.3184 - val_accuracy: 0.4498
Epoch 6/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2798 - accuracy: 0.4554 - val_loss: 1.2782 - val_accuracy: 0.4569
Epoch 7/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2515 - accuracy: 0.4604 - val_loss: 1.2648 - val_accuracy: 0.4592
Epoch 8/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2296 - accuracy: 0.4644 - val_loss: 1.2487 - val_accuracy: 0.4635
Epoch 9/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2178 - accuracy: 0.4703 - val_loss: 1.2319 - val_accuracy: 0.4638
Epoch 10/50
240/240 [==============================] - 1s 5ms/step - loss: 1.1885 - accuracy: 0.4926 - val_loss: 1.1478 - val_accuracy: 0.4963
Epoch 11/50
240/240 [==============================] - 1s 5ms/step - loss: 1.1013 - accuracy: 0.5245 - val_loss: 1.0751 - val_accuracy: 0.5587
Epoch 12/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0572 - accuracy: 0.5429 - val_loss: 1.0514 - val_accuracy: 0.5526
Epoch 13/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0314 - accuracy: 0.5544 - val_loss: 1.1089 - val_accuracy: 0.5083
Epoch 14/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0145 - accuracy: 0.5592 - val_loss: 1.0447 - val_accuracy: 0.5441
Epoch 15/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9906 - accuracy: 0.5718 - val_loss: 0.9869 - val_accuracy: 0.5895
Epoch 16/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9770 - accuracy: 0.5760 - val_loss: 0.9818 - val_accuracy: 0.5671
Epoch 17/50
240/240 [==============================] - 1s 5ms/step - loss: 0.9542 - accuracy: 0.5903 - val_loss: 0.9571 - val_accuracy: 0.6049
Epoch 18/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8954 - accuracy: 0.6348 - val_loss: 0.8723 - val_accuracy: 0.6518
Epoch 19/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8584 - accuracy: 0.6534 - val_loss: 0.8626 - val_accuracy: 0.6635
Epoch 20/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8212 - accuracy: 0.6676 - val_loss: 0.8379 - val_accuracy: 0.6567
Epoch 21/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7915 - accuracy: 0.6787 - val_loss: 0.8099 - val_accuracy: 0.6718
Epoch 22/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7768 - accuracy: 0.6862 - val_loss: 0.8036 - val_accuracy: 0.6880
Epoch 23/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7623 - accuracy: 0.6923 - val_loss: 0.7813 - val_accuracy: 0.6964
Epoch 24/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7432 - accuracy: 0.6993 - val_loss: 0.7785 - val_accuracy: 0.6958
Epoch 25/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7412 - accuracy: 0.7002 - val_loss: 0.7933 - val_accuracy: 0.6976
Epoch 26/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7376 - accuracy: 0.7056 - val_loss: 0.7776 - val_accuracy: 0.7045
Epoch 27/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7349 - accuracy: 0.7118 - val_loss: 0.7849 - val_accuracy: 0.6673
Epoch 28/50
240/240 [==============================] - 1s 5ms/step - loss: 0.7123 - accuracy: 0.7284 - val_loss: 0.7190 - val_accuracy: 0.7448
Epoch 29/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6976 - accuracy: 0.7342 - val_loss: 0.7479 - val_accuracy: 0.7185
Epoch 30/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6715 - accuracy: 0.7477 - val_loss: 0.6951 - val_accuracy: 0.7377
Epoch 31/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6776 - accuracy: 0.7413 - val_loss: 0.7037 - val_accuracy: 0.7303
Epoch 32/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6567 - accuracy: 0.7505 - val_loss: 0.6952 - val_accuracy: 0.7441
Epoch 33/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6456 - accuracy: 0.7566 - val_loss: 0.6864 - val_accuracy: 0.7623
Epoch 34/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6361 - accuracy: 0.7619 - val_loss: 0.6739 - val_accuracy: 0.7613
Epoch 35/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6391 - accuracy: 0.7652 - val_loss: 0.6839 - val_accuracy: 0.7510
Epoch 36/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6397 - accuracy: 0.7662 - val_loss: 0.6572 - val_accuracy: 0.7676
Epoch 37/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6235 - accuracy: 0.7734 - val_loss: 0.6690 - val_accuracy: 0.7717
Epoch 38/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6127 - accuracy: 0.7791 - val_loss: 0.6474 - val_accuracy: 0.7796
Epoch 39/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6073 - accuracy: 0.7812 - val_loss: 0.6473 - val_accuracy: 0.7802
Epoch 40/50
240/240 [==============================] - 1s 5ms/step - loss: 0.5960 - accuracy: 0.7856 - val_loss: 0.6459 - val_accuracy: 0.7808
Epoch 41/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6039 - accuracy: 0.7840 - val_loss: 0.6936 - val_accuracy: 0.7622
Epoch 42/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6002 - accuracy: 0.7850 - val_loss: 0.6455 - val_accuracy: 0.7729
Epoch 43/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6010 - accuracy: 0.7833 - val_loss: 0.6551 - val_accuracy: 0.7792
Epoch 44/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6033 - accuracy: 0.7840 - val_loss: 0.6101 - val_accuracy: 0.7910
Epoch 45/50
240/240 [==============================] - 1s 5ms/step - loss: 0.5868 - accuracy: 0.7893 - val_loss: 0.6341 - val_accuracy: 0.7854
Epoch 46/50
240/240 [==============================] - 1s 5ms/step - loss: 0.5934 - accuracy: 0.7864 - val_loss: 0.6225 - val_accuracy: 0.7891
Epoch 47/50
240/240 [==============================] - 1s 5ms/step - loss: 0.6246 - accuracy: 0.7786 - val_loss: 0.6429 - val_accuracy: 0.7804
Epoch 48/50
240/240 [==============================] - 1s 5ms/step - loss: 0.5729 - accuracy: 0.7906 - val_loss: 0.6607 - val_accuracy: 0.7561
Epoch 49/50
240/240 [==============================] - 1s 5ms/step - loss: 0.5760 - accuracy: 0.7926 - val_loss: 0.7408 - val_accuracy: 0.7321
Epoch 50/50
240/240 [==============================] - 1s 5ms/step - loss: 0.5829 - accuracy: 0.7890 - val_loss: 0.6363 - val_accuracy: 0.7809
<keras.callbacks.History at 0x7f8bfa5e3ee0>
%tensorboard --logdir logs --host 0.0.0.0
Reusing TensorBoard on port 6006 (pid 2281220), started 0:00:00 ago. (Use '!kill 2281220' to kill it.)

The validation accuracy came out about 0.01 higher than the train accuracy.

Our aim was exactly this: for the validation accuracy to come out well.

The validation loss kept decreasing along with the training loss.

Although there is a short stretch where it rises, it eventually converged to a value close to the training loss (a gap of about 0.01).

This is not overfitting.
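
(The same curves can also be drawn without TensorBoard from the History object; a minimal sketch, assuming the fit call above had been stored, e.g. hist = net.fit(...) — the name hist is hypothetical.)

# minimal sketch: plot per-epoch train vs. validation accuracy from the History
# object (assumes hist = net.fit(..., validation_split=0.2, ...) was stored)
plt.plot(hist.history['accuracy'], label='train accuracy')
plt.plot(hist.history['val_accuracy'], label='validation accuracy')
plt.xlabel('epoch'); plt.ylabel('accuracy')
plt.legend()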

(4) Using the network fitted in (3), compute the accuracy on the test data and compare with the result of (2).

net.evaluate(XX,yy)[1]
313/313 [==============================] - 1s 4ms/step - loss: 0.6505 - accuracy: 0.7786
0.7785999774932861

Training with a validation split (and for 50 epochs instead of 10) raised the test accuracy from 0.5273 to 0.7786.

The loss on the held-out 20% validation set even came out lower than the loss on the 80% of the training data actually used for learning.

As the results in (3) show, however, there are signs of overfitting (the validation loss ticks up over the last few epochs while the training loss keeps falling), so the validation-split network cannot be said to be a well-designed net.

(5) Retrain the network from (3) using early stopping. Visualize the training results with TensorBoard.

  • Set patience=3.
tf.random.set_seed(1213)
!rm -rf logs
net = tf.keras.Sequential()
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(20,activation='relu'))
net.add(tf.keras.layers.Dense(30,activation='relu'))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
cb1 = tf.keras.callbacks.TensorBoard()
cb2 = tf.keras.callbacks.EarlyStopping(patience=3)
net.fit(X,y,epochs=50,batch_size=200,validation_split=0.2,callbacks=[cb1,cb2]) 
Epoch 1/50
240/240 [==============================] - 2s 8ms/step - loss: 3.5097 - accuracy: 0.1968 - val_loss: 1.8552 - val_accuracy: 0.2495
Epoch 2/50
240/240 [==============================] - 1s 4ms/step - loss: 1.7548 - accuracy: 0.2991 - val_loss: 1.6986 - val_accuracy: 0.3088
Epoch 3/50
240/240 [==============================] - 2s 7ms/step - loss: 1.6166 - accuracy: 0.3491 - val_loss: 1.5575 - val_accuracy: 0.3804
Epoch 4/50
240/240 [==============================] - 1s 5ms/step - loss: 1.4841 - accuracy: 0.4009 - val_loss: 1.4335 - val_accuracy: 0.4156
Epoch 5/50
240/240 [==============================] - 1s 6ms/step - loss: 1.3541 - accuracy: 0.4414 - val_loss: 1.3242 - val_accuracy: 0.4461
Epoch 6/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2795 - accuracy: 0.4547 - val_loss: 1.2824 - val_accuracy: 0.4543
Epoch 7/50
240/240 [==============================] - 2s 7ms/step - loss: 1.2504 - accuracy: 0.4594 - val_loss: 1.2627 - val_accuracy: 0.4586
Epoch 8/50
240/240 [==============================] - 1s 6ms/step - loss: 1.2340 - accuracy: 0.4628 - val_loss: 1.2426 - val_accuracy: 0.4618
Epoch 9/50
240/240 [==============================] - 1s 5ms/step - loss: 1.2207 - accuracy: 0.4683 - val_loss: 1.2317 - val_accuracy: 0.4631
Epoch 10/50
240/240 [==============================] - 1s 3ms/step - loss: 1.1920 - accuracy: 0.4914 - val_loss: 1.1480 - val_accuracy: 0.4908
Epoch 11/50
240/240 [==============================] - 1s 5ms/step - loss: 1.0957 - accuracy: 0.5300 - val_loss: 1.0769 - val_accuracy: 0.5415
Epoch 12/50
240/240 [==============================] - 1s 6ms/step - loss: 1.0510 - accuracy: 0.5433 - val_loss: 1.0635 - val_accuracy: 0.5305
Epoch 13/50
240/240 [==============================] - 2s 7ms/step - loss: 1.0286 - accuracy: 0.5562 - val_loss: 1.0352 - val_accuracy: 0.5621
Epoch 14/50
240/240 [==============================] - 1s 6ms/step - loss: 1.0163 - accuracy: 0.5579 - val_loss: 1.0201 - val_accuracy: 0.5585
Epoch 15/50
240/240 [==============================] - 1s 6ms/step - loss: 1.0054 - accuracy: 0.5674 - val_loss: 1.0188 - val_accuracy: 0.5562
Epoch 16/50
240/240 [==============================] - 1s 4ms/step - loss: 0.9956 - accuracy: 0.5766 - val_loss: 1.0052 - val_accuracy: 0.5876
Epoch 17/50
240/240 [==============================] - 2s 7ms/step - loss: 0.9294 - accuracy: 0.6262 - val_loss: 0.9101 - val_accuracy: 0.6352
Epoch 18/50
240/240 [==============================] - 2s 7ms/step - loss: 0.8882 - accuracy: 0.6436 - val_loss: 0.9630 - val_accuracy: 0.6083
Epoch 19/50
240/240 [==============================] - 1s 6ms/step - loss: 0.8617 - accuracy: 0.6510 - val_loss: 0.8606 - val_accuracy: 0.6593
Epoch 20/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8529 - accuracy: 0.6573 - val_loss: 0.8814 - val_accuracy: 0.6575
Epoch 21/50
240/240 [==============================] - 1s 6ms/step - loss: 0.8249 - accuracy: 0.6711 - val_loss: 0.8362 - val_accuracy: 0.6753
Epoch 22/50
240/240 [==============================] - 1s 6ms/step - loss: 0.8149 - accuracy: 0.6709 - val_loss: 0.8508 - val_accuracy: 0.6546
Epoch 23/50
240/240 [==============================] - 1s 6ms/step - loss: 0.8063 - accuracy: 0.6756 - val_loss: 0.8348 - val_accuracy: 0.6833
Epoch 24/50
240/240 [==============================] - 1s 6ms/step - loss: 0.8036 - accuracy: 0.6752 - val_loss: 0.8407 - val_accuracy: 0.6733
Epoch 25/50
240/240 [==============================] - 1s 6ms/step - loss: 0.8005 - accuracy: 0.6783 - val_loss: 0.8608 - val_accuracy: 0.6524
Epoch 26/50
240/240 [==============================] - 1s 6ms/step - loss: 0.7874 - accuracy: 0.6834 - val_loss: 0.8181 - val_accuracy: 0.6619
Epoch 27/50
240/240 [==============================] - 1s 5ms/step - loss: 0.8025 - accuracy: 0.6809 - val_loss: 0.8235 - val_accuracy: 0.6429
Epoch 28/50
240/240 [==============================] - 2s 7ms/step - loss: 0.7822 - accuracy: 0.6908 - val_loss: 0.8205 - val_accuracy: 0.6938
Epoch 29/50
240/240 [==============================] - 1s 6ms/step - loss: 0.7672 - accuracy: 0.6988 - val_loss: 0.8417 - val_accuracy: 0.6491
<keras.callbacks.History at 0x7f11702d51b0>
%tensorboard --logdir logs --host 0.0.0.0
Reusing TensorBoard on port 6006 (pid 2045929), started 0:00:04 ago. (Use '!kill 2045929' to kill it.)

2. Fashion_mnist, CNN (30 points)

(1) Load the fashion_mnist data with tf.keras.datasets.fashion_mnist.load_data(), then fit it with the network below.

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X = tf.constant(x_train.reshape(-1,28,28,1),dtype=tf.float64)
y = tf.keras.utils.to_categorical(y_train)
XX = tf.constant(x_test.reshape(-1,28,28,1),dtype=tf.float64)
yy = tf.keras.utils.to_categorical(y_test)
  • Set n1=6, n2=16, n3=120, and set the dropout rate to 20%.
  • Print net.summary() to check the design.

tf.random.set_seed(1213)
!rm -rf logs
net = tf.keras.Sequential()
net.add(tf.keras.layers.Conv2D(6,(5,5),activation='relu'))
net.add(tf.keras.layers.MaxPool2D())
net.add(tf.keras.layers.Conv2D(16,(5,5),activation='relu'))
net.add(tf.keras.layers.MaxPool2D())
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(120,activation='relu'))
net.add(tf.keras.layers.Dropout(0.2))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
net.fit(X,y,epochs=5)
Epoch 1/5
1875/1875 [==============================] - 8s 4ms/step - loss: 0.8791 - accuracy: 0.7674
Epoch 2/5
1875/1875 [==============================] - 10s 5ms/step - loss: 0.4308 - accuracy: 0.8436
Epoch 3/5
1875/1875 [==============================] - 10s 5ms/step - loss: 0.3858 - accuracy: 0.8593
Epoch 4/5
1875/1875 [==============================] - 9s 5ms/step - loss: 0.3617 - accuracy: 0.8671
Epoch 5/5
1875/1875 [==============================] - 8s 5ms/step - loss: 0.3410 - accuracy: 0.8731
<keras.callbacks.History at 0x7f17423697b0>
net.layers
[<keras.layers.convolutional.Conv2D at 0x7f17a82c9720>,
 <keras.layers.pooling.MaxPooling2D at 0x7f118847b970>,
 <keras.layers.convolutional.Conv2D at 0x7f118847ba60>,
 <keras.layers.pooling.MaxPooling2D at 0x7f11884fce50>,
 <keras.layers.core.flatten.Flatten at 0x7f173a555780>,
 <keras.layers.core.dense.Dense at 0x7f17d043e320>,
 <keras.layers.core.dropout.Dropout at 0x7f17d043f040>,
 <keras.layers.core.dense.Dense at 0x7f173a60cdc0>]
c1,m1,c2,m2,flttn,rel,dro,dns = net.layers

print(X.shape) 
print(c1(X).shape) 
print(m1(c1(X)).shape) 
print(c2(m1(c1(X))).shape) 
print(m2(c2(m1(c1(X)))).shape) 
print(flttn(m2(c2(m1(c1(X))))).shape) 
print(rel(flttn(m2(c2(m1(c1(X)))))).shape) 
print(dro(rel(flttn(m2(c2(m1(c1(X))))))).shape) 
print(dns(dro(rel(flttn(m2(c2(m1(c1(X)))))))).shape) 
(60000, 28, 28, 1)
(60000, 24, 24, 6)
(60000, 12, 12, 6)
(60000, 8, 8, 16)
(60000, 4, 4, 16)
(60000, 256)
(60000, 120)
(60000, 120)
(60000, 10)
net.summary()
Model: "sequential_200"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d_394 (Conv2D)         (32, 24, 24, 6)           156       
                                                                 
 max_pooling2d_390 (MaxPooli  (32, 12, 12, 6)          0         
 ng2D)                                                           
                                                                 
 conv2d_395 (Conv2D)         (32, 8, 8, 16)            2416      
                                                                 
 max_pooling2d_391 (MaxPooli  (32, 4, 4, 16)           0         
 ng2D)                                                           
                                                                 
 flatten_200 (Flatten)       (32, 256)                 0         
                                                                 
 dense_403 (Dense)           (32, 120)                 30840     
                                                                 
 dropout_196 (Dropout)       (32, 120)                 0         
                                                                 
 dense_404 (Dense)           (32, 10)                  1210      
                                                                 
=================================================================
Total params: 34,622
Trainable params: 34,622
Non-trainable params: 0
_________________________________________________________________

(2) For n1=(6,64,128) and n2=(16,256), find the combination that minimizes the test-set loss. Write code that visualizes the results with TensorBoard.

  • Limit training to 3 epochs.
  • Set validation_split to 0.2.
from tensorboard.plugins.hparams import api as hp
a=[]
tf.random.set_seed(1213)
!rm -rf logs
for u in [6,64,128]: 
    for d in [16,256]: 
        logdir = 'logs/hp_{}_{}'.format(u,d)
        with tf.summary.create_file_writer(logdir).as_default():
            net = tf.keras.Sequential()
            net.add(tf.keras.layers.Conv2D(u,(5,5),activation='relu'))
            net.add(tf.keras.layers.MaxPool2D())
            net.add(tf.keras.layers.Conv2D(d,(5,5),activation='relu'))
            net.add(tf.keras.layers.MaxPool2D())
            net.add(tf.keras.layers.Flatten())
            net.add(tf.keras.layers.Dense(120,activation='relu'))
            net.add(tf.keras.layers.Dropout(0.2))
            net.add(tf.keras.layers.Dense(10,activation='softmax'))
            net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
            cb3 = hp.KerasCallback(logdir, {'n1':u, 'n2':d})
            net.fit(X,y,epochs=3,batch_size=200,validation_split=0.2,callbacks=cb3)
            _rslt=net.evaluate(XX,yy)
            a.append(_rslt[0])
            tf.summary.scalar('loss(테스트셋)',_rslt[0], step=1) 
Epoch 1/3
240/240 [==============================] - 3s 10ms/step - loss: 2.2066 - accuracy: 0.6594 - val_loss: 0.5792 - val_accuracy: 0.7904
Epoch 2/3
240/240 [==============================] - 2s 8ms/step - loss: 0.5800 - accuracy: 0.7913 - val_loss: 0.4755 - val_accuracy: 0.8284
Epoch 3/3
240/240 [==============================] - 2s 7ms/step - loss: 0.4906 - accuracy: 0.8231 - val_loss: 0.4423 - val_accuracy: 0.8372
313/313 [==============================] - 2s 6ms/step - loss: 0.4550 - accuracy: 0.8348
Epoch 1/3
240/240 [==============================] - 3s 8ms/step - loss: 1.2998 - accuracy: 0.7496 - val_loss: 0.3960 - val_accuracy: 0.8584
Epoch 2/3
240/240 [==============================] - 2s 9ms/step - loss: 0.4058 - accuracy: 0.8568 - val_loss: 0.3453 - val_accuracy: 0.8751
Epoch 3/3
240/240 [==============================] - 2s 8ms/step - loss: 0.3365 - accuracy: 0.8796 - val_loss: 0.3175 - val_accuracy: 0.8848
313/313 [==============================] - 2s 4ms/step - loss: 0.3444 - accuracy: 0.8784
Epoch 1/3
240/240 [==============================] - 2s 8ms/step - loss: 1.6604 - accuracy: 0.5781 - val_loss: 0.6658 - val_accuracy: 0.7638
Epoch 2/3
240/240 [==============================] - 2s 9ms/step - loss: 0.6420 - accuracy: 0.7723 - val_loss: 0.5566 - val_accuracy: 0.8022
Epoch 3/3
240/240 [==============================] - 2s 7ms/step - loss: 0.5498 - accuracy: 0.8012 - val_loss: 0.5062 - val_accuracy: 0.8204
313/313 [==============================] - 2s 5ms/step - loss: 0.5221 - accuracy: 0.8104
Epoch 1/3
240/240 [==============================] - 2s 6ms/step - loss: 1.4709 - accuracy: 0.7603 - val_loss: 0.3609 - val_accuracy: 0.8709
Epoch 2/3
240/240 [==============================] - 2s 8ms/step - loss: 0.3619 - accuracy: 0.8729 - val_loss: 0.3122 - val_accuracy: 0.8838
Epoch 3/3
240/240 [==============================] - 3s 11ms/step - loss: 0.3004 - accuracy: 0.8941 - val_loss: 0.2842 - val_accuracy: 0.8971
313/313 [==============================] - 2s 6ms/step - loss: 0.3028 - accuracy: 0.8904
Epoch 1/3
240/240 [==============================] - 3s 11ms/step - loss: 1.4419 - accuracy: 0.5775 - val_loss: 0.6410 - val_accuracy: 0.7645
Epoch 2/3
240/240 [==============================] - 3s 11ms/step - loss: 0.5972 - accuracy: 0.7793 - val_loss: 0.4851 - val_accuracy: 0.8187
Epoch 3/3
240/240 [==============================] - 2s 10ms/step - loss: 0.4984 - accuracy: 0.8176 - val_loss: 0.4373 - val_accuracy: 0.8357
313/313 [==============================] - 2s 5ms/step - loss: 0.4565 - accuracy: 0.8273
Epoch 1/3
240/240 [==============================] - 4s 13ms/step - loss: 2.0745 - accuracy: 0.7771 - val_loss: 0.3579 - val_accuracy: 0.8695
Epoch 2/3
240/240 [==============================] - 3s 11ms/step - loss: 0.3385 - accuracy: 0.8813 - val_loss: 0.2956 - val_accuracy: 0.8947
Epoch 3/3
240/240 [==============================] - 3s 11ms/step - loss: 0.2799 - accuracy: 0.9005 - val_loss: 0.2731 - val_accuracy: 0.8999
313/313 [==============================] - 2s 4ms/step - loss: 0.2948 - accuracy: 0.8939
%tensorboard --logdir logs --host 0.0.0.0
a
[0.4550354480743408,
 0.34435006976127625,
 0.522118866443634,
 0.3027571737766266,
 0.45646074414253235,
 0.2948054075241089]

The losses above are, in order:

  • $n_1$=(6), $n_2$=(16)
  • $n_1$=(6), $n_2$=(256)
  • $n_1$=(64), $n_2$=(16)
  • $n_1$=(64), $n_2$=(256)
  • $n_1$=(128), $n_2$=(16)
  • $n_1$=(128), $n_2$=(256)

The test-set loss is minimized at $n_1=128, n_2 = 256$, where it equals $0.2948054075241089$.
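
(The winning pair can also be read off programmatically; a minimal sketch, assuming the list a was filled in the same order as the loops above:)

# minimal sketch: pair each (n1, n2) combination with its test loss and take the min
combos = [(u, d) for u in [6, 64, 128] for d in [16, 256]]
best_pair, best_loss = min(zip(combos, a), key=lambda t: t[1])
print(best_pair, best_loss)  # -> (128, 256) 0.2948...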

3. CIFAR10 (30 points)

Load the CIFAR10 data with tf.keras.datasets.cifar10.load_data(), then fit it with a suitable network.

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train.shape, y_train.shape, x_test.shape, y_test.shape
((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))
X=x_train.reshape(-1,32,32,3)/255 # (50000, 32, 32, 3), pixel values scaled to [0,1]
y=tf.keras.utils.to_categorical(y_train) 
XX=x_test.reshape(-1,32,32,3)/255
yy=tf.keras.utils.to_categorical(y_test)
X.shape,y.shape,XX.shape,yy.shape
((50000, 32, 32, 3), (50000, 10), (10000, 32, 32, 3), (10000, 10))
  • The results need not be visualized with TensorBoard.
  • Design and fit a model of your choice.
  • Only answers whose test-set accuracy is at least 70% are accepted.
tf.random.set_seed(1213)
net = tf.keras.Sequential()
net.add(tf.keras.layers.Conv2D(98,(2,2),activation='relu'))
net.add(tf.keras.layers.Conv2D(98,(2,2),activation='relu'))
net.add(tf.keras.layers.MaxPool2D())
net.add(tf.keras.layers.Conv2D(256,(2,2),activation='relu'))
net.add(tf.keras.layers.Conv2D(256,(2,2),activation='relu'))
net.add(tf.keras.layers.MaxPool2D())
net.add(tf.keras.layers.Flatten())
net.add(tf.keras.layers.Dense(256,activation='relu'))
net.add(tf.keras.layers.Dropout(0.2))
net.add(tf.keras.layers.Dense(10,activation='softmax'))
net.compile(optimizer='adam',loss=tf.losses.categorical_crossentropy,metrics='accuracy')
net.fit(X,y,epochs=5,validation_split=0.2)
Epoch 1/5
1250/1250 [==============================] - 11s 8ms/step - loss: 1.4445 - accuracy: 0.4749 - val_loss: 1.0996 - val_accuracy: 0.6179
Epoch 2/5
1250/1250 [==============================] - 9s 7ms/step - loss: 0.9984 - accuracy: 0.6500 - val_loss: 0.9094 - val_accuracy: 0.6784
Epoch 3/5
1250/1250 [==============================] - 9s 7ms/step - loss: 0.8065 - accuracy: 0.7190 - val_loss: 0.8279 - val_accuracy: 0.7145
Epoch 4/5
1250/1250 [==============================] - 10s 8ms/step - loss: 0.6654 - accuracy: 0.7656 - val_loss: 0.7672 - val_accuracy: 0.7406
Epoch 5/5
1250/1250 [==============================] - 10s 8ms/step - loss: 0.5462 - accuracy: 0.8066 - val_loss: 0.7723 - val_accuracy: 0.7448
<keras.callbacks.History at 0x7f173a653280>
net.evaluate(XX,yy)[1]
313/313 [==============================] - 2s 6ms/step - loss: 0.7952 - accuracy: 0.7404
0.7404000163078308

4. Read the following and answer the questions. (10 points)

(1) What shape comes out when a tensor of shape (128,128,3) passes through the kernel created by tf.keras.layers.Conv2D(5,(2,2))?

cnv = tf.keras.layers.Conv2D(5,(2,2))
XXX = tnp.arange(1*128*128*3,dtype=tf.float64).reshape(1,128,128,3)
cnv(XXX).shape
TensorShape([1, 127, 127, 5])

Answer: (1, 127, 127, 5). With stride 1 and no padding, each spatial dimension shrinks to 128 − 2 + 1 = 127, and Conv2D(5, …) produces 5 output channels.

(2) What is the shape of the tensor produced when a tensor of shape (24,24,16) passes through tf.keras.layers.Flatten()?

24*24*16
9216
flltn = tf.keras.layers.Flatten()
XXX = tnp.arange(1*24*24*16,dtype=tf.float64).reshape(1,24,24,16)
flltn(XXX).shape
TensorShape([1, 9216])

Answer: (1, 9216), since 24 × 24 × 16 = 9216.

(3) If batch_size=3 is set for data with 10 observations, how many iterations are performed per epoch?

4. Keras forms ⌈10/3⌉ = 4 batches per epoch: three batches of 3 plus a final batch holding the remaining observation.
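
(A quick check with toy data — the names x_toy, y_toy are hypothetical — shows the 4/4 progress bar:)

# toy check: 10 observations with batch_size=3 -> Keras runs ceil(10/3) = 4 steps
x_toy = np.random.randn(10, 2)
y_toy = np.random.randn(10, 1)
toy = tf.keras.Sequential([tf.keras.layers.Dense(1)])
toy.compile(optimizer='adam', loss='mse')
toy.fit(x_toy, y_toy, epochs=1, batch_size=3)  # progress bar: 4/4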

(4) We want to apply a suitable linear transformation that turns a (28,28) grayscale image into a (27,27) grayscale image. If a Conv2D layer with a (2,2) kernel is used, how many weights are needed? (Ignore the bias.)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X= x_train.reshape(-1,28,28,1)/255
y = tf.keras.utils.to_categorical(y_train)
conv = tf.keras.layers.Conv2D(1,(2,2)) 
conv(X)
conv.weights
[<tf.Variable 'conv2d_2/kernel:0' shape=(2, 2, 1, 1) dtype=float32, numpy=
 array([[[[-0.42808822]],
 
         [[ 0.61645263]]],
 
 
        [[[ 0.4094376 ]],
 
         [[ 0.06302863]]]], dtype=float32)>,
 <tf.Variable 'conv2d_2/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]
conv.weights[0]
<tf.Variable 'conv2d_2/kernel:0' shape=(2, 2, 1, 1) dtype=float32, numpy=
array([[[[-0.42808822]],

        [[ 0.61645263]]],


       [[[ 0.4094376 ]],

        [[ 0.06302863]]]], dtype=float32)>
conv(X).shape
TensorShape([60000, 27, 27, 1])

Answer: 4. The kernel has shape (2, 2, 1, 1), i.e. 2 × 2 × 1 × 1 = 4 weights.

(5) We want to apply a suitable linear transformation that turns a (28,28) grayscale image into a (27,27) grayscale image. If a Dense layer is used, how many weights are needed? (Ignore the bias.)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X= x_train.reshape(-1,28,28,1)/255
y = tf.keras.utils.to_categorical(y_train)
plt.imshow(X[0])
<matplotlib.image.AxesImage at 0x7f8aac75cee0>
X.shape
(60000, 28, 28, 1)
flatten = tf.keras.layers.Flatten()
dense = tf.keras.layers.Dense(729,activation='relu')
dense.weights[0]
<tf.Variable 'dense_7/kernel:0' shape=(784, 729) dtype=float32, numpy=
array([[-0.06126465,  0.06081464, -0.02847512, ..., -0.02953292,
        -0.0128521 , -0.01588677],
       [ 0.00430008, -0.01665639,  0.03717923, ..., -0.06223682,
        -0.04807941,  0.03432452],
       [-0.04266407, -0.03506888,  0.04691477, ...,  0.03784292,
        -0.0169938 ,  0.04724574],
       ...,
       [-0.03184114,  0.00621009, -0.00734317, ...,  0.05529071,
        -0.00723364,  0.02950227],
       [ 0.01770519,  0.03401025, -0.03957512, ..., -0.02695979,
        -0.03614742,  0.05953851],
       [ 0.05379319,  0.02798077, -0.0164431 , ...,  0.02935955,
         0.04018781, -0.06059197]], dtype=float32)>
dense(flatten(X)).reshape(-1,27,27,1).shape
TensorShape([60000, 27, 27, 1])
plt.imshow(dense(flatten(X)).reshape(-1,27,27,1)[0],cmap='gray')
<matplotlib.image.AxesImage at 0x7f896c5cf040>

Answer: 784 × 729 = 571,536. The Dense kernel shown above has shape (784, 729): every one of the 784 flattened input pixels is connected to each of the 729 = 27 × 27 outputs.
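
(A quick arithmetic check against the kernel shape printed above:)

# the dense kernel mapping 784 inputs to 729 outputs has 784 * 729 entries
print(784 * 729)                              # 571536
print(dense.weights[0].shape.num_elements())  # 571536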