For reference, see the Data Science course among the undergraduate courses~

imports

import tensorflow as tf
import numpy as np
tf.config.experimental.list_physical_devices('GPU')
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

Supplement to the previous lecture

- max, min, sum, mean

a = tf.constant([1.0,2.0,3.0,4.0])
a
<tf.Tensor: shape=(4,), dtype=float32, numpy=array([1., 2., 3., 4.], dtype=float32)>
max(a)
<tf.Tensor: shape=(), dtype=float32, numpy=4.0>
min(a)
<tf.Tensor: shape=(), dtype=float32, numpy=1.0>
mean(a)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Input In [9], in <cell line: 1>()
----> 1 mean(a)

NameError: name 'mean' is not defined

This errors: Python has no built-in `mean`.

tf.reduce_mean(a)
<tf.Tensor: shape=(), dtype=float32, numpy=2.5>

Add a decimal point to turn the ints into floats~

tf.reduce_mean(tf.constant([1,2,3,4]))
<tf.Tensor: shape=(), dtype=int32, numpy=2>

Computing directly on ints gives a mean of 2 (the result stays int32 and is truncated).

Later we will use tnp so tensors behave like numpy.
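For now, one way to get the true mean without tnp is an explicit cast (a minimal sketch; the name b is mine):

b = tf.constant([1,2,3,4])
tf.reduce_mean(tf.cast(b, tf.float32)) # cast int32 -> float32 first, so the mean is 2.5 instead of the truncated 2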

concat, stack

- Example: (2,3,4,5) stack (2,3,4,5) -> (?,?,?,?,?)

a = tf.reshape(tf.constant(range(2*3*4*5)),(2,3,4,5))
b = -a

case1 (1,2,3,4,5) stack (1,2,3,4,5) = (2,2,3,4,5) $\rightarrow$ axis = 0

  • view (2,3,4,5) as (1,2,3,4,5)
tf.stack([a,b],axis=0).shape
TensorShape([2, 2, 3, 4, 5])

case2 (2,1,3,4,5) stack (2,1,3,4,5) = (2,2,3,4,5) $\rightarrow$ axis = 1

  • axis decides at which position the new dimension is inserted.
  • view (2,3,4,5) as (2,1,3,4,5)
tf.stack([a,b],axis=1).shape
TensorShape([2, 2, 3, 4, 5])

case3 (2,3,1,4,5) stack (2,3,1,4,5) = (2,3,2,4,5) $\rightarrow$ axis = 2

  • view (2,3,4,5) as (2,3,1,4,5)
tf.stack([a,b],axis=2).shape
TensorShape([2, 3, 2, 4, 5])

case4 (2,3,4,1,5) stack (2,3,4,1,5) = (2,3,4,2,5) $\rightarrow$ axis = 3

  • view (2,3,4,5) as (2,3,4,1,5)
tf.stack([a,b],axis=3).shape
TensorShape([2, 3, 4, 2, 5])

case5 (2,3,4,5,1) stack (2,3,4,5,1) = (2,3,4,5,2) $\rightarrow$ axis = 4

  • view (2,3,4,5) as (2,3,4,5,1); axis=-1 means the last position, i.e. axis=4 here
tf.stack([a,b],axis=-1).shape
TensorShape([2, 3, 4, 5, 2])
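A sketch tying the cases together: stacking at axis k is the same as giving each tensor a new length-1 axis at position k (the "view (2,3,4,5) as (...,1,...)" trick above) and concatenating there. The check below reuses a and b from this example:

left = tf.stack([a,b], axis=2)
right = tf.concat([tf.expand_dims(a,2), tf.expand_dims(b,2)], axis=2) # insert length-1 axes, then join
tf.reduce_all(left == right) # <tf.Tensor: shape=(), dtype=bool, numpy=True>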

- Example: (2,3,4), (2,3,4), (2,3,4)

a = tf.reshape(tf.constant(range(2*3*4)),(2,3,4))
b = -a
c = 2*a

(Case 1) (2,3,4), (2,3,4), (2,3,4) $\to$ (6,3,4) # meaning the first axis is the one that changes

  • The rank stays the same, so use concat, not stack!
tf.concat([a,b,c],axis=0).shape
TensorShape([6, 3, 4])

(Case 2) (2,3,4), (2,3,4), (2,3,4) $\to$ (2,9,4)

tf.concat([a,b,c],axis=1).shape
TensorShape([2, 9, 4])

(Case 3) (2,3,4), (2,3,4), (2,3,4) $\to$ (2,3,12)

tf.concat([a,b,c],axis=-1).shape
TensorShape([2, 3, 12])

(Case 4) (2,3,4), (2,3,4), (2,3,4) $\to$ (3,2,3,4) # the rank changes? use stack

  • Here the new dimension goes in the first position;
  • in the next case, view the new axis as inserted in the second slot.
tf.stack([a,b,c],axis=0).shape
TensorShape([3, 2, 3, 4])

(Case 5) (2,3,4), (2,3,4), (2,3,4) $\to$ (2,3,3,4)

tf.stack([a,b,c],axis=1).shape
TensorShape([2, 3, 3, 4])

(Case 6) (2,3,4), (2,3,4), (2,3,4) $\to$ (2,3,3,4)

tf.stack([a,b,c],axis=2).shape
TensorShape([2, 3, 3, 4])

(Case 7) (2,3,4), (2,3,4), (2,3,4) $\to$ (2,3,4,3)

tf.stack([a,b,c],axis=-1).shape
TensorShape([2, 3, 4, 3])

- Example: (2,3,4) (4,3,4) $\to$ (6,3,4)

  • Same rank for both tensors, so use concat
a = tf.reshape(tf.constant(range(2*3*4)),(2,3,4))
b = tf.reshape(-tf.constant(range(4*3*4)),(4,3,4))
a.shape
TensorShape([2, 3, 4])
b.shape
TensorShape([4, 3, 4])
tf.concat([a,b],axis=0).shape
TensorShape([6, 3, 4])
tf.concat([a,b],axis=1).shape
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Input In [30], in <cell line: 1>()
----> 1 tf.concat([a,b],axis=1).shape

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153   raise e.with_traceback(filtered_tb) from None
    154 finally:
    155   del filtered_tb

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/framework/ops.py:7107, in raise_from_not_ok_status(e, name)
   7105 def raise_from_not_ok_status(e, name):
   7106   e.message += (" name: " + name if name is not None else "")
-> 7107   raise core._status_to_exception(e) from None

InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [2,3,4] vs. shape[1] = [4,3,4] [Op:ConcatV2] name: concat

With axis=1 we join along the second axis, but the remaining dimensions of (2,3,4) and (4,3,4) — (2,4) vs (4,4) — do not match.

tf.concat([a,b],axis=2).shape
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Input In [31], in <cell line: 1>()
----> 1 tf.concat([a,b],axis=2).shape

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153   raise e.with_traceback(filtered_tb) from None
    154 finally:
    155   del filtered_tb

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/framework/ops.py:7107, in raise_from_not_ok_status(e, name)
   7105 def raise_from_not_ok_status(e, name):
   7106   e.message += (" name: " + name if name is not None else "")
-> 7107   raise core._status_to_exception(e) from None

InvalidArgumentError: ConcatOp : Dimensions of inputs should match: shape[0] = [2,3,4] vs. shape[1] = [4,3,4] [Op:ConcatV2] name: concat

With axis=2 we join along the third axis, but the remaining dimensions of (2,3,4) and (4,3,4) — (2,3) vs (4,3) — do not match.

The error occurs because those dimensions differ.

- What about the operation (2,2) @ (2,)?

numpy

np.array([[1,0],[0,1]]) @ np.array([77,-88])
array([ 77, -88])
np.array([77,-88]) @ np.array([[1,0],[0,1]])
array([ 77, -88])

Since (2,) is just a length-2 vector, NumPy automatically interprets it as a row or column matrix as needed and computes the product.

np.array([[1,0],[0,1]]) @ np.array([77,-88]).reshape(2,1)
array([[ 77],
       [-88]])
np.array([77,-88]).reshape(1,2) @ np.array([[1,0],[0,1]])
array([[ 77, -88]])
np.array([77,-88]).reshape(2,1) @ np.array([[1,0],[0,1]])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [41], in <cell line: 1>()
----> 1 np.array([77,-88]).reshape(2,1) @ np.array([[1,0],[0,1]])

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 1)

Once the size is made explicit as (2,1), the product (2,1) @ (2,2) errors.

  • $(2,1) \times (2,2)$
np.array([77,-88]).reshape(2,1)
array([[ 77],
       [-88]])
np.array([[1,0],[0,1]])
array([[1, 0],
       [0, 1]])
np.array([77,-88]).reshape(1,2) @ np.array([[1,0],[0,1]])
array([[ 77, -88]])

tf.constant does not automatically figure out row versus column for you.

tensorflow

I = tf.constant([[1.0,0.0],[0.0,1.0]])
x = tf.constant([77.0,-88.0])
x @ I
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Input In [4], in <cell line: 1>()
----> 1 x @ I

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153   raise e.with_traceback(filtered_tb) from None
    154 finally:
    155   del filtered_tb

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/framework/ops.py:7107, in raise_from_not_ok_status(e, name)
   7105 def raise_from_not_ok_status(e, name):
   7106   e.message += (" name: " + name if name is not None else "")
-> 7107   raise core._status_to_exception(e) from None

InvalidArgumentError: In[0] and In[1] has different ndims: [2] vs. [2,2] [Op:MatMul]
I @ x
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Input In [6], in <cell line: 1>()
----> 1 I @ x

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153   raise e.with_traceback(filtered_tb) from None
    154 finally:
    155   del filtered_tb

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/framework/ops.py:7107, in raise_from_not_ok_status(e, name)
   7105 def raise_from_not_ok_status(e, name):
   7106   e.message += (" name: " + name if name is not None else "")
-> 7107   raise core._status_to_exception(e) from None

InvalidArgumentError: In[0] and In[1] has different ndims: [2,2] vs. [2] [Op:MatMul]
x
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 77., -88.], dtype=float32)>
I
<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1., 0.],
       [0., 1.]], dtype=float32)>
tf.reshape
<function tensorflow.python.ops.array_ops.reshape(tensor, shape, name=None)>
tf.reshape(x,-1)
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 77., -88.], dtype=float32)>
I @ tf.reshape(x,(2,1))
<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[ 77.],
       [-88.]], dtype=float32)>
tf.reshape(x,(1,2)) @ I
<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[ 77., -88.]], dtype=float32)>
tf.reshape(x,(2,1)) @ I
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Input In [13], in <cell line: 1>()
----> 1 tf.reshape(x,(2,1)) @ I

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153   raise e.with_traceback(filtered_tb) from None
    154 finally:
    155   del filtered_tb

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/framework/ops.py:7107, in raise_from_not_ok_status(e, name)
   7105 def raise_from_not_ok_status(e, name):
   7106   e.message += (" name: " + name if name is not None else "")
-> 7107   raise core._status_to_exception(e) from None

InvalidArgumentError: Matrix size-incompatible: In[0]: [2,1], In[1]: [2,2] [Op:MatMul]

The shapes are simply incompatible for matrix multiplication, so it errors~
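As an aside — not used in the lecture — tf.linalg.matvec computes the matrix–vector product for the 1-D case directly (a minimal sketch reusing I and x from above):

tf.linalg.matvec(I, x) # (2,2) times (2,) -> (2,); returns [ 77., -88.]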


tf.Variable

Declaration

- Declare with tf.Variable()

tf.Variable([1,2,3,4])
<tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>
tf.Variable([1.0,2.0,3.0,4.0])
<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([1., 2., 3., 4.], dtype=float32)>

- Declare a tf.constant(), then convert

_a = tf.Variable([1,2,3,4])
type(_a)
tensorflow.python.ops.resource_variable_ops.ResourceVariable
tf.Variable(tf.constant([1,2,3,4]))
<tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>

- Declare with np etc., then convert

tf.Variable(np.array([1,2,3,4]))
<tf.Variable 'Variable:0' shape=(4,) dtype=int64, numpy=array([1, 2, 3, 4])>

Types

type(tf.constant([1,2,3,4]))
tensorflow.python.framework.ops.EagerTensor
type(np.array([1,2,3,4]))
numpy.ndarray
type(tf.Variable(np.array([1,2,3,4])))
tensorflow.python.ops.resource_variable_ops.ResourceVariable

Indexing

a = tf.Variable([1,2,3,4])
a
<tf.Variable 'Variable:0' shape=(4,) dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>

Index 0 gives 1, but

a[0]
<tf.Tensor: shape=(), dtype=int32, numpy=1>

slicing up to 2 returns the two elements at positions 0 and 1; the slice stops just before the given index.

a[:2]
<tf.Tensor: shape=(2,), dtype=int32, numpy=array([1, 2], dtype=int32)>
type(a[:2]) # same type as tf.constant
tensorflow.python.framework.ops.EagerTensor

Operations work

(Observation 1)

a = tf.Variable([1,2,3,4])
b = -a
type(a)
tensorflow.python.ops.resource_variable_ops.ResourceVariable
type(b)
tensorflow.python.framework.ops.EagerTensor
a+b
<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 0, 0, 0], dtype=int32)>
type(a+b)
tensorflow.python.framework.ops.EagerTensor

b is defined as -a, i.e. via a, so unless declared separately it is not a Variable like a but the same kind of object as a tf.constant (an EagerTensor).

(Observation 2)

a = tf.Variable([1,2,3,4])
b = tf.Variable([-1,-2,-3,-4])
type(a)
tensorflow.python.ops.resource_variable_ops.ResourceVariable
type(b)
tensorflow.python.ops.resource_variable_ops.ResourceVariable
a+b
<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 0, 0, 0], dtype=int32)>
type(a+b)
tensorflow.python.framework.ops.EagerTensor

Note: the operation changes the type. And the result has the same type in both cases (EagerTensor)?
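A small sketch of why this happens (my reading, with hypothetical names v and t): an operation reads the Variable's current value as a tensor, so the result is a plain EagerTensor; tf.convert_to_tensor makes that read explicit:

v = tf.Variable([1,2,3,4])
t = tf.convert_to_tensor(v) # reads the Variable's value as a tensor
type(t) # tensorflow.python.framework.ops.EagerTensor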

tf.Variable is also inconvenient to use

tf.Variable([1,2]) + tf.Variable([3.14,3.14])
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
Input In [36], in <cell line: 1>()
----> 1 tf.Variable([1,2]) + tf.Variable([3.14,3.14])

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/ops/variables.py:1078, in Variable._OverloadOperator.<locals>._run_op(a, *args, **kwargs)
   1076 def _run_op(a, *args, **kwargs):
   1077   # pylint: disable=protected-access
-> 1078   return tensor_oper(a.value(), *args, **kwargs)

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py:153, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    151 except Exception as e:
    152   filtered_tb = _process_traceback_frames(e.__traceback__)
--> 153   raise e.with_traceback(filtered_tb) from None
    154 finally:
    155   del filtered_tb

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/framework/ops.py:7107, in raise_from_not_ok_status(e, name)
   7105 def raise_from_not_ok_status(e, name):
   7106   e.message += (" name: " + name if name is not None else "")
-> 7107   raise core._status_to_exception(e) from None

InvalidArgumentError: cannot compute AddV2 as input #1(zero-based) was expected to be a int32 tensor but is a float tensor [Op:AddV2]

It errors.

tf.Variable([1.0,2.0]) + tf.Variable([3.14,3.14])
<tf.Tensor: shape=(2,), dtype=float32, numpy=array([4.1400003, 5.1400003], dtype=float32)>

This works, but with tnp the operation succeeds whether the operands are int or float~

Only some of tnp's blessings apply

import tensorflow.experimental.numpy as tnp
tnp.experimental_enable_numpy_behavior()

- Automatic type conversion

tf.Variable([1,2]) + tf.Variable([3.14,3.14])
<tf.Tensor: shape=(2,), dtype=float64, numpy=array([4.1400001, 5.1400001])>

Before importing tnp this errored (see above); proof that it now works.

- The .reshape method

tf.Variable([1,2,3,4]).reshape(2,2)
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Input In [40], in <cell line: 1>()
----> 1 tf.Variable([1,2,3,4]).reshape(2,2)

AttributeError: 'ResourceVariable' object has no attribute 'reshape'

This one does not work even with tnp.
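The tf.reshape function does still accept a Variable, though; only the method is missing (a minimal sketch — note the result is a Tensor, not a Variable):

tf.reshape(tf.Variable([1,2,3,4]), (2,2)) # <tf.Tensor: shape=(2, 2), dtype=int32, numpy=array([[1, 2], [3, 4]], dtype=int32)>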

For most operations there is no big visible difference from tf.constant

- tf.concat

Declare each operand with matching dtypes so no type conversion is needed

$(2, 2),(2,2)\rightarrow(4,2)$

a = tf.Variable([[1,2],[3,4]])
b = tf.Variable([[-1,-2],[-3,-4]])
tf.concat([a,b],axis=0)
<tf.Tensor: shape=(4, 2), dtype=int32, numpy=
array([[ 1,  2],
       [ 3,  4],
       [-1, -2],
       [-3, -4]], dtype=int32)>

Oh, the result is a Tensor

- tf.stack

$(2,2),(2,2)\rightarrow (2,2,2)$

a = tf.Variable([[1,2],[3,4]])
b = tf.Variable([[-1,-2],[-3,-4]])
tf.stack([a,b],axis=0)
<tf.Tensor: shape=(2, 2, 2), dtype=int32, numpy=
array([[[ 1,  2],
        [ 3,  4]],

       [[-1, -2],
        [-3, -4]]], dtype=int32)>
  • the type changes after the operation
type(a)
tensorflow.python.ops.resource_variable_ops.ResourceVariable
type(b)
tensorflow.python.ops.resource_variable_ops.ResourceVariable
type(tf.stack([a,b],axis=0))
tensorflow.python.framework.ops.EagerTensor

Variable values can be changed (?)

a=1
id(a)
139699552157936
a=45
id(a)
139699552159344

Important: giving a, which previously held 1, a new value is reassignment.
  • The memory address changed.
  • If the address stayed the same, it would be in-place editing.
  • What a tf.constant holds at its memory address cannot be changed (immutable!).
  • Why we need a mutable type:
    • Using many immutable objects takes a lot of memory.
    • Even harder if you only have a limited number of variables.
  • The parameters loaded onto the GPU are mutable.
  • Having many observations is fine; the parameters are the issue.
a = tf.Variable([1,2,3,4])
id(a)
139929907979568
a.assign_add([-1,-2,-3,-4])
id(a)
139929907979568

The memory address stays the same.
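A short sketch of the in-place update methods (values and the name v are mine; each call modifies the Variable at the same address):

v = tf.Variable([1.0,2.0])
v.assign([10.0,20.0])   # overwrite the values
v.assign_add([1.0,1.0]) # v += [1,1]  -> [11., 21.]
v.assign_sub([0.5,0.5]) # v -= [0.5,0.5] -> [10.5, 20.5]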

Summary

- No obvious difference yet that requires creating objects with tf.Variable().

- Even if you go to the trouble of using tf.Variable(), a simple operation makes the result identical to an object made with tf.constant().

  • Variables exist so that not too much memory has to be loaded onto the GPU.

Differentiation

Motivation

- Example: use the computer to find the slope of the tangent line of $y=3x^2$ at $x=2$.

(By hand)

$$\frac{dy}{dx} = 6x$$

so substituting $x=2$ gives $12$.

(Using the computer)

Step 1

x1 = 2
y1 = 3* x1**2
x2 = 2 + 0.000000001
y2 = 3 * x2**2
(y2-y1)/(x2-x1)
12.0

Step 2

def f(x):
    return(3*x**2)
f(1)
3
f(3)
27

In Python this is possible because functions are first-class objects.

What about tensors?

def d(f,x):
    return (f(x + 0.000000001)-f(x))/0.000000001
d(f,2)
12.000000992884452

Is there a way to pass the function without declaring f first?

Step 3

d(lambda x: 3*x**2,2)
12.000000992884452

Executing this creates a function object on the fly and passes it as the input.

d(lambda x: x**2, 0)
1e-09

This cannot be extended to differentiating every function: the true derivative of $x^2$ at 0 is 0, yet we get 1e-09.
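As an aside, a minimal sketch of a more accurate scheme: a central difference is symmetric around x, which fixes the x=0 case above (the name d_central and the step h are mine):

def d_central(f,x,h=1e-6):
    return (f(x+h)-f(x-h))/(2*h) # symmetric difference quotient

d_central(lambda x: 3*x**2, 2) # ~12.0
d_central(lambda x: x**2, 0)   # ~0.0, where the one-sided version gave 1e-09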

Step 4

$$f(x,y) = x^2 + 3y$$

def f(x,y):
    return(x**2 + 3*y)
d(f(2,3))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [62], in <cell line: 1>()
----> 1 d(f(2,3))

TypeError: d() missing 1 required positional argument: 'x'

Why doesn't this work?

  • We assumed a single variable when we wrote f (and d(f(2,3)) passes a number, not a function).
  • To handle this we want partial derivatives;
  • we would need to define the derivative with respect to each of x and y — a package that implements this would be convenient (a numerical sketch follows this list).
  • Let's find and use that machinery in TensorFlow.
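Before that, a minimal numerical sketch of what defining each partial derivative would look like (the name d_partial and the step h are mine):

def d_partial(f,x,y,h=1e-6):
    dfdx = (f(x+h,y)-f(x,y))/h # perturb x only
    dfdy = (f(x,y+h)-f(x,y))/h # perturb y only
    return dfdx, dfdy

d_partial(lambda x,y: x**2+3*y, 2, 3) # (~4.0, ~3.0)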

How to use tf.GradientTape()

- Example 1: find the value of the derivative of $y=3x^2$ at $x=2$.

x = tf.Variable(2.0) # hold the value you want to differentiate with respect to in a tf.Variable
a = tf.constant(3.0)
mytape = tf.GradientTape() # a tape that records whatever we feed it...
mytape
<tensorflow.python.eager.backprop.GradientTape at 0x7f0be029edd0>

Something was created, and it lives at that memory address?!

?mytape

Type:           GradientTape
String form:    <tensorflow.python.eager.backprop.GradientTape object at 0x7f0be029edd0>
File:           ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/eager/backprop.py
Docstring:     
Record operations for automatic differentiation.

Operations are recorded if they are executed within this context manager and
at least one of their inputs is being "watched".

Trainable variables (created by `tf.Variable` or `tf.compat.v1.get_variable`,
where `trainable=True` is default in both cases) are automatically watched.
Tensors can be manually watched by invoking the `watch` method on this context
manager.

For example, consider the function `y = x * x`. The gradient at `x = 3.0` can
be computed as:

>>> x = tf.constant(3.0)
>>> with tf.GradientTape() as g:
...   g.watch(x)
...   y = x * x
>>> dy_dx = g.gradient(y, x)
>>> print(dy_dx)
tf.Tensor(6.0, shape=(), dtype=float32)

GradientTapes can be nested to compute higher-order derivatives. For example,

>>> x = tf.constant(5.0)
>>> with tf.GradientTape() as g:
...   g.watch(x)
...   with tf.GradientTape() as gg:
...     gg.watch(x)
...     y = x * x
...   dy_dx = gg.gradient(y, x)  # dy_dx = 2 * x
>>> d2y_dx2 = g.gradient(dy_dx, x)  # d2y_dx2 = 2
>>> print(dy_dx)
tf.Tensor(10.0, shape=(), dtype=float32)
>>> print(d2y_dx2)
tf.Tensor(2.0, shape=(), dtype=float32)

By default, the resources held by a GradientTape are released as soon as
GradientTape.gradient() method is called. To compute multiple gradients over
the same computation, create a persistent gradient tape. This allows multiple
calls to the gradient() method as resources are released when the tape object
is garbage collected. For example:

>>> x = tf.constant(3.0)
>>> with tf.GradientTape(persistent=True) as g:
...   g.watch(x)
...   y = x * x
...   z = y * y
>>> dz_dx = g.gradient(z, x)  # (4*x^3 at x = 3)
>>> print(dz_dx)
tf.Tensor(108.0, shape=(), dtype=float32)
>>> dy_dx = g.gradient(y, x)
>>> print(dy_dx)
tf.Tensor(6.0, shape=(), dtype=float32)

By default GradientTape will automatically watch any trainable variables that
are accessed inside the context. If you want fine grained control over which
variables are watched you can disable automatic tracking by passing
`watch_accessed_variables=False` to the tape constructor:

>>> x = tf.Variable(2.0)
>>> w = tf.Variable(5.0)
>>> with tf.GradientTape(
...     watch_accessed_variables=False, persistent=True) as tape:
...   tape.watch(x)
...   y = x ** 2  # Gradients will be available for `x`.
...   z = w ** 3  # No gradients will be available as `w` isn't being watched.
>>> dy_dx = tape.gradient(y, x)
>>> print(dy_dx)
tf.Tensor(4.0, shape=(), dtype=float32)
>>> # No gradients will be available as `w` isn't being watched.
>>> dz_dy = tape.gradient(z, w)
>>> print(dz_dy)
None

Note that when using models you should ensure that your variables exist when
using `watch_accessed_variables=False`. Otherwise it's quite easy to make your
first iteration not have any gradients:

```python
a = tf.keras.layers.Dense(32)
b = tf.keras.layers.Dense(32)

with tf.GradientTape(watch_accessed_variables=False) as tape:
  tape.watch(a.variables)  # Since `a.build` has not been called at this point
                           # `a.variables` will return an empty list and the
                           # tape will not be watching anything.
  result = b(a(inputs))
  tape.gradient(result, a.variables)  # The result of this computation will be
                                      # a list of `None`s since a's variables
                                      # are not being watched.
```

Note that only tensors with real or complex dtypes are differentiable.
Init docstring:
Creates a new GradientTape.

Args:
  persistent: Boolean controlling whether a persistent gradient tape
    is created. False by default, which means at most one call can
    be made to the gradient() method on this object.
  watch_accessed_variables: Boolean controlling whether the tape will
    automatically `watch` any (trainable) variables accessed while the tape
    is active. Defaults to True meaning gradients can be requested from any
    result computed in the tape derived from reading a trainable `Variable`.
    If False users must explicitly `watch` any `Variable`s they want to
    request gradients from.

Checking mytape's methods

dir(mytape)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_ensure_recording',
 '_persistent',
 '_pop_tape',
 '_push_tape',
 '_recording',
 '_tape',
 '_tf_api_names',
 '_tf_api_names_v1',
 '_watch_accessed_variables',
 '_watched_variables',
 'batch_jacobian',
 'gradient',
 'jacobian',
 'reset',
 'stop_recording',
 'watch',
 'watched_variables']

Beyond the methods that tab completion on mytape shows, hidden methods appear!

  • Why were they hidden? Explained below.
?mytape.__enter__
Signature: mytape.__enter__()
Docstring: Enters a context inside which operations are recorded on this tape.
File:      ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/eager/backprop.py
Type:      method
  • Think of a video tape or a scratch pad.
  • Opening it -> the idea of recording, of writing things down.
  • It is ready to record or write.
mytape = tf.GradientTape()
mytape.__enter__() # start recording
y = a*x**2 # y = ax^2 = 3x^2
mytape.__exit__(None,None,None) # stop recording
mytape.gradient(y,x) # differentiate y with respect to x
<tf.Tensor: shape=(), dtype=float32, numpy=12.0>
  1. Store the value you want to differentiate with respect to in a tf.Variable.
  2. Declare the values you do not want to differentiate with respect to somewhere too, as tf.constant.
  3. Make a GradientTape,
  4. call enter and exit,
  5. and write the formula in between (a two-variable sketch follows this list).
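Following that recipe, a minimal sketch for the two-variable $f(x,y)=x^2+3y$ from Step 4 (the variable names are mine; one gradient call returns both partials):

x = tf.Variable(2.0)
y = tf.Variable(3.0)
tape = tf.GradientTape()
tape.__enter__() # start recording
z = x**2 + 3*y
tape.__exit__(None,None,None) # stop recording
tape.gradient(z, [x,y]) # [4.0, 3.0]: dz/dx = 2x, dz/dy = 3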

- Example 2: a slightly different example

x = tf.Variable(2.0) 
# a = tf.constant(3.0)

mytape = tf.GradientTape()
mytape.__enter__() # start recording
a = x/2*3 # a = (3/2)x
y = a*x**2 # y = ax^2 = (3/2)x^3
mytape.__exit__(None,None,None) # stop recording

mytape.gradient(y,x)
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>

$$a = \frac{3}{2}x$$ $$y = ax^2 = \frac{3}{2} x^3$$

$$\frac{dy}{dx} = \frac{3}{2} 3 x^2$$

3/2*3*4
18.0

- The tape concept ($\star$)

On the tape we use Variable and constant to define which values we want to differentiate with respect to and which we do not.

  • We have to write the formula out for the computer, so state it explicitly.
  • We also make the notebook and give it an explicit name; here, mytape.

(The situation)

We are asking the computer to do a difficult differentiation for us (for example $y=3x^2$). To make the request we have to write the formula $y=3x^2$ on a scratch pad (= the tape) and show it to the computer, stating explicitly what the target is and what we want to differentiate with respect to.

(1) mytape = tf.GradientTape(): tf.GradientTape() is the command that creates a scratch pad; we name the created pad mytape.

  • Creating several pads makes chained computations possible.
  • Being able to name one pad is what lets us manage several.

(2) mytape.__enter__(): open the pad (= put it in a state where it can record).

  • "From now on, let me show you some formulas."

(3) a=x/2*3; y=a*x**2: write the formulas to hand to the computer.

(4) mytape.__exit__(None,None,None): close the pad.

(5) mytape.gradient(y,x): leave a memo telling it to differentiate $y$ with respect to $x$, and hand it to the computer.

- Example 3: deciding when to open and close the pad matters.

x = tf.Variable(2.0) 
a = (x/2)*3

mytape = tf.GradientTape()
mytape.__enter__() # start recording
y = a*x**2
mytape.__exit__(None,None,None) # stop recording

mytape.gradient(y,x)
<tf.Tensor: shape=(), dtype=float32, numpy=12.0>

a was defined outside the pad, so the computer never sees that formula; a is treated as a constant and the gradient comes out 12, not 18.

- Example 4: tf.GradientTape() used with a with statement

Let's turn this into a macro.

The expression after with is executed and an object is created.

  • Bring over the same formulas to run.
x = tf.Variable(2.0) 
a = (x/2)*3
with tf.GradientTape() as mytape:
    ## with block starts
    y = a*x**2
    ## with block ends
mytape.gradient(y,x)
<tf.Tensor: shape=(), dtype=float32, numpy=12.0>

(Syntax notes)

It is written as follows.

with expression as myname:
    ## with block starts: myname.__enter__()
    statements you want to run
    ## with block ends: myname.__exit__()

You will run into this pattern a lot.

  • Convenient when repeated setup and teardown are involved

(1) Executing expression creates an object, and that object is given the name myname. The object must provide .__enter__() and .__exit__() as hidden methods.

  • The __ prefix is there because users never need to call these directly.
  • Used together with with, there is no reason to type the __ methods at all, so they are hidden outright (which answers the "why were they hidden?" question above).
    • When a with block starts, __enter__ runs, and ending with __exit__ happens automatically.

(2) As the with block starts, myname.__enter__() is executed.

  • Up to here it is all convention (the context-manager protocol).

(3) The statements you wrote in the body are executed.

(4) As the with block ends, myname.__exit__() is executed (a sketch of the protocol follows).
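A minimal sketch of this protocol with a hypothetical class of our own — nothing TensorFlow-specific, GradientTape simply implements the same two methods:

class Notebook:
    def __enter__(self):
        print("opened")  # runs as the with block starts
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        print("closed")  # runs as the with block ends

with Notebook() as nb:
    print("writing")
# prints: opened / writing / closed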

- Example 5: Example 2 implemented with a with statement

x = tf.Variable(2.0) 

with tf.GradientTape() as myname:
    a = x/2*3
    y = a*x**2
    
myname.gradient(y,x)
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>

- 예제6: persistent = True

tf.GradientTape?

Init signature: tf.GradientTape(persistent=False, watch_accessed_variables=True)
Docstring:      (identical to the `?mytape` output above)
Init docstring:
Creates a new GradientTape.

Args:
  persistent: Boolean controlling whether a persistent gradient tape
    is created. False by default, which means at most one call can
    be made to the gradient() method on this object.
  watch_accessed_variables: Boolean controlling whether the tape will
    automatically `watch` any (trainable) variables accessed while the tape
    is active. Defaults to True meaning gradients can be requested from any
    result computed in the tape derived from reading a trainable `Variable`.
    If False users must explicitly `watch` any `Variable`s they want to
    request gradients from.
File:           ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/eager/backprop.py
Type:           type
Subclasses:     LossScaleGradientTape

(Observation 1)

x = tf.Variable(2.0) 

with tf.GradientTape() as myname:
    a = x/2*3
    y = a*x**2
myname.gradient(y,x)
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>
myname.gradient(y,x)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Input In [79], in <cell line: 1>()
----> 1 myname.gradient(y,x)

File ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/eager/backprop.py:1032, in GradientTape.gradient(self, target, sources, output_gradients, unconnected_gradients)
   1002 """Computes the gradient using operations recorded in context of this tape.
   1003 
   1004 Note: Unless you set `persistent=True` a GradientTape can only be used to
   (...)
   1029    called with an unknown value.
   1030 """
   1031 if self._tape is None:
-> 1032   raise RuntimeError("A non-persistent GradientTape can only be used to "
   1033                      "compute one set of gradients (or jacobians)")
   1034 if self._recording:
   1035   if not self._persistent:

RuntimeError: A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)

The computer throws the tape away after one gradient computation, so running it a second time raises an error.

  • persistent is the option that keeps it around!

(Observation 2)

x = tf.Variable(2.0) 

with tf.GradientTape(persistent=True) as myname:
    a = x/2*3
    y = a*x**2
myname.gradient(y,x)
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>
myname.gradient(y,x)
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>

Now it can be run again and again.

  • Running it two or more times no longer errors (see the sketch below).
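A sketch of why persistent is useful: one tape can answer several different gradient questions, here both dy/dx and da/dx (the name tape is mine):

x = tf.Variable(2.0)
with tf.GradientTape(persistent=True) as tape:
    a = x/2*3   # a = (3/2)x
    y = a*x**2  # y = (3/2)x^3
print(tape.gradient(y,x)) # tf.Tensor(18.0, shape=(), dtype=float32)
print(tape.gradient(a,x)) # tf.Tensor(1.5, shape=(), dtype=float32)
del tape # release the tape's resources when done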

- Example 7: watch

myname.watch?

Signature: myname.watch(tensor)
Docstring:
Ensures that `tensor` is being traced by this tape.

Args:
  tensor: a Tensor or list of Tensors.

Raises:
  ValueError: if it encounters something that is not a tensor.
File:      ~/anaconda3/envs/stbda2022/lib/python3.10/site-packages/tensorflow/python/eager/backprop.py
Type:      method

(Observation 1)

x = tf.Variable(2.0) 

with tf.GradientTape(persistent=True) as myname:
    a = x/2*3
    y = a*x**2
myname.gradient(y,x)
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>

(Observation 2)

x = tf.constant(2.0) 

with tf.GradientTape(persistent=True) as myname:
    a = x/2*3
    y = a*x**2
myname.gradient(y,x)
print(myname.gradient(y,x))
None

Because x was declared as a constant, the computer doesn't know what to differentiate with respect to; it looks at the pad and throws the result away (None).

So we should declare it as a Variable? Or we can keep the constant and watch it:

x = tf.constant(2.0) 

with tf.GradientTape(persistent=True) as myname:
    myname.watch(x)
    a = x/2*3
    y = a*x**2
print(myname.gradient(y,x))
tf.Tensor(18.0, shape=(), dtype=float32)

Important: declaring x as a constant and recording it with watch (manual watching) gives the same result as a Variable (automatic watching).

  • e.g. out of 100 values, 80 could be Variables and 20 constants when building a training or test setup (a mixed sketch follows).
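A minimal sketch of such a mixed setup: one manually watched constant and one automatically watched Variable (names are mine):

x = tf.constant(2.0) # watched manually below
w = tf.Variable(3.0) # watched automatically
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x)
    y = w*x**2
print(tape.gradient(y,x)) # 2wx = 12.0
print(tape.gradient(y,w)) # x^2 = 4.0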

(Observation 3)

x = tf.Variable(2.0)

What to observe: watch_accessed_variables=False, i.e. "do not watch".

x = tf.Variable(2.0) 

with tf.GradientTape(persistent=True,watch_accessed_variables=False) as myname: # turn off automatic watching
    a = x/2*3
    y = a*x**2
myname.gradient(y,x)
print(myname.gradient(y,x))
None

Even though x is a Variable, setting the watch default of True to False makes it act like a constant! To get the gradient back you must watch explicitly; see the sketch below.
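A minimal sketch of recovering the gradient while automatic watching is off — even a Variable must then be watched explicitly:

x = tf.Variable(2.0)
with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(x) # without this line the gradient below is None
    a = x/2*3
    y = a*x**2
print(tape.gradient(y,x)) # tf.Tensor(18.0, shape=(), dtype=float32)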

(Observation 4)

x = tf.Variable(2.0)

What to observe: watch_accessed_variables=True (entered automatically if omitted) together with an explicit myname.watch(x).

x = tf.Variable(2.0) 

with tf.GradientTape(persistent=True) as myname:
    myname.watch(x)
    a = x/2*3
    y = a*x**2
myname.gradient(y,x)
<tf.Tensor: shape=(), dtype=float32, numpy=18.0>
myname.watch(x)

This line can be omitted: x is a Variable, so it is watched automatically.