
AI03, Artificial neural networks





Resource

CPU

CPU Resource info

$ cat /proc/cpuinfo

Total number of logical processors

$ grep -c processor /proc/cpuinfo

Number of physical CPUs (sockets)

$ grep "physical id" /proc/cpuinfo | sort -u | wc -l

Number of cores per CPU

$ grep "cpu cores" /proc/cpuinfo | tail -1





GPU

GPU Resource info

GPU Monitoring tools: URL
GPU Memory control: (1), (2)

$ nvidia-smi
$ watch -n 1 -d nvidia-smi
$ fuser -v /dev/nvidia*
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
OUTPUT
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 7812072362293866351
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 12834618334973673973
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 10813738189
locality {
  bus_id: 1
  links {
  }
}
incarnation: 2176570505504160042
physical_device_desc: "device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:3b:00.0, compute capability: 7.5"
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 10813738189
locality {
  bus_id: 1
  links {
  }
}
incarnation: 16344150243988831062
physical_device_desc: "device: 1, name: GeForce RTX 2080 Ti, pci bus id: 0000:5e:00.0, compute capability: 7.5"
, name: "/device:GPU:2"
device_type: "GPU"
memory_limit: 10813738189
locality {
  bus_id: 2
  numa_node: 1
  links {
  }
}
incarnation: 15503034830640890796
physical_device_desc: "device: 2, name: GeForce RTX 2080 Ti, pci bus id: 0000:86:00.0, compute capability: 7.5"
, name: "/device:GPU:3"
device_type: "GPU"
memory_limit: 10812430746
locality {
  bus_id: 2
  numa_node: 1
  links {
  }
}
incarnation: 17206545542125030428
physical_device_desc: "device: 3, name: GeForce RTX 2080 Ti, pci bus id: 0000:af:00.0, compute capability: 7.5"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 3251941024359796176
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_GPU:1"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 14468545947390282029
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_GPU:2"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 759770992281457065
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_GPU:3"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 15023472020250575167
physical_device_desc: "device: XLA_GPU device"
]




Deallocate memory on GPU

$ nvidia-smi --gpu-reset -i 0
# or forcefully, by killing the process that holds the memory
$ kill -9 [PID_num]

Allocate memory on GPU

URL1, URL2
tensorflow : One GPU (default)

import tensorflow as tf

[Code : data preprocessing]
[Code : deep neural net model]

tensorflow : One GPU with CPU

import tensorflow as tf

tf.debugging.set_log_device_placement(True)

try:
    with tf.device('/device:CPU:0'):
        [Code : data preprocessing]
    with tf.device('/device:GPU:2'):
        [Code : deep neural net model]
        
except RuntimeError as e:
    print(e)

tensorflow : Multi-GPU with CPU

import tensorflow as tf

tf.debugging.set_log_device_placement(True)

gpus = tf.config.experimental.list_logical_devices('GPU')
if gpus:
    with tf.device('/CPU:0'):
        [Code : data preprocessing]

    for gpu in gpus:
        with tf.device(gpu.name):
            [Code : deep neural net model]

pytorch
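
A minimal sketch of device selection in pytorch; memory is then allocated on demand by its caching allocator (the device index below is an assumption, adjust to your machine):

import torch

# pick a specific GPU if available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# [Code : deep neural net model]
# model = model.to(device)             # move parameters to the chosen device
# x = torch.randn(32, 10).to(device)   # move data to the same device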





tensorboard

import tensorflow as tf
from datetime import datetime
import os

%load_ext tensorboard
%matplotlib inline

[Code : data preprocessing]
[Code : deep neural net model]

tensorboard = tf.keras.callbacks.TensorBoard(
    log_dir=os.path.join('logs',  datetime.now().strftime("%Y%m%d-%H%M%S")), 
    write_graph=True, 
    write_images=True,
    histogram_freq=1
)
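# pass the callback to training so the logs are actually written, e.g.
# model.fit(x_train, y_train, epochs=5, callbacks=[tensorboard])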

%tensorboard --logdir logs --port [port_num]





Numpy

Regression

Simple Linear regression

import numpy as np
import matplotlib.pyplot as plt

def cost():                       # mean squared error; note the bias term b
    c = 0
    for i in range(len(X)):
        c += (W * X[i] + b - Y[i]) ** 2
    return c / len(X)

def W_grad():
    return np.sum(np.multiply(np.multiply(W, X) + b - Y, X))

def b_grad():
    return np.sum(np.multiply(np.multiply(W, X) + b - Y, 1))

# data
X = np.array([1, 2, 3, 4, 5])
Y = np.array([1, 2, 3, 4, 5])

# parameters
W = 2.5; b = 1;
alpha = 0.01; beta = 0.1;
fig, axes = plt.subplots(1,2,figsize=(15,5))

# gradient descent
epochs = 5;
curr_cost = []; step = [];
for i in range(epochs):
    # update
    W = W - np.multiply(alpha, W_grad()); print('W = ', W)
    b = b - np.multiply(beta, b_grad()); print('b = ', b)
    
    # visualize results
    curr_cost.append(cost())
    step.append(i+1)
    axes[1].plot(X, W*X + b)
axes[0].plot(step, curr_cost)    
axes[1].plot(X,Y, 'o')
axes[0].grid(True)
axes[1].grid(True)
plt.show()

image




Multi-variable regression
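
A minimal numpy sketch in the same style as the simple case above; the data values are made up for illustration:

import numpy as np

# data : two features, one target (made-up values)
X = np.array([[1., 0.], [0., 2.], [3., 0.], [0., 4.], [5., 0.]])
Y = np.array([1., 2., 3., 4., 5.])

# parameters
W = np.array([1.0, 1.0]); b = 1.0;
alpha = 0.01

# gradient descent
epochs = 5
for i in range(epochs):
    hypothesis = X @ W + b                  # (5, 2) @ (2,) = (5,)
    error = hypothesis - Y
    W = W - alpha * (X.T @ error) / len(X)  # d(cost)/dW
    b = b - alpha * np.mean(error)          # d(cost)/db
    print('cost = ', np.mean(error ** 2), 'W = ', W, 'b = ', b)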





Logistic regression
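
A minimal numpy sketch of binary logistic regression trained with cross-entropy; the toy data mirrors the tensorflow logistic regression example later in this post:

import numpy as np

def sigmoid(z):
    return 1. / (1. + np.exp(-z))

# data (same toy set as the tensorflow logistic regression below)
X = np.array([[1., 2.], [2., 3.], [3., 1.], [4., 3.], [5., 3.], [6., 2.]])
Y = np.array([0., 0., 0., 1., 1., 1.])

# parameters
W = np.zeros(2); b = 0.;
alpha = 0.1

# gradient descent on the cross-entropy cost
epochs = 1000
for i in range(epochs):
    hypothesis = sigmoid(X @ W + b)
    cost = -np.mean(Y * np.log(hypothesis) + (1 - Y) * np.log(1 - hypothesis))
    error = hypothesis - Y
    W = W - alpha * (X.T @ error) / len(X)
    b = b - alpha * np.mean(error)

predicted = (sigmoid(X @ W + b) > 0.5).astype(np.float32)
print('cost = ', cost, 'accuracy = ', np.mean(predicted == Y))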





Soft-max regression
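
A minimal numpy sketch of soft-max regression on a made-up three-class problem:

import numpy as np

def softmax(z):                               # row-wise softmax
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

# data : 4 samples, 2 features, 3 classes (made-up values)
X = np.array([[1., 2.], [2., 1.], [4., 5.], [5., 4.]])
Y = np.array([0, 0, 1, 2])                    # class indices
Y_onehot = np.eye(3)[Y]

# parameters
W = np.zeros((2, 3)); b = np.zeros(3);
alpha = 0.1

# gradient descent on the cross-entropy cost
for i in range(1000):
    hypothesis = softmax(X @ W + b)           # (4, 3)
    cost = -np.mean(np.sum(Y_onehot * np.log(hypothesis), axis=1))
    error = (hypothesis - Y_onehot) / len(X)  # d(cost)/d(logits)
    W = W - alpha * (X.T @ error)
    b = b - alpha * error.sum(axis=0)

print('cost = ', cost, 'predicted = ', np.argmax(X @ W + b, axis=1))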






FCN

FCN through numerical method

import time
import numpy as np

epsilon = 0.0001

def _t(x):
    return np.transpose(x)

def _m(A, B):
    return np.matmul(A, B)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def mean_squared_error(h, y):
    return 1 / 2 * np.mean(np.square(h - y))


class Neuron:
    def __init__(self, W, b, a):
        # Model Parameter
        self.W = W
        self.b = b
        self.a = a

        # Gradients
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)

    def __call__(self, x):
        return self.a(_m(_t(self.W), x) + self.b) # activation((W^T)x + b)

class DNN:
    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=sigmoid):
        def init_var(i, o):
            return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))

        self.sequence = list()
        # First hidden layer
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Neuron(W, b, activation))

        # Hidden layers
        for _ in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))

        # Output layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Neuron(W, b, activation))

    def __call__(self, x):
        for layer in self.sequence:
            x = layer(x)
        return x

    def calc_gradient(self, x, y, loss_func):
        def get_new_sequence(layer_index, new_neuron):
            new_sequence = list()
            for i, layer in enumerate(self.sequence):
                if i == layer_index:
                    new_sequence.append(new_neuron)
                else:
                    new_sequence.append(layer)
            return new_sequence

        def eval_sequence(x, sequence):
            for layer in sequence:
                x = layer(x)
            return x

        loss = loss_func(self(x), y)

        for layer_id, layer in enumerate(self.sequence): # iterate layer
            for w_i, w in enumerate(layer.W): # iterate W (row)
                for w_j, ww in enumerate(w): # iterate W (col)
                    W = np.copy(layer.W)
                    W[w_i][w_j] = ww + epsilon

                    new_neuron = Neuron(W, layer.b, layer.a)
                    new_seq = get_new_sequence(layer_id, new_neuron)
                    h = eval_sequence(x, new_seq)

                    num_grad = (loss_func(h, y) - loss) / epsilon  # (f(x+eps) - f(x)) / epsilon
                    layer.dW[w_i][w_j] = num_grad

            for b_i, bb in enumerate(layer.b): # iterate b (once per layer, not per row of W)
                b = np.copy(layer.b)
                b[b_i] = bb + epsilon

                new_neuron = Neuron(layer.W, b, layer.a)
                new_seq = get_new_sequence(layer_id, new_neuron)
                h = eval_sequence(x, new_seq)

                num_grad = (loss_func(h, y) - loss) / epsilon  # (f(x+eps) - f(x)) / epsilon
                layer.db[b_i] = num_grad
        return loss

def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    loss = network.calc_gradient(x, y, loss_obj)
    for layer in network.sequence:
        layer.W += -alpha * layer.dW
        layer.b += -alpha * layer.db
    return loss

x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=sigmoid)

t = time.time()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, mean_squared_error, 0.01)
    print('Epoch {}: Test loss {}'.format(epoch, loss))
print('{} seconds elapsed.'.format(time.time() - t))




FCN through backpropagation

import time
import numpy as np

def _t(x):
    return np.transpose(x)

def _m(A, B):
    return np.matmul(A, B)

class Sigmoid:
    def __init__(self):
        self.last_o = 1

    def __call__(self, x):
        self.last_o = 1 / (1.0 + np.exp(-x))
        return self.last_o

    def grad(self): # sigmoid(x)(1-sigmoid(x))
        return self.last_o * (1 - self.last_o)

class MeanSquaredError:
    def __init__(self):
        # gradient
        self.dh = 1
        self.last_diff = 1

    def __call__(self, h, y): # 1/2 * mean ((h - y)^2)
        self.last_diff = h - y
        return 1 / 2 * np.mean(np.square(h - y))

    def grad(self): # h - y
        return self.last_diff

class Neuron:
    def __init__(self, W, b, a_obj):
        # Model parameters
        self.W = W
        self.b = b
        self.a = a_obj()

        # gradient
        self.dW = np.zeros_like(self.W)
        self.db = np.zeros_like(self.b)
        self.dh = np.zeros_like(_t(self.W))

        self.last_x = np.zeros((self.W.shape[0]))
        self.last_h = np.zeros((self.W.shape[1]))

    def __call__(self, x):
        self.last_x = x
        self.last_h = _m(_t(self.W), x) + self.b
        return self.a(self.last_h)

    def grad(self): # dy/dh = W
        return self.W * self.a.grad()

    def grad_W(self, dh):
        grad = np.ones_like(self.W)
        grad_a = self.a.grad()
        for j in range(grad.shape[1]): # dy/dw = x
            grad[:, j] = dh[j] * grad_a[j] * self.last_x
        return grad

    def grad_b(self, dh): # dy/dh = 1
        return dh * self.a.grad()

class DNN:
    def __init__(self, hidden_depth, num_neuron, num_input, num_output, activation=Sigmoid):
        def init_var(i, o):
            return np.random.normal(0.0, 0.01, (i, o)), np.zeros((o,))

        self.sequence = list()
        # First hidden layer
        W, b = init_var(num_input, num_neuron)
        self.sequence.append(Neuron(W, b, activation))

        # Hidden Layers (hidden_depth - 1 more, as in the numerical version above)
        for index in range(hidden_depth - 1):
            W, b = init_var(num_neuron, num_neuron)
            self.sequence.append(Neuron(W, b, activation))

        # Output Layer
        W, b = init_var(num_neuron, num_output)
        self.sequence.append(Neuron(W, b, activation))

    def __call__(self, x):
        for layer in self.sequence:
            x = layer(x)
        return x

    def calc_gradient(self, loss_obj):
        loss_obj.dh = loss_obj.grad()
        self.sequence.append(loss_obj)

        # back-prop loop
        for i in range(len(self.sequence) - 1, 0, -1):
            l1 = self.sequence[i]
            l0 = self.sequence[i - 1]

            l0.dh = _m(l0.grad(), l1.dh)
            l0.dW = l0.grad_W(l1.dh)
            l0.db = l0.grad_b(l1.dh)

        self.sequence.remove(loss_obj)

def gradient_descent(network, x, y, loss_obj, alpha=0.01):
    loss = loss_obj(network(x), y)  # Forward inference
    network.calc_gradient(loss_obj)  # Back-propagation
    for layer in network.sequence:
        layer.W += -alpha * layer.dW
        layer.b += -alpha * layer.db
    return loss

x = np.random.normal(0.0, 1.0, (10,))
y = np.random.normal(0.0, 1.0, (2,))

t = time.time()
dnn = DNN(hidden_depth=5, num_neuron=32, num_input=10, num_output=2, activation=Sigmoid)
loss_obj = MeanSquaredError()
for epoch in range(100):
    loss = gradient_descent(dnn, x, y, loss_obj, alpha=0.01)
    print('Epoch {}: Test loss {}'.format(epoch, loss))
print('{} seconds elapsed.'.format(time.time() - t))

CNN
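
A minimal numpy sketch of the core operation only, a single-channel 2-D convolution forward pass (shapes chosen to match MNIST, for illustration):

import numpy as np

def conv2d(x, kernel):
    # valid convolution (no padding, stride 1); x: (H, W), kernel: (kh, kw)
    kh, kw = kernel.shape
    oh, ow = x.shape[0] - kh + 1, x.shape[1] - kw + 1
    out = np.zeros((oh, ow))
    for i in range(oh):
        for j in range(ow):
            out[i, j] = np.sum(x[i:i+kh, j:j+kw] * kernel)
    return out

x = np.random.normal(size=(28, 28))      # one MNIST-sized image
k = np.random.normal(size=(3, 3))        # one 3x3 filter
print(conv2d(x, k).shape)                # (26, 26)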





RNN
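
A minimal numpy sketch of the vanilla RNN forward recurrence h_t = tanh(Wx x_t + Wh h_(t-1) + b); all sizes are made up for illustration:

import numpy as np

def rnn_forward(xs, Wx, Wh, b):
    # xs: (T, input_dim); returns the hidden states (T, hidden_dim)
    h = np.zeros(Wh.shape[0])
    hs = []
    for x in xs:                          # h_t = tanh(Wx x_t + Wh h_{t-1} + b)
        h = np.tanh(Wx @ x + Wh @ h + b)
        hs.append(h)
    return np.stack(hs)

T, input_dim, hidden_dim = 5, 3, 4
xs = np.random.normal(size=(T, input_dim))
Wx = np.random.normal(scale=0.1, size=(hidden_dim, input_dim))
Wh = np.random.normal(scale=0.1, size=(hidden_dim, hidden_dim))
b = np.zeros(hidden_dim)
print(rnn_forward(xs, Wx, Wh, b).shape)  # (5, 4)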





Tensorflow

image




Regression

Simple Linear regression

import tensorflow as tf
import matplotlib.pyplot as plt

def cost():
    return tf.reduce_mean(tf.square(W * X + b - Y))

def W_grad():
    return tf.reduce_mean(tf.multiply(tf.multiply(W, X) + b - Y, X))

def b_grad():
    return tf.reduce_mean(tf.multiply(tf.multiply(W, X) + b - Y, 1))

# data
X = [1., 2., 3., 4., 5.]
Y = [1., 3., 5., 7., 9.]

# parameters
W = tf.Variable([5.0]); b = tf.Variable([1.0]);
alpha = 0.05; beta = 0.05;
fig, axes = plt.subplots(1,2, figsize=(10,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
    curr_grad = W - tf.multiply(alpha, W_grad()); W.assign(curr_grad); print('W = ', W.numpy())
    curr_grad = b - tf.multiply(beta, b_grad()); b.assign(curr_grad); print('b = ', b.numpy())
    
    # visualize results
    step.append(i+1)
    curr_cost.append(cost())
    axes[1].plot(X, W*X + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
plt.show()

image

with GradientTape

import tensorflow as tf
import matplotlib.pyplot as plt
#tf.enable_eager_execution()

# data
X = [1, 2, 3, 4, 5]
Y = [1, 2, 3, 4, 5]

# parameters
W = tf.Variable(2.9); b = tf.Variable(0.5);
alpha = 0.03; beta = 0.03;
fig, axes = plt.subplots(1,2, figsize=(10,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
    with tf.GradientTape() as tape:
        hypothesis = W * X + b
        cost = tf.reduce_mean(tf.square(hypothesis - Y))
    W_grad, b_grad = tape.gradient(cost, [W, b])
    W.assign_sub(alpha * W_grad); print('W = ', W.numpy())
    b.assign_sub(beta * b_grad); print('b = ', b.numpy())

    # visualize results
    curr_cost.append(cost)
    step.append(i+1)
    axes[1].plot(X, W*X + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
plt.show()

image




Multi-variable regression

with GradientTape

import tensorflow as tf
import matplotlib.pyplot as plt

# data
X1 = [1, 0, 3, 0, 5]; X2 = [0, 2, 0, 4, 0]
Y  = [1, 2, 3, 4, 5]

# parameters
W1 = tf.Variable([1.0]); W2 = tf.Variable([1.0]); b = tf.Variable([1.0]);
alpha1 = tf.Variable(0.03); alpha2 = tf.Variable(0.03); beta = tf.Variable(0.03);
fig, axes = plt.subplots(1,3,figsize=(15,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
    with tf.GradientTape() as tape:
        hypothesis = W1*X1 + W2*X2 + b
        cost = tf.reduce_mean(tf.square(hypothesis - Y))
    W1_grad, W2_grad, b_grad = tape.gradient(cost, [W1, W2, b])
    W1.assign_sub(alpha1 * W1_grad); print('W1 = ', W1.numpy())
    W2.assign_sub(alpha2 * W2_grad); print('W2 = ', W2.numpy())
    b.assign_sub(beta * b_grad); print('b = ', b.numpy())
    
    # visualize results
    curr_cost.append(cost)
    step.append(i+1)
    axes[1].plot(X1, W1*X1 + W2*X2 + b)
    axes[2].plot(X2, W1*X1 + W2*X2 + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X1, Y, 'x')
axes[2].plot(X2, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()    

image

with GradientTape, vectorization(matrix)

import tensorflow as tf
import matplotlib.pyplot as plt

# data
X = [[1., 0., 3., 0., 5.],
     [0., 2., 0., 4., 0.]]
Y  = [1, 2, 3, 4, 5]

# parameters
W = tf.Variable([[1.0, 1.0]]); b = tf.Variable([1.0]);
learning_rate = tf.Variable(0.05)
fig, axes = plt.subplots(1, 3, figsize=(15,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, X) + b # (1, 2) * (2, 5) = (1, 5)
        cost = tf.reduce_mean(tf.square(hypothesis - Y))

    W_grad, b_grad = tape.gradient(cost, [W, b])
    W.assign_sub(learning_rate * W_grad); print(W.numpy())
    b.assign_sub(learning_rate * b_grad); print(b.numpy())
        
    # visualize results
    curr_cost.append(cost)
    step.append(i+1)
    axes[1].plot(X[0], W[0][0]*X[0] + W[0][1]*X[1] + b)
    axes[2].plot(X[1], W[0][0]*X[0] + W[0][1]*X[1] + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[0], Y, 'x')
axes[2].plot(X[1], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()

image

with GradientTape, vectorization(matrix), optimizer(update)

import tensorflow as tf
import matplotlib.pyplot as plt

# data
X = [[1., 0., 3., 0., 5.],
     [0., 2., 0., 4., 0.]]
Y  = [1, 2, 3, 4, 5]

# parameters
W = tf.Variable([[1.0, 1.0]]); b = tf.Variable([1.0]);
learning_rate = tf.Variable(0.05)
fig, axes = plt.subplots(1, 3, figsize=(15,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = tf.keras.optimizers.SGD(learning_rate)
for i in range(epochs):
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, X) + b # (1, 2) * (2, 5) = (1, 5)
        cost = tf.reduce_mean(tf.square(hypothesis - Y))

    W_grad, b_grad = tape.gradient(cost, [W, b])
    optimizer.apply_gradients(grads_and_vars=zip([W_grad, b_grad],[W, b])); print('W = ', W.numpy(),'b = ',b.numpy())
        
    # visualize results
    curr_cost.append(cost)
    step.append(i+1)
    axes[1].plot(X[0], W[0][0]*X[0] + W[0][1]*X[1] + b)
    axes[2].plot(X[1], W[0][0]*X[0] + W[0][1]*X[1] + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[0], Y, 'x')
axes[2].plot(X[1], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()

image

with GradientTape, vectorization(matrix), implicit bias(b)

import tensorflow as tf
import matplotlib.pyplot as plt

# data
X = [[1., 1., 1., 1., 1.],   # bias(b)
     [1., 0., 3., 0., 5.],   # feature 1
     [0., 2., 0., 4., 0.]]   # feature 2
Y  = [1, 2, 3, 4, 5]

# parameters
W = tf.Variable([[1.0, 1.0, 1.0]])
learning_rate = tf.Variable(0.05)
fig, axes = plt.subplots(1, 3, figsize=(15,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, X)   # (1, 3) * (3, 5) = (1, 5)
        cost = tf.reduce_mean(tf.square(hypothesis - Y))

    W_grad = tape.gradient(cost, [W])
    W.assign_sub(learning_rate * W_grad[0]);print(W.numpy())
        
    # visualize results
    curr_cost.append(cost)
    step.append(i+1)
    axes[1].plot(X[1], W[0][1]*X[1] + W[0][2]*X[2] + W[0][0]*X[0])
    axes[2].plot(X[2], W[0][1]*X[1] + W[0][2]*X[2] + W[0][0]*X[0])
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[1], Y, 'x')
axes[2].plot(X[2], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()

image

with GradientTape, vectorization(matrix), implicit bias(b), optimizer(update)

import tensorflow as tf
import matplotlib.pyplot as plt

# data
X = [[1., 1., 1., 1., 1.],   # bias(b)
     [1., 0., 3., 0., 5.],   # feature 1
     [0., 2., 0., 4., 0.]]   # feature 2
Y  = [1, 2, 3, 4, 5]

# parameters
W = tf.Variable([[1.0, 1.0, 1.0]])
learning_rate = tf.Variable(0.05)
fig, axes = plt.subplots(1, 3, figsize=(15,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = tf.keras.optimizers.SGD(learning_rate)
for i in range(epochs):
    with tf.GradientTape() as tape:
        hypothesis = tf.matmul(W, X)   # (1, 3) * (3, 5) = (1, 5)
        cost = tf.reduce_mean(tf.square(hypothesis - Y))

    W_grad = tape.gradient(cost, [W])
    optimizer.apply_gradients(grads_and_vars=zip(W_grad,[W])); print(W.numpy())
        
        
    # visualize results
    curr_cost.append(cost)
    step.append(i+1)
    axes[1].plot(X[1], W[0][1]*X[1] + W[0][2]*X[2] + W[0][0]*X[0])
    axes[2].plot(X[2], W[0][1]*X[1] + W[0][2]*X[2] + W[0][0]*X[0])
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[1], Y, 'x')
axes[2].plot(X[2], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()

image




Logistic regression

import tensorflow as tf
import matplotlib.pyplot as plt

# data
x_train = tf.constant([[1., 2.],
                       [2., 3.],
                       [3., 1.],
                       [4., 3.],
                       [5., 3.],
                       [6., 2.]])
y_train = tf.constant([[0.],
                       [0.],
                       [0.],
                       [1.],
                       [1.],
                       [1.]])
x_test = tf.constant([[5.,2.]])
y_test = tf.constant([[1.]])
dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(len(x_train))#.repeat()

# parameters
W = tf.Variable(tf.zeros([2,1])); b = tf.Variable(tf.zeros([1]));
learning_rate = 0.001
fig, axes = plt.subplots(1,3,figsize=(15,5))

# gradient descent
epochs = 1000
curr_cost = []; step = [];
optimizer = tf.keras.optimizers.SGD(learning_rate)
for i in range(epochs):
    for features, labels in iter(dataset):
        with tf.GradientTape() as tape:
            hypothesis = tf.divide(1., 1. + tf.exp(-(tf.matmul(features, W) + b))) # sigmoid
            cost = -tf.reduce_mean(labels * tf.math.log(hypothesis) + (1 - labels) * tf.math.log(1 - hypothesis))
        grads = tape.gradient(cost, [W,b])
        optimizer.apply_gradients(grads_and_vars=zip(grads,[W,b])); print(W.numpy(), b.numpy())

        # visualize results
        curr_cost.append(cost)
        step.append(i+1)
        axes[1].plot(features[:,0], tf.divide(1., 1. + tf.exp(-(tf.matmul(features,W) + b))))
        axes[2].plot(features[:,1], tf.divide(1., 1. + tf.exp(-(tf.matmul(features,W) + b))))
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(x_train[:,0], y_train, 'x')
axes[2].plot(x_train[:,1], y_train, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()

hypothesis = tf.divide(1., 1. + tf.exp(-(tf.matmul(x_test, W) + b))) # sigmoid
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, y_test), dtype=tf.float32))
print(predicted, accuracy)

image




Soft-max regression
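
A minimal sketch with GradientTape and an optimizer, in the same style as the regressions above; the data values are made up for illustration:

import tensorflow as tf

# data : 4 samples, 2 features, 3 classes (made-up values)
X = tf.constant([[1., 2.], [2., 1.], [4., 5.], [5., 4.]])
Y = tf.one_hot([0, 0, 1, 2], depth=3)

# parameters
W = tf.Variable(tf.zeros([2, 3])); b = tf.Variable(tf.zeros([3]));
learning_rate = 0.1

# gradient descent on the cross-entropy cost
epochs = 1000
optimizer = tf.keras.optimizers.SGD(learning_rate)
for i in range(epochs):
    with tf.GradientTape() as tape:
        logits = tf.matmul(X, W) + b
        hypothesis = tf.nn.softmax(logits)
        cost = tf.reduce_mean(-tf.reduce_sum(Y * tf.math.log(hypothesis), axis=1))
    grads = tape.gradient(cost, [W, b])
    optimizer.apply_gradients(zip(grads, [W, b]))

print('cost = ', cost.numpy())
print('predicted = ', tf.argmax(tf.matmul(X, W) + b, axis=1).numpy())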






Perceptron

OR
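
A minimal sketch: OR is linearly separable, so a single sigmoid neuron trained with cross-entropy suffices:

import tensorflow as tf

# OR gate data
X = tf.constant([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
Y = tf.constant([[0.], [1.], [1.], [1.]])

# a single neuron (perceptron) with sigmoid activation
W = tf.Variable(tf.random.normal([2, 1])); b = tf.Variable(tf.zeros([1]));
optimizer = tf.keras.optimizers.SGD(0.1)

for i in range(1000):
    with tf.GradientTape() as tape:
        hypothesis = tf.sigmoid(tf.matmul(X, W) + b)
        cost = -tf.reduce_mean(Y * tf.math.log(hypothesis) +
                               (1 - Y) * tf.math.log(1 - hypothesis))
    grads = tape.gradient(cost, [W, b])
    optimizer.apply_gradients(zip(grads, [W, b]))

print(tf.cast(tf.sigmoid(tf.matmul(X, W) + b) > 0.5, tf.float32).numpy())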





XOR
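
XOR is not linearly separable, so a single perceptron cannot learn it; a minimal sketch with one hidden layer (layer sizes and optimizer are assumptions):

import tensorflow as tf

# XOR gate data
X = tf.constant([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
Y = tf.constant([[0.], [1.], [1.], [0.]])

# one hidden layer makes the problem separable
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, activation='relu', input_shape=(2,)),
    tf.keras.layers.Dense(1, activation='sigmoid')])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X, Y, epochs=1000, verbose=0)
print((model.predict(X) > 0.5).astype(int))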



FCN

Beginner mode

import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([tf.keras.layers.Flatten(input_shape=(28, 28)),
                                    tf.keras.layers.Dense(2000, activation='relu'),
                                    tf.keras.layers.Dropout(0.2),
                                    tf.keras.layers.Dense(1000, activation='relu'),
                                    tf.keras.layers.Dense(500, activation='relu'),
                                    tf.keras.layers.Dense(200, activation='relu'),
                                    tf.keras.layers.Dense(10, activation='softmax')])
model.compile(optimizer='adam',
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)




Expert mode
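
A minimal sketch re-implementing the beginner model as a subclassed tf.keras.Model with an explicit GradientTape training loop, mirroring the CNN expert mode below (the test loop is omitted for brevity; layer sizes are assumptions):

import tensorflow as tf

EPOCHS = 5

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.d1 = tf.keras.layers.Dense(128, activation='relu')
        self.d2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x, training=False, mask=None):
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

# Implement training loop
@tf.function
def train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = (x_train / 255.0).astype('float32')
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)

model = MyModel()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

for epoch in range(EPOCHS):
    for images, labels in train_ds:
        train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy)
    print('Epoch {}, Loss: {}, Accuracy: {}'.format(
        epoch + 1, train_loss.result(), train_accuracy.result() * 100))
    train_loss.reset_states()
    train_accuracy.reset_states()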


CNN

Beginner mode

import tensorflow as tf
import numpy as np

from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
from tensorflow.keras.models import Sequential

EPOCHS = 10

def MyModel():
    return Sequential([Conv2D(32, (3, 3), padding='same', activation='relu'), # 28x28x32
                       MaxPool2D(), # 14x14x32
                       Conv2D(64, (3, 3), padding='same', activation='relu'), # 14x14x64
                       MaxPool2D(), # 7x7x64
                       Conv2D(128, (3, 3), padding='same', activation='relu'), # 7x7x128
                       Flatten(), # 6272
                       Dense(128, activation='relu'), # 128
                       Dense(10, activation='softmax')]) # 10


fashion_mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

x_train = x_train.astype(np.float32)
x_test = x_test.astype(np.float32)

# NHWC
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32).prefetch(2048)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32).prefetch(2048)


model = MyModel()
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(train_ds, validation_data=test_ds, epochs=EPOCHS)            




Expert mode

import tensorflow as tf
import numpy as np

EPOCHS = 10

class ConvNet(tf.keras.Model):
    def __init__(self):
        super(ConvNet, self).__init__()
        conv2d = tf.keras.layers.Conv2D
        maxpool = tf.keras.layers.MaxPool2D
        self.sequence = list()
        self.sequence.append(conv2d(16, (3, 3), padding='same', activation='relu')) # 28x28x16
        self.sequence.append(conv2d(16, (3, 3), padding='same', activation='relu')) # 28x28x16
        self.sequence.append(maxpool((2,2))) # 14x14x16
        self.sequence.append(conv2d(32, (3, 3), padding='same', activation='relu')) # 14x14x32
        self.sequence.append(conv2d(32, (3, 3), padding='same', activation='relu')) # 14x14x32
        self.sequence.append(maxpool((2,2))) # 7x7x32
        self.sequence.append(conv2d(64, (3, 3), padding='same', activation='relu')) # 7x7x64
        self.sequence.append(conv2d(64, (3, 3), padding='same', activation='relu')) # 7x7x64
        self.sequence.append(tf.keras.layers.Flatten()) # 1568
        self.sequence.append(tf.keras.layers.Dense(128, activation='relu'))
        self.sequence.append(tf.keras.layers.Dense(10, activation='softmax'))

    def call(self, x, training=False, mask=None):
        for layer in self.sequence:
            x = layer(x)
        return x

# Implement training loop
@tf.function
def train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)

    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

# Implement algorithm test
@tf.function
def test_step(model, images, labels, loss_object, test_loss, test_accuracy):
    predictions = model(images, training=False)

    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# x_train : (NUM_SAMPLE, 28, 28) -> (NUM_SAMPLE, 28, 28, 1)
x_train = x_train[..., tf.newaxis].astype(np.float32)
x_test = x_test[..., tf.newaxis].astype(np.float32)

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)


# Create model
model = ConvNet()

# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

for epoch in range(EPOCHS):
    for images, labels in train_ds:
        train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy)

    for test_images, test_labels in test_ds:
        test_step(model, test_images, test_labels, loss_object, test_loss, test_accuracy)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()





CNN(DenseNet)

Beginner mode





Expert mode

import tensorflow as tf
import numpy as np

EPOCHS = 10

class DenseUnit(tf.keras.Model):
    def __init__(self, filter_out, kernel_size):
        super(DenseUnit, self).__init__()
        self.bn = tf.keras.layers.BatchNormalization()
        self.conv = tf.keras.layers.Conv2D(filter_out, kernel_size, padding='same')
        self.concat = tf.keras.layers.Concatenate()

    def call(self, x, training=False, mask=None): # x: (Batch, H, W, Ch_in)
        h = self.bn(x, training=training)
        h = tf.nn.relu(h)
        h = self.conv(h) # h: (Batch, H, W, filter_output)
        return self.concat([x, h]) # (Batch, H, W, (Ch_in + filter_output))

class DenseLayer(tf.keras.Model):
    def __init__(self, num_unit, growth_rate, kernel_size):
        super(DenseLayer, self).__init__()
        self.sequence = list()
        for idx in range(num_unit):
            self.sequence.append(DenseUnit(growth_rate, kernel_size))

    def call(self, x, training=False, mask=None):
        for unit in self.sequence:
            x = unit(x, training=training)
        return x

class TransitionLayer(tf.keras.Model):
    def __init__(self, filters, kernel_size):
        super(TransitionLayer, self).__init__()
        self.conv = tf.keras.layers.Conv2D(filters, kernel_size, padding='same')
        self.pool = tf.keras.layers.MaxPool2D()

    def call(self, x, training=False, mask=None):
        x = self.conv(x)
        return self.pool(x)

class DenseNet(tf.keras.Model):
    def __init__(self):
        super(DenseNet, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(8, (3, 3), padding='same', activation='relu') # 28x28x8

        self.dl1 = DenseLayer(2, 4, (3, 3)) # 28x28x16
        self.tr1 = TransitionLayer(16, (3, 3)) # 14x14x16

        self.dl2 = DenseLayer(2, 8, (3, 3)) # 14x14x32
        self.tr2 = TransitionLayer(32, (3, 3)) # 7x7x32

        self.dl3 = DenseLayer(2, 16, (3, 3)) # 7x7x64

        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x, training=False, mask=None):
        x = self.conv1(x)

        x = self.dl1(x, training=training)
        x = self.tr1(x)

        x = self.dl2(x, training=training)
        x = self.tr2(x)

        x = self.dl3(x, training=training)

        x = self.flatten(x)
        x = self.dense1(x)
        return self.dense2(x)

# Implement training loop
@tf.function
def train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)

    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

# Implement algorithm test
@tf.function
def test_step(model, images, labels, loss_object, test_loss, test_accuracy):
    predictions = model(images, training=False)

    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

x_train = x_train[..., tf.newaxis].astype(np.float32)
x_test = x_test[..., tf.newaxis].astype(np.float32)

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)


# Create model
model = DenseNet()

# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

for epoch in range(EPOCHS):
    for images, labels in train_ds:
        train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy)

    for test_images, test_labels in test_ds:
        test_step(model, test_images, test_labels, loss_object, test_loss, test_accuracy)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()





RNN(LSTM)

Beginner mode

import tensorflow as tf

EPOCHS = 10
NUM_WORDS = 10000

class MyModel(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.emb = tf.keras.layers.Embedding(NUM_WORDS, 16)
        self.rnn = tf.keras.layers.SimpleRNN(32) # plain RNN here; the expert mode below uses LSTM
        self.dense = tf.keras.layers.Dense(1, activation='sigmoid')
    
    def call(self, x, training=None, mask=None):
        x = self.emb(x)
        x = self.rnn(x)
        return self.dense(x)


imdb = tf.keras.datasets.imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=NUM_WORDS)
x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                        value=0,
                                                        padding='pre',
                                                        maxlen=32)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,
                                                       value=0,
                                                       padding='pre',
                                                       maxlen=32)
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(1000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)


model = MyModel()
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(train_ds, validation_data=test_ds, epochs=EPOCHS)




Expert mode

import tensorflow as tf

EPOCHS = 10
NUM_WORDS = 10000

class MyModel(tf.keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.emb = tf.keras.layers.Embedding(NUM_WORDS, 16)
        self.rnn = tf.keras.layers.LSTM(32)
        self.dense = tf.keras.layers.Dense(2, activation='softmax')

    def call(self, x, training=None, mask=None):
        x = self.emb(x)
        x = self.rnn(x)
        return self.dense(x)

# Implement training loop
@tf.function
def train_step(model, inputs, labels, loss_object, optimizer, train_loss, train_accuracy):
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)

    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

# Implement algorithm test
@tf.function
def test_step(model, images, labels, loss_object, test_loss, test_accuracy):
    predictions = model(images, training=False)

    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

imdb = tf.keras.datasets.imdb
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=NUM_WORDS)

x_train = tf.keras.preprocessing.sequence.pad_sequences(x_train,
                                                       value=0,
                                                       padding='pre',
                                                       maxlen=32)
x_test = tf.keras.preprocessing.sequence.pad_sequences(x_test,
                                                      value=0,
                                                      padding='pre',
                                                      maxlen=32)

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

# Create model
model = MyModel()

# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')


for epoch in range(EPOCHS):
    for seqs, labels in train_ds:
        train_step(model, seqs, labels, loss_object, optimizer, train_loss, train_accuracy)

    for test_seqs, test_labels in test_ds:
        test_step(model, test_seqs, test_labels, loss_object, test_loss, test_accuracy)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()





GAN

github

Beginner mode
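
A minimal sketch of a fully-connected GAN on MNIST (network sizes and learning rates are illustrative assumptions; see the github link above for a complete implementation):

import tensorflow as tf
import numpy as np

NOISE_DIM = 64

generator = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(NOISE_DIM,)),
    tf.keras.layers.Dense(28 * 28, activation='sigmoid'),
    tf.keras.layers.Reshape((28, 28))])
discriminator = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')])

loss_fn = tf.keras.losses.BinaryCrossentropy()
g_opt = tf.keras.optimizers.Adam(1e-4)
d_opt = tf.keras.optimizers.Adam(1e-4)

@tf.function
def train_step(real_images):
    noise = tf.random.normal([tf.shape(real_images)[0], NOISE_DIM])
    with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
        fake_images = generator(noise, training=True)
        real_out = discriminator(real_images, training=True)
        fake_out = discriminator(fake_images, training=True)
        # discriminator: real -> 1, fake -> 0; generator: fool it into fake -> 1
        d_loss = loss_fn(tf.ones_like(real_out), real_out) + \
                 loss_fn(tf.zeros_like(fake_out), fake_out)
        g_loss = loss_fn(tf.ones_like(fake_out), fake_out)
    g_opt.apply_gradients(zip(g_tape.gradient(g_loss, generator.trainable_variables),
                              generator.trainable_variables))
    d_opt.apply_gradients(zip(d_tape.gradient(d_loss, discriminator.trainable_variables),
                              discriminator.trainable_variables))
    return g_loss, d_loss

(x_train, _), _ = tf.keras.datasets.mnist.load_data()
x_train = (x_train / 255.0).astype(np.float32)
train_ds = tf.data.Dataset.from_tensor_slices(x_train).shuffle(10000).batch(128)

for epoch in range(10):
    for batch in train_ds:
        g_loss, d_loss = train_step(batch)
    print('Epoch {}, g_loss: {}, d_loss: {}'.format(epoch + 1, g_loss, d_loss))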





Expert mode






ResNet

Beginner mode





Expert mode

import tensorflow as tf
import numpy as np

EPOCHS = 10

class ResidualUnit(tf.keras.Model):
    def __init__(self, filter_in, filter_out, kernel_size):
        super(ResidualUnit, self).__init__()
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.conv1 = tf.keras.layers.Conv2D(filter_out, kernel_size, padding='same')

        self.bn2 = tf.keras.layers.BatchNormalization()
        self.conv2 = tf.keras.layers.Conv2D(filter_out, kernel_size, padding='same')

        if filter_in == filter_out:
            self.identity = lambda x: x
        else:
            self.identity = tf.keras.layers.Conv2D(filter_out, (1,1), padding='same')

    def call(self, x, training=False, mask=None):
        h = self.bn1(x, training=training)
        h = tf.nn.relu(h)
        h = self.conv1(h)

        h = self.bn2(h, training=training)
        h = tf.nn.relu(h)
        h = self.conv2(h)
        return self.identity(x) + h

class ResnetLayer(tf.keras.Model):
    def __init__(self, filter_in, filters, kernel_size):
        super(ResnetLayer, self).__init__()
        self.sequence = list()
        for f_in, f_out in zip([filter_in] + list(filters), filters):
            self.sequence.append(ResidualUnit(f_in, f_out, kernel_size))

    def call(self, x, training=False, mask=None):
        for unit in self.sequence:
            x = unit(x, training=training)
        return x

class ResNet(tf.keras.Model):
    def __init__(self):
        super(ResNet, self).__init__()
        self.conv1 = tf.keras.layers.Conv2D(8, (3, 3), padding='same', activation='relu') # 28x28x8

        self.res1 = ResnetLayer(8, (16, 16), (3, 3)) # 28x28x16
        self.pool1 = tf.keras.layers.MaxPool2D((2, 2)) # 14x14x16

        self.res2 = ResnetLayer(16, (32, 32), (3, 3)) # 14x14x32
        self.pool2 = tf.keras.layers.MaxPool2D((2, 2)) # 7x7x32

        self.res3 = ResnetLayer(32, (64, 64), (3, 3)) # 7x7x64

        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(128, activation='relu')
        self.dense2 = tf.keras.layers.Dense(10, activation='softmax')

    def call(self, x, training=False, mask=None):
        x = self.conv1(x)

        x = self.res1(x, training=training)
        x = self.pool1(x)
        x = self.res2(x, training=training)
        x = self.pool2(x)
        x = self.res3(x, training=training)

        x = self.flatten(x)
        x = self.dense1(x)
        return self.dense2(x)

# Implement training loop
@tf.function
def train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy):
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)

    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

# Implement algorithm test
@tf.function
def test_step(model, images, labels, loss_object, test_loss, test_accuracy):
    predictions = model(images, training=False)

    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

x_train = x_train[..., tf.newaxis].astype(np.float32)
x_test = x_test[..., tf.newaxis].astype(np.float32)

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

# Create model
model = ResNet()

# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')


for epoch in range(EPOCHS):
    for images, labels in train_ds:
        train_step(model, images, labels, loss_object, optimizer, train_loss, train_accuracy)

    for test_images, test_labels in test_ds:
        test_step(model, test_images, test_labels, loss_object, test_loss, test_accuracy)

    template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100,
                          test_loss.result(),
                          test_accuracy.result() * 100))
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()





Attention Net

Beginner mode





Expert mode

chatbot_data.zip

import random
import tensorflow as tf
from konlpy.tag import Okt

EPOCHS = 200
NUM_WORDS = 2000

class Encoder(tf.keras.Model):
    def __init__(self):
        super(Encoder, self).__init__()
        self.emb = tf.keras.layers.Embedding(NUM_WORDS, 64)
        self.lstm = tf.keras.layers.LSTM(512, return_sequences=True, return_state=True)

    def call(self, x, training=False, mask=None):
        x = self.emb(x)
        H, h, c = self.lstm(x)
        return H, h, c

class Decoder(tf.keras.Model):
    def __init__(self):
        super(Decoder, self).__init__()
        self.emb = tf.keras.layers.Embedding(NUM_WORDS, 64)
        self.lstm = tf.keras.layers.LSTM(512, return_sequences=True, return_state=True)
        self.att = tf.keras.layers.Attention()
        self.dense = tf.keras.layers.Dense(NUM_WORDS, activation='softmax')

    def call(self, inputs, training=False, mask=None):
        x, s0, c0, H = inputs
        x = self.emb(x)
        S, h, c = self.lstm(x, initial_state=[s0, c0])

        S_ = tf.concat([s0[:, tf.newaxis, :], S[:, :-1, :]], axis=1)
        A = self.att([S_, H])
        y = tf.concat([S, A], axis=-1)

        return self.dense(y), h, c

class Seq2seq(tf.keras.Model):
    def __init__(self, sos, eos):
        super(Seq2seq, self).__init__()
        self.enc = Encoder()
        self.dec = Decoder()
        self.sos = sos
        self.eos = eos

    def call(self, inputs, training=False, mask=None):
        if training is True:
            x, y = inputs
            H, h, c = self.enc(x)
            y, _, _ = self.dec((y, h, c, H))
            return y
        else:
            x = inputs
            H, h, c = self.enc(x)

            y = tf.convert_to_tensor(self.sos)
            y = tf.reshape(y, (1, 1))

            seq = tf.TensorArray(tf.int32, 64)

            for idx in tf.range(64):
                y, h, c = self.dec([y, h, c, H])
                y = tf.cast(tf.argmax(y, axis=-1), dtype=tf.int32)
                y = tf.reshape(y, (1, 1))
                seq = seq.write(idx, y)

                if y == self.eos:
                    break

            return tf.reshape(seq.stack(), (1, 64))

# Implement training loop
@tf.function
def train_step(model, inputs, labels, loss_object, optimizer, train_loss, train_accuracy):
    output_labels = labels[:, 1:]
    shifted_labels = labels[:, :-1]
    with tf.GradientTape() as tape:
        predictions = model([inputs, shifted_labels], training=True)
        loss = loss_object(output_labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)

    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(output_labels, predictions)

# Implement algorithm test
@tf.function
def test_step(model, inputs):
    return model(inputs, training=False)



dataset_file = 'chatbot_data.csv' # acquired from 'http://www.aihub.or.kr' and modified
okt = Okt()

with open(dataset_file, 'r') as file:
    lines = file.readlines()
    seq = [' '.join(okt.morphs(line)) for line in lines]

questions = seq[::2]
answers = ['\t ' + lines for lines in seq[1::2]]

num_sample = len(questions)

perm = list(range(num_sample))
random.seed(0)
random.shuffle(perm)

train_q = list()
train_a = list()
test_q = list()
test_a = list()

for idx, qna in enumerate(zip(questions, answers)):
    q, a = qna
    if perm[idx] > num_sample//5:
        train_q.append(q)
        train_a.append(a)
    else:
        test_q.append(q)
        test_a.append(a)

tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=NUM_WORDS,
                                                  filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~')

tokenizer.fit_on_texts(train_q + train_a)

train_q_seq = tokenizer.texts_to_sequences(train_q)
train_a_seq = tokenizer.texts_to_sequences(train_a)

test_q_seq = tokenizer.texts_to_sequences(test_q)
test_a_seq = tokenizer.texts_to_sequences(test_a)

x_train = tf.keras.preprocessing.sequence.pad_sequences(train_q_seq,
                                                        value=0,
                                                        padding='pre',
                                                        maxlen=64)
y_train = tf.keras.preprocessing.sequence.pad_sequences(train_a_seq,
                                                        value=0,
                                                        padding='post',
                                                        maxlen=65)

x_test = tf.keras.preprocessing.sequence.pad_sequences(test_q_seq,
                                                       value=0,
                                                       padding='pre',
                                                       maxlen=64)
y_test = tf.keras.preprocessing.sequence.pad_sequences(test_a_seq,
                                                       value=0,
                                                       padding='post',
                                                       maxlen=65)

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32).prefetch(1024)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(1).prefetch(1024)


# Create model
model = Seq2seq(sos=tokenizer.word_index['\t'],
                eos=tokenizer.word_index['\n'])

# Define loss and optimizer
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Define performance metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')


for epoch in range(EPOCHS):
    for seqs, labels in train_ds:
        train_step(model, seqs, labels, loss_object, optimizer, train_loss, train_accuracy)

    template = 'Epoch {}, Loss: {}, Accuracy: {}'
    print(template.format(epoch + 1,
                          train_loss.result(),
                          train_accuracy.result() * 100))

    train_loss.reset_states()
    train_accuracy.reset_states()

for test_seq, test_labels in test_ds:
    prediction = test_step(model, test_seq)
    test_text = tokenizer.sequences_to_texts(test_seq.numpy())
    gt_text = tokenizer.sequences_to_texts(test_labels.numpy())
    texts = tokenizer.sequences_to_texts(prediction.numpy())
    print('_')
    print('q: ', test_text)
    print('a: ', gt_text)
    print('p: ', texts)
   





Transfer learning

Beginner mode

import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt

EPOCHS = 100

def MyModel():
    feat = tf.keras.applications.MobileNetV2(input_shape=(224, 224, 3),
                                             include_top=False)
    feat.trainable = False
    
    seq = tf.keras.models.Sequential()
    seq.add(feat) # h x w x c 
    seq.add(tf.keras.layers.GlobalAveragePooling2D()) # c
    seq.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    return seq

split = tfds.Split.TRAIN.subsplit(weighted=(8, 2))
dataset, meta = tfds.load('cats_vs_dogs',
                          split=list(split),
                          with_info=True,
                          as_supervised=True)

train_ds, test_ds = dataset

l2s = meta.features['label'].int2str
for img, label in test_ds.take(2):
    plt.figure()
    plt.imshow(img)
    plt.title(l2s(label))

def preprocess(img, label):
    img = tf.cast(img, tf.float32) / 255.0
    img = tf.image.resize(img, (224, 224))
    return img, label

train_ds = train_ds.map(preprocess).batch(32).prefetch(1024)
test_ds = test_ds.map(preprocess).batch(32).prefetch(1024)


model = MyModel()
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(train_ds, validation_data=test_ds, epochs=EPOCHS)




Expert mode






Pytorch

image




Regression

Simple Linear regression

import torch
import matplotlib.pyplot as plt

def cost():
    return torch.mean((W*X + b - Y) ** 2)

def W_grad():
    return torch.sum((W*X + b - Y) * X)

def b_grad():
    return torch.sum((W*X + b - Y) * 1)

# data
X = torch.FloatTensor([[1], [2], [3]])
Y = torch.FloatTensor([[1], [2], [3]])

# parameters
W = torch.zeros(1); b = torch.zeros(1);
alpha = 0.1; beta = 0.1;
fig, axes = plt.subplots(1,2, figsize=(10,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
for i in range(epochs):
    W -= alpha * W_grad(); print('W =', W.item())
    b -= beta * b_grad(); print('b =', b.item())
    
    # visualize results
    step.append(i+1)
    curr_cost.append(cost().item())
    axes[1].plot(X, W*X + b)
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
plt.show()

image

with optimizer

import torch
import torch.optim as optim
import matplotlib.pyplot as plt

# data
X = torch.FloatTensor([[1], [2], [3]])
Y = torch.FloatTensor([[1], [2], [3]])

# parameters
W = torch.zeros(1, requires_grad=True); b = torch.zeros(1, requires_grad=True);
lr = 0.1;
fig, axes = plt.subplots(1,2, figsize=(10,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = optim.SGD([W, b], lr)
for i in range(epochs):
    hypothesis = W*X + b
    cost = torch.mean((hypothesis - Y) ** 2)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step(); print('W =', W.item(), 'b =', b.item())
    
    # visualize results
    step.append(i+1)
    curr_cost.append(cost.item())
    axes[1].plot(X, W.detach()*X + b.detach())
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X, Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
plt.show()

image




Multi-variable regression

with optimizer

import torch
import torch.optim as optim
import matplotlib.pyplot as plt

# data
X1 = torch.FloatTensor([[73], [93], [89], [96], [73]])
X2 = torch.FloatTensor([[80], [88], [91], [98], [66]])
X3 = torch.FloatTensor([[75], [93], [90], [100], [70]])
Y = torch.FloatTensor([[152], [185], [180], [196], [142]])

# parameters
W1 = torch.zeros(1, requires_grad=True)
W2 = torch.zeros(1, requires_grad=True)
W3 = torch.zeros(1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
lr=1e-5
fig, axes = plt.subplots(2,2, figsize=(10,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = optim.SGD([W1, W2, W3, b], lr)
for i in range(epochs):
    hypothesis = X1*W1 + X2*W2 + X3*W3 + b
    cost = torch.mean((hypothesis - Y) ** 2)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step(); print('W1 =',W1.item(),'W2 =',W2.item(),'W3 =',W3.item(),'b =',b.item())
    
    # visualize results
    step.append(i+1)
    curr_cost.append(cost.item())
    axes[0,1].plot(X1, W1.detach()*X1 + W2.detach()*X2 + W3.detach()*X3 + b.detach())
    axes[1,0].plot(X2, W1.detach()*X1 + W2.detach()*X2 + W3.detach()*X3 + b.detach())
    axes[1,1].plot(X3, W1.detach()*X1 + W2.detach()*X2 + W3.detach()*X3 + b.detach())
axes[0,0].plot(step, curr_cost, marker='o', ls='-')
axes[0,1].plot(X1, Y, 'x')
axes[1,0].plot(X2, Y, 'x')
axes[1,1].plot(X3, Y, 'x')
axes[0,0].grid(True)
axes[0,1].grid(True)
axes[1,0].grid(True)
axes[1,1].grid(True)
plt.show()

image

with optimizer, vectorization(matrix)

import torch
import torch.optim as optim
import matplotlib.pyplot as plt

# data
X = torch.FloatTensor([[73, 80, 75],
                       [93, 88, 93],
                       [89, 91, 90],
                       [96, 98, 100],
                       [73, 66, 70]])
Y = torch.FloatTensor([[152], [185], [180], [196], [142]])


# parameters
W = torch.zeros((3, 1), requires_grad=True); b = torch.zeros(1, requires_grad=True);
lr=1e-5
fig, axes = plt.subplots(2,2, figsize=(10,5))

# gradient descent
epochs = 5
curr_cost = []; step = [];
optimizer = optim.SGD([W, b], lr)
for i in range(epochs):
    hypothesis = X.matmul(W) + b # or .mm or @
    cost = torch.mean((hypothesis - Y) ** 2)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step(); print('W =',W,'b =',b)
    
    # visualize results
    step.append(i+1)
    curr_cost.append(cost.item())
    axes[0,1].plot(X[:,0], X.matmul(W.detach()) + b.detach())
    axes[1,0].plot(X[:,1], X.matmul(W.detach()) + b.detach())
    axes[1,1].plot(X[:,2], X.matmul(W.detach()) + b.detach())
axes[0,0].plot(step, curr_cost, marker='o', ls='-')
axes[0,1].plot(X[:,0], Y, 'x')
axes[1,0].plot(X[:,1], Y, 'x')
axes[1,1].plot(X[:,2], Y, 'x')
axes[0,0].grid(True)
axes[0,1].grid(True)
axes[1,0].grid(True)
axes[1,1].grid(True)
plt.show()

image




Logistic regression

with optimizer, vectorization(matrix)

import torch
import torch.optim as optim
import matplotlib.pyplot as plt

# data
X = torch.FloatTensor([[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]])
Y = torch.FloatTensor([[0], [0], [0], [1], [1], [1]])

# parameters
W = torch.zeros((2, 1), requires_grad=True); b = torch.zeros(1, requires_grad=True);
lr=1
fig, axes = plt.subplots(1,3, figsize=(10,5))

# gradient descent
epochs = 10
curr_cost = []; step = [];
optimizer = optim.SGD([W, b], lr)
for i in range(epochs):
    hypothesis = torch.sigmoid(X.matmul(W) + b) # or .mm or @
    cost = -(Y * torch.log(hypothesis) + 
             (1 - Y) * torch.log(1 - hypothesis)).mean()

    optimizer.zero_grad()
    cost.backward()
    optimizer.step(); print('W =',W, 'b =',b)
    
    # visualize results
    step.append(i+1)
    curr_cost.append(cost.item())
    axes[1].plot(X[:,0], torch.sigmoid(X.matmul(W.detach()) + b.detach()))
    axes[2].plot(X[:,1], torch.sigmoid(X.matmul(W.detach()) + b.detach()))    
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[:,0], Y, 'x')
axes[2].plot(X[:,1], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()

image

with optimizer, vectorization(matrix), functional

import torch
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt

# data
X = torch.FloatTensor([[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]])
Y = torch.FloatTensor([[0], [0], [0], [1], [1], [1]])

# parameters
W = torch.zeros((2, 1), requires_grad=True); b = torch.zeros(1, requires_grad=True);
lr=1
fig, axes = plt.subplots(1,3, figsize=(10,5))

# gradient descent
epochs = 10
curr_cost = []; step = [];
optimizer = optim.SGD([W, b], lr)
for i in range(epochs):
    hypothesis = torch.sigmoid(X.matmul(W) + b) # or .mm or @
    cost = F.binary_cross_entropy(hypothesis, Y)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step(); print('W =',W, 'b =',b)
    
    # visualize results
    step.append(i+1)
    curr_cost.append(cost.item())
    axes[1].plot(X[:,0], torch.sigmoid(X.matmul(W.detach()) + b.detach()))
    axes[2].plot(X[:,1], torch.sigmoid(X.matmul(W.detach()) + b.detach()))    
axes[0].plot(step, curr_cost, marker='o', ls='-')
axes[1].plot(X[:,0], Y, 'x')
axes[2].plot(X[:,1], Y, 'x')
axes[0].grid(True)
axes[1].grid(True)
axes[2].grid(True)
plt.show()

image




Soft-max regression
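
A minimal sketch with optimizer and functional, in the same style as above; F.cross_entropy applies log-softmax and NLL loss internally, and the data values are made up for illustration:

import torch
import torch.optim as optim
import torch.nn.functional as F

# data : 4 samples, 2 features, 3 classes (made-up values)
X = torch.FloatTensor([[1, 2], [2, 1], [4, 5], [5, 4]])
Y = torch.LongTensor([0, 0, 1, 2])          # class indices

# parameters
W = torch.zeros((2, 3), requires_grad=True); b = torch.zeros(3, requires_grad=True);
lr = 0.1

# gradient descent
optimizer = optim.SGD([W, b], lr)
for i in range(1000):
    logits = X.matmul(W) + b
    cost = F.cross_entropy(logits, Y)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

print('cost =', cost.item())
print('predicted =', torch.argmax(X.matmul(W) + b, dim=1))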






Perceptron

XOR
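
A minimal sketch: a single linear layer cannot separate XOR, so one hidden layer is added (layer sizes and optimizer are assumptions):

import torch
import torch.nn as nn
import torch.optim as optim

# XOR gate data
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = torch.FloatTensor([[0], [1], [1], [0]])

# one hidden layer makes the problem separable
model = nn.Sequential(nn.Linear(2, 10), nn.ReLU(),
                      nn.Linear(10, 1), nn.Sigmoid())
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for i in range(1000):
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

print((model(X) > 0.5).float())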





FCN

import torch

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import datasets, transforms


seed = 1

lr = 0.001
momentum = 0.5

batch_size = 64
test_batch_size = 64

epochs = 5

no_cuda = False
log_interval = 100


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
        
torch.manual_seed(seed)

use_cuda = not no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}


train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=batch_size, shuffle=True, **kwargs)


test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=test_batch_size, shuffle=True, **kwargs)

model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)


for epoch in range(1, epochs + 1):
    # Train Mode
    model.train()

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # reset gradients to zero before backpropagation
        output = model(data)
        loss = F.nll_loss(output, target)  # https://pytorch.org/docs/stable/nn.html#nll-loss
        loss.backward()  # compute gradients by backpropagation
        optimizer.step()  # update parameters with the computed gradients

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    
    # Test mode
    model.eval()  # switch batch norm, dropout, etc. to evaluation mode
    test_loss = 0
    correct = 0
    with torch.no_grad():  # disable the autograd engine (no gradient tracking) to reduce memory usage and speed up evaluation
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()  # count predictions that match the target

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))





CNN





RNN
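
A minimal sketch of a many-to-one LSTM classifier on random data, just to show the tensor shapes (all sizes are assumptions):

import torch
import torch.nn as nn

class RNNClassifier(nn.Module):
    def __init__(self, input_dim=8, hidden_dim=32, num_classes=2):
        super(RNNClassifier, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):                  # x: (batch, seq_len, input_dim)
        out, (h, c) = self.lstm(x)
        return self.fc(h[-1])              # last hidden state -> class logits

model = RNNClassifier()
x = torch.randn(4, 16, 8)                  # batch of 4 sequences, length 16
print(model(x).shape)                      # torch.Size([4, 2])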





GAN

github




